Compare commits

...

5 commits

Author SHA1 Message Date
Jan Böhmer
cb669ad4ec Fixed phpstan issues
Some checks failed
Build assets artifact / Build assets artifact (push) Has been cancelled
Docker Image Build / build (linux/amd64, amd64, ubuntu-latest) (push) Has been cancelled
Docker Image Build / build (linux/arm/v7, armv7, ubuntu-24.04-arm) (push) Has been cancelled
Docker Image Build / build (linux/arm64, arm64, ubuntu-24.04-arm) (push) Has been cancelled
Docker Image Build (FrankenPHP) / build (linux/amd64, amd64, ubuntu-latest) (push) Has been cancelled
Docker Image Build (FrankenPHP) / build (linux/arm/v7, armv7, ubuntu-24.04-arm) (push) Has been cancelled
Docker Image Build (FrankenPHP) / build (linux/arm64, arm64, ubuntu-24.04-arm) (push) Has been cancelled
Static analysis / Static analysis (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, sqlite) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, sqlite) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, sqlite) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, sqlite) (push) Has been cancelled
Docker Image Build / merge (push) Has been cancelled
Docker Image Build (FrankenPHP) / merge (push) Has been cancelled
2026-05-06 00:08:14 +02:00
Jan Böhmer
2e8ab8190a Bumped to version 2.11.1 2026-05-05 23:53:03 +02:00
Jan Böhmer
98c978ff1b Improved RandomizeUseragentHttpClient by not using old user agent strings, but different modernn profiles where also other headers match the user agent 2026-05-05 23:52:14 +02:00
Jan Böhmer
38779740ec AIWebProvider: Make URLs absolute before passing them to the LLM
This ensures that the URLs are valid afterwards, because the LLM does not know the base tag
2026-05-05 23:19:56 +02:00
Jan Böhmer
9c6f9a25c5 Use new watchtower image in configuation example
Some checks are pending
Build assets artifact / Build assets artifact (push) Waiting to run
Docker Image Build / build (linux/amd64, amd64, ubuntu-latest) (push) Waiting to run
Docker Image Build / build (linux/arm/v7, armv7, ubuntu-24.04-arm) (push) Waiting to run
Docker Image Build / build (linux/arm64, arm64, ubuntu-24.04-arm) (push) Waiting to run
Docker Image Build / merge (push) Blocked by required conditions
Docker Image Build (FrankenPHP) / build (linux/amd64, amd64, ubuntu-latest) (push) Waiting to run
Docker Image Build (FrankenPHP) / build (linux/arm/v7, armv7, ubuntu-24.04-arm) (push) Waiting to run
Docker Image Build (FrankenPHP) / build (linux/arm64, arm64, ubuntu-24.04-arm) (push) Waiting to run
Docker Image Build (FrankenPHP) / merge (push) Blocked by required conditions
Static analysis / Static analysis (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, mysql) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, mysql) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, mysql) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, mysql) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, postgres) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, postgres) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, postgres) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, postgres) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, sqlite) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, sqlite) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, sqlite) (push) Waiting to run
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, sqlite) (push) Waiting to run
Fixes issue #1363
2026-05-05 22:41:38 +02:00
5 changed files with 176 additions and 77 deletions

View file

@ -1 +1 @@
2.11.0
2.11.1

53
composer.lock generated
View file

@ -17374,25 +17374,25 @@
},
{
"name": "symplify/easy-coding-standard",
"version": "13.1.2",
"version": "13.1.3",
"source": {
"type": "git",
"url": "https://github.com/easy-coding-standard/ecs.git",
"reference": "6d22473d1f36945884d8cb291777166020a47770"
"reference": "d894d088d7ebb9326f9eed28bf251481c813b89f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/easy-coding-standard/ecs/zipball/6d22473d1f36945884d8cb291777166020a47770",
"reference": "6d22473d1f36945884d8cb291777166020a47770",
"url": "https://api.github.com/repos/easy-coding-standard/ecs/zipball/d894d088d7ebb9326f9eed28bf251481c813b89f",
"reference": "d894d088d7ebb9326f9eed28bf251481c813b89f",
"shasum": ""
},
"require": {
"php": ">=7.2"
},
"conflict": {
"friendsofphp/php-cs-fixer": "<3.92.4",
"friendsofphp/php-cs-fixer": "<3.95.1",
"phpcsstandards/php_codesniffer": "<4.0.1",
"symplify/coding-standard": "<12.1"
"symplify/coding-standard": "<13.0"
},
"suggest": {
"ext-dom": "Needed to support checkstyle output format in class CheckstyleOutputFormatter"
@ -17418,20 +17418,9 @@
"static analysis"
],
"support": {
"issues": "https://github.com/easy-coding-standard/ecs/issues",
"source": "https://github.com/easy-coding-standard/ecs/tree/13.1.2"
"source": "https://github.com/easy-coding-standard/ecs/tree/13.1.3"
},
"funding": [
{
"url": "https://www.paypal.me/rectorphp",
"type": "custom"
},
{
"url": "https://github.com/tomasvotruba",
"type": "github"
}
],
"time": "2026-05-03T22:05:09+00:00"
"time": "2026-05-04T21:45:57+00:00"
},
{
"name": "tecnickcom/tc-lib-barcode",
@ -20189,12 +20178,12 @@
"source": {
"type": "git",
"url": "https://github.com/Roave/SecurityAdvisories.git",
"reference": "2221f6ef09e87784e78e188aadd8f7e3a50e679a"
"reference": "9d468c11a8da481c22b4e610494babae032fdb03"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Roave/SecurityAdvisories/zipball/2221f6ef09e87784e78e188aadd8f7e3a50e679a",
"reference": "2221f6ef09e87784e78e188aadd8f7e3a50e679a",
"url": "https://api.github.com/repos/Roave/SecurityAdvisories/zipball/9d468c11a8da481c22b4e610494babae032fdb03",
"reference": "9d468c11a8da481c22b4e610494babae032fdb03",
"shasum": ""
},
"conflict": {
@ -20254,7 +20243,7 @@
"awesome-support/awesome-support": "<=6.0.7",
"aws/aws-sdk-php": "<=3.371.3",
"ayacoo/redirect-tab": "<2.1.2|>=3,<3.1.7|>=4,<4.0.5",
"azuracast/azuracast": "<=0.23.3",
"azuracast/azuracast": "<=0.23.5",
"b13/seo_basics": "<0.8.2",
"backdrop/backdrop": "<=1.32",
"backpack/crud": "<3.4.9",
@ -20309,7 +20298,7 @@
"cesnet/simplesamlphp-module-proxystatistics": "<3.1",
"chriskacerguis/codeigniter-restserver": "<=2.7.1",
"chrome-php/chrome": "<1.14",
"ci4-cms-erp/ci4ms": "<=0.31.6",
"ci4-cms-erp/ci4ms": "<=0.31.7",
"civicrm/civicrm-core": ">=4.2,<4.2.9|>=4.3,<4.3.3",
"ckeditor/ckeditor": "<4.25",
"clickstorm/cs-seo": ">=6,<6.8|>=7,<7.5|>=8,<8.4|>=9,<9.3",
@ -20361,6 +20350,7 @@
"david-garcia/phpwhois": "<=4.3.1",
"dbrisinajumi/d2files": "<1",
"dcat/laravel-admin": "<=2.1.3|==2.2.0.0-beta|==2.2.2.0-beta",
"dedoc/scramble": ">=0.13.2,<0.13.22",
"derhansen/fe_change_pwd": "<2.0.5|>=3,<3.0.3",
"derhansen/sf_event_mgt": "<4.3.1|>=5,<5.1.1|>=7,<7.4",
"desperado/xml-bundle": "<=0.1.7",
@ -20513,7 +20503,8 @@
"georgringer/news": "<1.3.3",
"geshi/geshi": "<=1.0.9.1",
"getformwork/formwork": "<=2.3.3",
"getgrav/grav": "<1.11.0.0-beta1",
"getgrav/grav": "<2.0.0.0-beta2",
"getgrav/grav-plugin-api": "<1.0.0.0-beta15",
"getkirby/cms": "<4.9|>=5,<5.4",
"getkirby/kirby": "<3.9.8.3-dev|>=3.10,<3.10.1.2-dev|>=4,<4.7.1",
"getkirby/panel": "<2.5.14",
@ -20680,6 +20671,7 @@
"mautic/core-lib": ">=1.0.0.0-beta,<4.4.13|>=5.0.0.0-alpha,<5.1.1",
"mautic/grapes-js-builder-bundle": ">=4,<4.4.18|>=5,<5.2.9|>=6,<6.0.7",
"maximebf/debugbar": "<1.19",
"mckenziearts/livewire-markdown-editor": "<1.3",
"mdanter/ecc": "<2",
"mediawiki/abuse-filter": "<1.39.9|>=1.40,<1.41.3|>=1.42,<1.42.2",
"mediawiki/cargo": "<3.8.3",
@ -20723,6 +20715,7 @@
"munkireport/softwareupdate": "<1.6",
"mustache/mustache": ">=2,<2.14.1",
"mwdelaney/wp-enable-svg": "<=0.2",
"nabeel/phpvms": "<7.0.6",
"namshi/jose": "<2.2",
"nasirkhan/laravel-starter": "<11.11",
"nategood/httpful": "<1",
@ -20763,7 +20756,7 @@
"open-web-analytics/open-web-analytics": "<1.8.1",
"opencart/opencart": ">=0",
"openid/php-openid": "<2.3",
"openmage/magento-lts": "<20.17",
"openmage/magento-lts": "<=20.17",
"opensolutions/vimbadmin": "<=3.0.15",
"opensource-workshop/connect-cms": "<1.41.1|>=2,<2.41.1",
"orchid/platform": ">=8,<14.43",
@ -20812,7 +20805,7 @@
"phpoffice/phpexcel": "<=1.8.2",
"phpoffice/phpspreadsheet": "<=1.30.3|>=2,<=2.1.15|>=2.2,<=2.4.4|>=3,<=3.10.4|>=4,<=5.6",
"phppgadmin/phppgadmin": "<=7.13",
"phpseclib/phpseclib": "<2.0.53|>=3,<3.0.51",
"phpseclib/phpseclib": "<=2.0.53|>=3,<=3.0.51",
"phpservermon/phpservermon": "<3.6",
"phpsysinfo/phpsysinfo": "<3.4.3",
"phpunit/phpunit": "<8.5.52|>=9,<9.6.33|>=10,<10.5.62|>=11,<11.5.50|>=12,<12.5.8|>=12.5.21,<12.5.22|>=13.1.5,<13.1.6",
@ -20906,7 +20899,7 @@
"shopware/shopware": "<=5.7.17|>=6.4.6,<6.6.10.10-dev|>=6.7,<6.7.6.1-dev",
"shopware/storefront": "<6.6.10.10-dev|>=6.7,<6.7.5.1-dev",
"shopxo/shopxo": "<=6.4",
"showdoc/showdoc": "<2.10.4",
"showdoc/showdoc": "<3.8.1",
"shuchkin/simplexlsx": ">=1.0.12,<1.1.13",
"silverstripe-australia/advancedreports": ">=1,<=2",
"silverstripe/admin": "<1.13.19|>=2,<2.1.8",
@ -21119,7 +21112,7 @@
"webcoast/deferred-image-processing": "<1.0.2",
"webklex/laravel-imap": "<5.3",
"webklex/php-imap": "<5.3",
"webonyx/graphql-php": "<=15.31.4",
"webonyx/graphql-php": "<=15.32.2",
"webpa/webpa": "<3.1.2",
"webreinvent/vaahcms": "<=2.3.1",
"wikibase/wikibase": "<=1.39.3",
@ -21239,7 +21232,7 @@
"type": "tidelift"
}
],
"time": "2026-04-30T21:24:12+00:00"
"time": "2026-05-05T21:24:41+00:00"
},
{
"name": "sebastian/cli-parser",

View file

@ -29,60 +29,137 @@ use Symfony\Contracts\HttpClient\ResponseStreamInterface;
/**
* HttpClient wrapper that randomizes the user agent for each request, to make it harder for servers to detect and block us.
* It also sets some other headers to make the requests look more like real browser requests.
* When we get a 503, 403 or 429, we assume that the server is blocking us and try again with a different user agent, until we run out of retries.
*/
final class RandomizeUseragentHttpClient implements HttpClientInterface
{
public const USER_AGENTS = [
"Mozilla/5.0 (Windows; U; Windows NT 10.0; Win64; x64) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/52.0.1359.302 Safari/600.6 Edge/15.25690",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299",
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 8_8_3) Gecko/20100101 Firefox/51.6",
"Mozilla/5.0 (Android; Android 4.4.4; E:number:20-23:00 Build/24.0.B.1.34) AppleWebKit/603.18 (KHTML, like Gecko) Chrome/47.0.1559.384 Mobile Safari/600.5",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows; Windows NT 6.3; WOW64 Trident/5.0)",
"Mozilla/5.0 (Windows; Windows NT 6.0; Win64; x64) AppleWebKit/602.21 (KHTML, like Gecko) Chrome/51.0.3187.154 Safari/536",
"Mozilla/5.0 (iPhone; CPU iPhone OS 9_4_2; like Mac OS X) AppleWebKit/537.24 (KHTML, like Gecko) Chrome/51.0.2432.275 Mobile Safari/535.6",
"Mozilla/5.0 (U; Linux i680 ) Gecko/20100101 Firefox/57.5",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 8_8_6; en-US) Gecko/20100101 Firefox/53.9",
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 8_6_7) AppleWebKit/534.46 (KHTML, like Gecko) Chrome/55.0.3276.345 Safari/535",
"Mozilla/5.0 (Windows; Windows NT 10.5;) AppleWebKit/535.42 (KHTML, like Gecko) Chrome/53.0.1176.353 Safari/534.0 Edge/11.95743",
"Mozilla/5.0 (Linux; Android 5.1.1; MOTO G Build/LPH223) AppleWebKit/600.27 (KHTML, like Gecko) Chrome/47.0.1604.204 Mobile Safari/535.1",
"Mozilla/5.0 (iPod; CPU iPod OS 7_4_8; like Mac OS X) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/50.0.1632.146 Mobile Safari/600.4",
"Mozilla/5.0 (Linux; U; Linux i570 ; en-US) Gecko/20100101 Firefox/49.9",
"Mozilla/5.0 (Windows NT 10.2; WOW64; en-US) AppleWebKit/603.2 (KHTML, like Gecko) Chrome/55.0.1299.311 Safari/535",
"Mozilla/5.0 (Windows; Windows NT 10.5; x64; en-US) AppleWebKit/603.39 (KHTML, like Gecko) Chrome/52.0.1443.139 Safari/536.6 Edge/13.79436",
"Mozilla/5.0 (Linux; U; Android 5.1; SM-G9350T Build/MMB29M) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/55.0.2552.307 Mobile Safari/600.8",
"Mozilla/5.0 (Android; Android 6.0; SAMSUNG SM-D9350V Build/MDB08L) AppleWebKit/535.30 (KHTML, like Gecko) Chrome/53.0.1345.278 Mobile Safari/537.4",
"Mozilla/5.0 (Windows; Windows NT 10.0;) AppleWebKit/534.44 (KHTML, like Gecko) Chrome/47.0.3503.387 Safari/601",
private const PROFILES = [
// --- CHROME ON WINDOWS ---
'chrome_windows' => [
'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36',
'Sec-Ch-Ua' => '"Google Chrome";v="142", "Chromium";v="142", "Not=A?Brand";v="99"',
'Sec-Ch-Ua-Mobile' => '?0',
'Sec-Ch-Ua-Platform' => '"Windows"',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
],
// --- CHROME ON MACOS ---
'chrome_mac' => [
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
'Sec-Ch-Ua' => '"Google Chrome";v="141", "Chromium";v="141", "Not=A?Brand";v="99"',
'Sec-Ch-Ua-Mobile' => '?0',
'Sec-Ch-Ua-Platform' => '"macOS"',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
],
// --- EDGE ON WINDOWS ---
'edge_windows' => [
'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36 Edg/142.0.0.0',
'Sec-Ch-Ua' => '"Microsoft Edge";v="142", "Chromium";v="142", "Not=A?Brand";v="99"',
'Sec-Ch-Ua-Mobile' => '?0',
'Sec-Ch-Ua-Platform' => '"Windows"',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
],
// --- FIREFOX ON WINDOWS ---
'firefox_windows' => [
'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.5',
// Firefox does not send Sec-Ch-Ua headers by default
],
// --- FIREFOX ON LINUX ---
'firefox_linux' => [
'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64; rv:137.0) Gecko/20100101 Firefox/137.0',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.5',
],
// --- SAFARI ON MACOS ---
'safari_mac' => [
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Safari/605.1.15',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.9',
],
// --- CHROME ON ANDROID (Mobile) ---
'chrome_android' => [
'User-Agent' => 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Mobile Safari/537.36',
'Sec-Ch-Ua' => '"Google Chrome";v="142", "Chromium";v="142", "Not=A?Brand";v="99"',
'Sec-Ch-Ua-Mobile' => '?1',
'Sec-Ch-Ua-Platform' => '"Android"',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
],
// --- SAFARI ON IPHONE (Mobile) ---
'safari_iphone' => [
'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 18_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Mobile/15E148 Safari/604.1',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.9',
],
];
private const COMMON_HEADERS = [
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language' => 'en-US,en;q=0.9',
'Sec-Fetch-Dest' => 'document',
'Sec-Fetch-Mode' => 'navigate',
'Sec-Fetch-Site' => 'none',
'Sec-Fetch-User' => '?1',
'Upgrade-Insecure-Requests' => '1',
];
private const ENTRY_REFERERS = [
'https://www.google.com/',
'https://www.bing.com/',
'https://duckduckgo.com/',
'https://t.co/', // Twitter/X shortener
'https://www.reddit.com/',
];
private ?string $lastUrl = null;
public function __construct(
private readonly HttpClientInterface $client,
private readonly array $userAgents = self::USER_AGENTS,
private readonly int $repeatOnFailure = 1,
) {
}
public function getRandomUserAgent(): string
{
return $this->userAgents[array_rand($this->userAgents)];
}
public function request(string $method, string $url, array $options = []): ResponseInterface
{
$repeatsLeft = $this->repeatOnFailure;
do {
$modifiedOptions = $options;
if (!isset($modifiedOptions['headers']['User-Agent'])) {
$modifiedOptions['headers']['User-Agent'] = $this->getRandomUserAgent();
$profile = self::PROFILES[array_rand(self::PROFILES)];
// Merge common headers with the specific browser profile
$headers = array_merge(self::COMMON_HEADERS, $profile);
//Add a Referer header if not already set, to make it look more like a real browser request. We use the last URL we visited as the referer, to simulate internal navigation. If we don't have a last URL (first request), we pick a random entry point from common referers.
if (!isset($options['headers']['Referer'])) {
if ($this->lastUrl !== null) {
// If we have a previous URL, use it (Internal Navigation)
$headers['Referer'] = $this->lastUrl;
} else {
// First request? Pick an entry point (External Entry)
$headers['Referer'] = self::ENTRY_REFERERS[array_rand(self::ENTRY_REFERERS)];
}
}
$response = $this->client->request($method, $url, $modifiedOptions);
// Allow manual overrides from $options
$options['headers'] = array_merge($headers, $options['headers'] ?? []);
$response = $this->client->request($method, $url, $options);
//When we get a 503, 403 or 429, we assume that the server is blocking us and try again with a different user agent
if (!in_array($response->getStatusCode(), [403, 429, 503], true)) {
$this->lastUrl = $url; // Update last visited URL for referer in the next request
return $response;
}
//Otherwise we try again with a different user agent, until we run out of retries
usleep(5000); // Sleep for 5ms to avoid hammering the server too hard in case of multiple retries
} while ($repeatsLeft-- > 0);
return $response;
@ -95,6 +172,6 @@ final class RandomizeUseragentHttpClient implements HttpClientInterface
public function withOptions(array $options): static
{
return new self($this->client->withOptions($options), $this->userAgents, $this->repeatOnFailure);
return new self($this->client->withOptions($options), $this->repeatOnFailure);
}
}

View file

@ -39,6 +39,7 @@ use Psr\Cache\CacheItemPoolInterface;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\DomCrawler\UriResolver;
use Symfony\Component\HttpClient\NoPrivateNetworkHttpClient;
use Symfony\Component\Intl\Languages;
use Symfony\Contracts\HttpClient\HttpClientInterface;
@ -146,7 +147,7 @@ final class AIWebProvider implements InfoProviderInterface
$html = $response->getContent();
//Convert html to markdown, to provide a cleaner input to the LLM.
$markdown = $this->htmlToMarkdown($html);
$markdown = $this->htmlToMarkdown($html, $url);
//Truncate markdown to max content length, if needed
$markdown = u($markdown)->truncate($this->settings->maxContentLength, '... [truncated]')->toString();
@ -182,10 +183,34 @@ final class AIWebProvider implements InfoProviderInterface
return json_encode($items->toObject(), JSON_THROW_ON_ERROR);
}
private function htmlToMarkdown(string $html): string
private function htmlToMarkdown(string $html, string $url): string
{
//Extract only the main content of the page to avoid overwhelming the LLM with irrelevant information.
$crawler = new Crawler($html);
//Replace relative URLs with absolute URLs, to ensure that the LLM has full context and can access the links if needed.
$baseUrl = $crawler->getBaseHref() ?? $url;
//Replace all relative links with their absolute counnterparts, to provide more context to the LLM and to ensure that any links included in the markdown are valid and can be accessed if needed.
$crawler->filter('a')->each(function (Crawler $node) use ($baseUrl) {
$href = $node->attr('href');
if ($href) {
$absoluteUrl = UriResolver::resolve($href, $baseUrl);
//@phpstan-ignore-next-line we know that getNode(0) will always return a DOMElement, because the crawler is initialized with valid HTML and we are filtering for 'a' tags, which are always DOMElements.
$node->getNode(0)->setAttribute('href', $absoluteUrl);
}
});
$crawler->filter('img')->each(function (Crawler $node) use ($baseUrl) {
$src = $node->attr('src');
if ($src) {
$absoluteUrl = UriResolver::resolve($src, $baseUrl);
//@phpstan-ignore-next-line we know that getNode(0) will always return a DOMElement, because the crawler is initialized with valid HTML and we are filtering for 'a' tags, which are always DOMElements.
$node->getNode(0)->setAttribute('src', $absoluteUrl);
}
});
//Extract only the main content of the page to avoid overwhelming the LLM with irrelevant information.
$mainContent = $crawler->filter('main, article, #content');
// If we found a specific content area, get its HTML; otherwise, use the whole body.
@ -198,7 +223,7 @@ final class AIWebProvider implements InfoProviderInterface
}
} else {
//Use the whole body content, as it might contain relevant information, especially for simpler pages that don't have a clear main/content section.
$htmlToConvert = $html;
$htmlToConvert = $crawler->outerHtml();
}

View file

@ -296,17 +296,21 @@
<p>{% trans %}update_manager.docker.setup_description{% endtrans %}</p>
<h6>{% trans %}update_manager.docker.setup_step1{% endtrans %}</h6>
<pre class="bg-dark text-light p-3 rounded"><code>services:
watchtower:
image: containrrr/watchtower
volumes:
- /var/run/docker.sock:/var/run/docker.sock
environment:
- WATCHTOWER_HTTP_API_UPDATE=true
- WATCHTOWER_HTTP_API_TOKEN=your-secret-token
- WATCHTOWER_LABEL_ENABLE=true
ports:
- "8080:8080"</code></pre>
<pre class="bg-dark text-light p-3 rounded"><code>
# See documentation for full example: https://docs.part-db.de/installation/installation_docker.html
services:
watchtower:
image: ghcr.io/nicholas-fedor/watchtower:latest
container_name: watchtower
restart: unless-stopped
volumes:
- /var/run/docker.sock:/var/run/docker.sock
environment:
- WATCHTOWER_HTTP_API_UPDATE=true
- WATCHTOWER_HTTP_API_TOKEN=your-secret-token
- WATCHTOWER_LABEL_ENABLE=true
- WATCHTOWER_CLEANUP=true
</code></pre>
<h6>{% trans %}update_manager.docker.setup_step2{% endtrans %}</h6>
<pre class="bg-dark text-light p-3 rounded"><code>WATCHTOWER_API_URL=http://watchtower:8080