Compare commits

..

No commits in common. "master" and "v2.11.0" have entirely different histories.

5 changed files with 77 additions and 176 deletions

View file

@ -1 +1 @@
2.11.1 2.11.0

53
composer.lock generated
View file

@ -17374,25 +17374,25 @@
}, },
{ {
"name": "symplify/easy-coding-standard", "name": "symplify/easy-coding-standard",
"version": "13.1.3", "version": "13.1.2",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/easy-coding-standard/ecs.git", "url": "https://github.com/easy-coding-standard/ecs.git",
"reference": "d894d088d7ebb9326f9eed28bf251481c813b89f" "reference": "6d22473d1f36945884d8cb291777166020a47770"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/easy-coding-standard/ecs/zipball/d894d088d7ebb9326f9eed28bf251481c813b89f", "url": "https://api.github.com/repos/easy-coding-standard/ecs/zipball/6d22473d1f36945884d8cb291777166020a47770",
"reference": "d894d088d7ebb9326f9eed28bf251481c813b89f", "reference": "6d22473d1f36945884d8cb291777166020a47770",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
"php": ">=7.2" "php": ">=7.2"
}, },
"conflict": { "conflict": {
"friendsofphp/php-cs-fixer": "<3.95.1", "friendsofphp/php-cs-fixer": "<3.92.4",
"phpcsstandards/php_codesniffer": "<4.0.1", "phpcsstandards/php_codesniffer": "<4.0.1",
"symplify/coding-standard": "<13.0" "symplify/coding-standard": "<12.1"
}, },
"suggest": { "suggest": {
"ext-dom": "Needed to support checkstyle output format in class CheckstyleOutputFormatter" "ext-dom": "Needed to support checkstyle output format in class CheckstyleOutputFormatter"
@ -17418,9 +17418,20 @@
"static analysis" "static analysis"
], ],
"support": { "support": {
"source": "https://github.com/easy-coding-standard/ecs/tree/13.1.3" "issues": "https://github.com/easy-coding-standard/ecs/issues",
"source": "https://github.com/easy-coding-standard/ecs/tree/13.1.2"
}, },
"time": "2026-05-04T21:45:57+00:00" "funding": [
{
"url": "https://www.paypal.me/rectorphp",
"type": "custom"
},
{
"url": "https://github.com/tomasvotruba",
"type": "github"
}
],
"time": "2026-05-03T22:05:09+00:00"
}, },
{ {
"name": "tecnickcom/tc-lib-barcode", "name": "tecnickcom/tc-lib-barcode",
@ -20178,12 +20189,12 @@
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/Roave/SecurityAdvisories.git", "url": "https://github.com/Roave/SecurityAdvisories.git",
"reference": "9d468c11a8da481c22b4e610494babae032fdb03" "reference": "2221f6ef09e87784e78e188aadd8f7e3a50e679a"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/Roave/SecurityAdvisories/zipball/9d468c11a8da481c22b4e610494babae032fdb03", "url": "https://api.github.com/repos/Roave/SecurityAdvisories/zipball/2221f6ef09e87784e78e188aadd8f7e3a50e679a",
"reference": "9d468c11a8da481c22b4e610494babae032fdb03", "reference": "2221f6ef09e87784e78e188aadd8f7e3a50e679a",
"shasum": "" "shasum": ""
}, },
"conflict": { "conflict": {
@ -20243,7 +20254,7 @@
"awesome-support/awesome-support": "<=6.0.7", "awesome-support/awesome-support": "<=6.0.7",
"aws/aws-sdk-php": "<=3.371.3", "aws/aws-sdk-php": "<=3.371.3",
"ayacoo/redirect-tab": "<2.1.2|>=3,<3.1.7|>=4,<4.0.5", "ayacoo/redirect-tab": "<2.1.2|>=3,<3.1.7|>=4,<4.0.5",
"azuracast/azuracast": "<=0.23.5", "azuracast/azuracast": "<=0.23.3",
"b13/seo_basics": "<0.8.2", "b13/seo_basics": "<0.8.2",
"backdrop/backdrop": "<=1.32", "backdrop/backdrop": "<=1.32",
"backpack/crud": "<3.4.9", "backpack/crud": "<3.4.9",
@ -20298,7 +20309,7 @@
"cesnet/simplesamlphp-module-proxystatistics": "<3.1", "cesnet/simplesamlphp-module-proxystatistics": "<3.1",
"chriskacerguis/codeigniter-restserver": "<=2.7.1", "chriskacerguis/codeigniter-restserver": "<=2.7.1",
"chrome-php/chrome": "<1.14", "chrome-php/chrome": "<1.14",
"ci4-cms-erp/ci4ms": "<=0.31.7", "ci4-cms-erp/ci4ms": "<=0.31.6",
"civicrm/civicrm-core": ">=4.2,<4.2.9|>=4.3,<4.3.3", "civicrm/civicrm-core": ">=4.2,<4.2.9|>=4.3,<4.3.3",
"ckeditor/ckeditor": "<4.25", "ckeditor/ckeditor": "<4.25",
"clickstorm/cs-seo": ">=6,<6.8|>=7,<7.5|>=8,<8.4|>=9,<9.3", "clickstorm/cs-seo": ">=6,<6.8|>=7,<7.5|>=8,<8.4|>=9,<9.3",
@ -20350,7 +20361,6 @@
"david-garcia/phpwhois": "<=4.3.1", "david-garcia/phpwhois": "<=4.3.1",
"dbrisinajumi/d2files": "<1", "dbrisinajumi/d2files": "<1",
"dcat/laravel-admin": "<=2.1.3|==2.2.0.0-beta|==2.2.2.0-beta", "dcat/laravel-admin": "<=2.1.3|==2.2.0.0-beta|==2.2.2.0-beta",
"dedoc/scramble": ">=0.13.2,<0.13.22",
"derhansen/fe_change_pwd": "<2.0.5|>=3,<3.0.3", "derhansen/fe_change_pwd": "<2.0.5|>=3,<3.0.3",
"derhansen/sf_event_mgt": "<4.3.1|>=5,<5.1.1|>=7,<7.4", "derhansen/sf_event_mgt": "<4.3.1|>=5,<5.1.1|>=7,<7.4",
"desperado/xml-bundle": "<=0.1.7", "desperado/xml-bundle": "<=0.1.7",
@ -20503,8 +20513,7 @@
"georgringer/news": "<1.3.3", "georgringer/news": "<1.3.3",
"geshi/geshi": "<=1.0.9.1", "geshi/geshi": "<=1.0.9.1",
"getformwork/formwork": "<=2.3.3", "getformwork/formwork": "<=2.3.3",
"getgrav/grav": "<2.0.0.0-beta2", "getgrav/grav": "<1.11.0.0-beta1",
"getgrav/grav-plugin-api": "<1.0.0.0-beta15",
"getkirby/cms": "<4.9|>=5,<5.4", "getkirby/cms": "<4.9|>=5,<5.4",
"getkirby/kirby": "<3.9.8.3-dev|>=3.10,<3.10.1.2-dev|>=4,<4.7.1", "getkirby/kirby": "<3.9.8.3-dev|>=3.10,<3.10.1.2-dev|>=4,<4.7.1",
"getkirby/panel": "<2.5.14", "getkirby/panel": "<2.5.14",
@ -20671,7 +20680,6 @@
"mautic/core-lib": ">=1.0.0.0-beta,<4.4.13|>=5.0.0.0-alpha,<5.1.1", "mautic/core-lib": ">=1.0.0.0-beta,<4.4.13|>=5.0.0.0-alpha,<5.1.1",
"mautic/grapes-js-builder-bundle": ">=4,<4.4.18|>=5,<5.2.9|>=6,<6.0.7", "mautic/grapes-js-builder-bundle": ">=4,<4.4.18|>=5,<5.2.9|>=6,<6.0.7",
"maximebf/debugbar": "<1.19", "maximebf/debugbar": "<1.19",
"mckenziearts/livewire-markdown-editor": "<1.3",
"mdanter/ecc": "<2", "mdanter/ecc": "<2",
"mediawiki/abuse-filter": "<1.39.9|>=1.40,<1.41.3|>=1.42,<1.42.2", "mediawiki/abuse-filter": "<1.39.9|>=1.40,<1.41.3|>=1.42,<1.42.2",
"mediawiki/cargo": "<3.8.3", "mediawiki/cargo": "<3.8.3",
@ -20715,7 +20723,6 @@
"munkireport/softwareupdate": "<1.6", "munkireport/softwareupdate": "<1.6",
"mustache/mustache": ">=2,<2.14.1", "mustache/mustache": ">=2,<2.14.1",
"mwdelaney/wp-enable-svg": "<=0.2", "mwdelaney/wp-enable-svg": "<=0.2",
"nabeel/phpvms": "<7.0.6",
"namshi/jose": "<2.2", "namshi/jose": "<2.2",
"nasirkhan/laravel-starter": "<11.11", "nasirkhan/laravel-starter": "<11.11",
"nategood/httpful": "<1", "nategood/httpful": "<1",
@ -20756,7 +20763,7 @@
"open-web-analytics/open-web-analytics": "<1.8.1", "open-web-analytics/open-web-analytics": "<1.8.1",
"opencart/opencart": ">=0", "opencart/opencart": ">=0",
"openid/php-openid": "<2.3", "openid/php-openid": "<2.3",
"openmage/magento-lts": "<=20.17", "openmage/magento-lts": "<20.17",
"opensolutions/vimbadmin": "<=3.0.15", "opensolutions/vimbadmin": "<=3.0.15",
"opensource-workshop/connect-cms": "<1.41.1|>=2,<2.41.1", "opensource-workshop/connect-cms": "<1.41.1|>=2,<2.41.1",
"orchid/platform": ">=8,<14.43", "orchid/platform": ">=8,<14.43",
@ -20805,7 +20812,7 @@
"phpoffice/phpexcel": "<=1.8.2", "phpoffice/phpexcel": "<=1.8.2",
"phpoffice/phpspreadsheet": "<=1.30.3|>=2,<=2.1.15|>=2.2,<=2.4.4|>=3,<=3.10.4|>=4,<=5.6", "phpoffice/phpspreadsheet": "<=1.30.3|>=2,<=2.1.15|>=2.2,<=2.4.4|>=3,<=3.10.4|>=4,<=5.6",
"phppgadmin/phppgadmin": "<=7.13", "phppgadmin/phppgadmin": "<=7.13",
"phpseclib/phpseclib": "<=2.0.53|>=3,<=3.0.51", "phpseclib/phpseclib": "<2.0.53|>=3,<3.0.51",
"phpservermon/phpservermon": "<3.6", "phpservermon/phpservermon": "<3.6",
"phpsysinfo/phpsysinfo": "<3.4.3", "phpsysinfo/phpsysinfo": "<3.4.3",
"phpunit/phpunit": "<8.5.52|>=9,<9.6.33|>=10,<10.5.62|>=11,<11.5.50|>=12,<12.5.8|>=12.5.21,<12.5.22|>=13.1.5,<13.1.6", "phpunit/phpunit": "<8.5.52|>=9,<9.6.33|>=10,<10.5.62|>=11,<11.5.50|>=12,<12.5.8|>=12.5.21,<12.5.22|>=13.1.5,<13.1.6",
@ -20899,7 +20906,7 @@
"shopware/shopware": "<=5.7.17|>=6.4.6,<6.6.10.10-dev|>=6.7,<6.7.6.1-dev", "shopware/shopware": "<=5.7.17|>=6.4.6,<6.6.10.10-dev|>=6.7,<6.7.6.1-dev",
"shopware/storefront": "<6.6.10.10-dev|>=6.7,<6.7.5.1-dev", "shopware/storefront": "<6.6.10.10-dev|>=6.7,<6.7.5.1-dev",
"shopxo/shopxo": "<=6.4", "shopxo/shopxo": "<=6.4",
"showdoc/showdoc": "<3.8.1", "showdoc/showdoc": "<2.10.4",
"shuchkin/simplexlsx": ">=1.0.12,<1.1.13", "shuchkin/simplexlsx": ">=1.0.12,<1.1.13",
"silverstripe-australia/advancedreports": ">=1,<=2", "silverstripe-australia/advancedreports": ">=1,<=2",
"silverstripe/admin": "<1.13.19|>=2,<2.1.8", "silverstripe/admin": "<1.13.19|>=2,<2.1.8",
@ -21112,7 +21119,7 @@
"webcoast/deferred-image-processing": "<1.0.2", "webcoast/deferred-image-processing": "<1.0.2",
"webklex/laravel-imap": "<5.3", "webklex/laravel-imap": "<5.3",
"webklex/php-imap": "<5.3", "webklex/php-imap": "<5.3",
"webonyx/graphql-php": "<=15.32.2", "webonyx/graphql-php": "<=15.31.4",
"webpa/webpa": "<3.1.2", "webpa/webpa": "<3.1.2",
"webreinvent/vaahcms": "<=2.3.1", "webreinvent/vaahcms": "<=2.3.1",
"wikibase/wikibase": "<=1.39.3", "wikibase/wikibase": "<=1.39.3",
@ -21232,7 +21239,7 @@
"type": "tidelift" "type": "tidelift"
} }
], ],
"time": "2026-05-05T21:24:41+00:00" "time": "2026-04-30T21:24:12+00:00"
}, },
{ {
"name": "sebastian/cli-parser", "name": "sebastian/cli-parser",

View file

@ -29,137 +29,60 @@ use Symfony\Contracts\HttpClient\ResponseStreamInterface;
/** /**
* HttpClient wrapper that randomizes the user agent for each request, to make it harder for servers to detect and block us. * HttpClient wrapper that randomizes the user agent for each request, to make it harder for servers to detect and block us.
* It also sets some other headers to make the requests look more like real browser requests.
* When we get a 503, 403 or 429, we assume that the server is blocking us and try again with a different user agent, until we run out of retries. * When we get a 503, 403 or 429, we assume that the server is blocking us and try again with a different user agent, until we run out of retries.
*/ */
final class RandomizeUseragentHttpClient implements HttpClientInterface final class RandomizeUseragentHttpClient implements HttpClientInterface
{ {
private const PROFILES = [ public const USER_AGENTS = [
// --- CHROME ON WINDOWS --- "Mozilla/5.0 (Windows; U; Windows NT 10.0; Win64; x64) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/52.0.1359.302 Safari/600.6 Edge/15.25690",
'chrome_windows' => [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299",
'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36', "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 8_8_3) Gecko/20100101 Firefox/51.6",
'Sec-Ch-Ua' => '"Google Chrome";v="142", "Chromium";v="142", "Not=A?Brand";v="99"', "Mozilla/5.0 (Android; Android 4.4.4; E:number:20-23:00 Build/24.0.B.1.34) AppleWebKit/603.18 (KHTML, like Gecko) Chrome/47.0.1559.384 Mobile Safari/600.5",
'Sec-Ch-Ua-Mobile' => '?0', "Mozilla/5.0 (compatible; MSIE 9.0; Windows; Windows NT 6.3; WOW64 Trident/5.0)",
'Sec-Ch-Ua-Platform' => '"Windows"', "Mozilla/5.0 (Windows; Windows NT 6.0; Win64; x64) AppleWebKit/602.21 (KHTML, like Gecko) Chrome/51.0.3187.154 Safari/536",
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', "Mozilla/5.0 (iPhone; CPU iPhone OS 9_4_2; like Mac OS X) AppleWebKit/537.24 (KHTML, like Gecko) Chrome/51.0.2432.275 Mobile Safari/535.6",
], "Mozilla/5.0 (U; Linux i680 ) Gecko/20100101 Firefox/57.5",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 8_8_6; en-US) Gecko/20100101 Firefox/53.9",
// --- CHROME ON MACOS --- "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 8_6_7) AppleWebKit/534.46 (KHTML, like Gecko) Chrome/55.0.3276.345 Safari/535",
'chrome_mac' => [ "Mozilla/5.0 (Windows; Windows NT 10.5;) AppleWebKit/535.42 (KHTML, like Gecko) Chrome/53.0.1176.353 Safari/534.0 Edge/11.95743",
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36', "Mozilla/5.0 (Linux; Android 5.1.1; MOTO G Build/LPH223) AppleWebKit/600.27 (KHTML, like Gecko) Chrome/47.0.1604.204 Mobile Safari/535.1",
'Sec-Ch-Ua' => '"Google Chrome";v="141", "Chromium";v="141", "Not=A?Brand";v="99"', "Mozilla/5.0 (iPod; CPU iPod OS 7_4_8; like Mac OS X) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/50.0.1632.146 Mobile Safari/600.4",
'Sec-Ch-Ua-Mobile' => '?0', "Mozilla/5.0 (Linux; U; Linux i570 ; en-US) Gecko/20100101 Firefox/49.9",
'Sec-Ch-Ua-Platform' => '"macOS"', "Mozilla/5.0 (Windows NT 10.2; WOW64; en-US) AppleWebKit/603.2 (KHTML, like Gecko) Chrome/55.0.1299.311 Safari/535",
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', "Mozilla/5.0 (Windows; Windows NT 10.5; x64; en-US) AppleWebKit/603.39 (KHTML, like Gecko) Chrome/52.0.1443.139 Safari/536.6 Edge/13.79436",
], "Mozilla/5.0 (Linux; U; Android 5.1; SM-G9350T Build/MMB29M) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/55.0.2552.307 Mobile Safari/600.8",
"Mozilla/5.0 (Android; Android 6.0; SAMSUNG SM-D9350V Build/MDB08L) AppleWebKit/535.30 (KHTML, like Gecko) Chrome/53.0.1345.278 Mobile Safari/537.4",
// --- EDGE ON WINDOWS --- "Mozilla/5.0 (Windows; Windows NT 10.0;) AppleWebKit/534.44 (KHTML, like Gecko) Chrome/47.0.3503.387 Safari/601",
'edge_windows' => [
'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36 Edg/142.0.0.0',
'Sec-Ch-Ua' => '"Microsoft Edge";v="142", "Chromium";v="142", "Not=A?Brand";v="99"',
'Sec-Ch-Ua-Mobile' => '?0',
'Sec-Ch-Ua-Platform' => '"Windows"',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
],
// --- FIREFOX ON WINDOWS ---
'firefox_windows' => [
'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.5',
// Firefox does not send Sec-Ch-Ua headers by default
],
// --- FIREFOX ON LINUX ---
'firefox_linux' => [
'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64; rv:137.0) Gecko/20100101 Firefox/137.0',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.5',
],
// --- SAFARI ON MACOS ---
'safari_mac' => [
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Safari/605.1.15',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.9',
],
// --- CHROME ON ANDROID (Mobile) ---
'chrome_android' => [
'User-Agent' => 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Mobile Safari/537.36',
'Sec-Ch-Ua' => '"Google Chrome";v="142", "Chromium";v="142", "Not=A?Brand";v="99"',
'Sec-Ch-Ua-Mobile' => '?1',
'Sec-Ch-Ua-Platform' => '"Android"',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
],
// --- SAFARI ON IPHONE (Mobile) ---
'safari_iphone' => [
'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 18_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Mobile/15E148 Safari/604.1',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.9',
],
]; ];
private const COMMON_HEADERS = [
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language' => 'en-US,en;q=0.9',
'Sec-Fetch-Dest' => 'document',
'Sec-Fetch-Mode' => 'navigate',
'Sec-Fetch-Site' => 'none',
'Sec-Fetch-User' => '?1',
'Upgrade-Insecure-Requests' => '1',
];
private const ENTRY_REFERERS = [
'https://www.google.com/',
'https://www.bing.com/',
'https://duckduckgo.com/',
'https://t.co/', // Twitter/X shortener
'https://www.reddit.com/',
];
private ?string $lastUrl = null;
public function __construct( public function __construct(
private readonly HttpClientInterface $client, private readonly HttpClientInterface $client,
private readonly array $userAgents = self::USER_AGENTS,
private readonly int $repeatOnFailure = 1, private readonly int $repeatOnFailure = 1,
) { ) {
} }
public function getRandomUserAgent(): string
{
return $this->userAgents[array_rand($this->userAgents)];
}
public function request(string $method, string $url, array $options = []): ResponseInterface public function request(string $method, string $url, array $options = []): ResponseInterface
{ {
$repeatsLeft = $this->repeatOnFailure; $repeatsLeft = $this->repeatOnFailure;
do { do {
$profile = self::PROFILES[array_rand(self::PROFILES)]; $modifiedOptions = $options;
if (!isset($modifiedOptions['headers']['User-Agent'])) {
// Merge common headers with the specific browser profile $modifiedOptions['headers']['User-Agent'] = $this->getRandomUserAgent();
$headers = array_merge(self::COMMON_HEADERS, $profile);
//Add a Referer header if not already set, to make it look more like a real browser request. We use the last URL we visited as the referer, to simulate internal navigation. If we don't have a last URL (first request), we pick a random entry point from common referers.
if (!isset($options['headers']['Referer'])) {
if ($this->lastUrl !== null) {
// If we have a previous URL, use it (Internal Navigation)
$headers['Referer'] = $this->lastUrl;
} else {
// First request? Pick an entry point (External Entry)
$headers['Referer'] = self::ENTRY_REFERERS[array_rand(self::ENTRY_REFERERS)];
} }
} $response = $this->client->request($method, $url, $modifiedOptions);
// Allow manual overrides from $options
$options['headers'] = array_merge($headers, $options['headers'] ?? []);
$response = $this->client->request($method, $url, $options);
//When we get a 503, 403 or 429, we assume that the server is blocking us and try again with a different user agent //When we get a 503, 403 or 429, we assume that the server is blocking us and try again with a different user agent
if (!in_array($response->getStatusCode(), [403, 429, 503], true)) { if (!in_array($response->getStatusCode(), [403, 429, 503], true)) {
$this->lastUrl = $url; // Update last visited URL for referer in the next request
return $response; return $response;
} }
//Otherwise we try again with a different user agent, until we run out of retries //Otherwise we try again with a different user agent, until we run out of retries
usleep(5000); // Sleep for 5ms to avoid hammering the server too hard in case of multiple retries
} while ($repeatsLeft-- > 0); } while ($repeatsLeft-- > 0);
return $response; return $response;
@ -172,6 +95,6 @@ final class RandomizeUseragentHttpClient implements HttpClientInterface
public function withOptions(array $options): static public function withOptions(array $options): static
{ {
return new self($this->client->withOptions($options), $this->repeatOnFailure); return new self($this->client->withOptions($options), $this->userAgents, $this->repeatOnFailure);
} }
} }

View file

@ -39,7 +39,6 @@ use Psr\Cache\CacheItemPoolInterface;
use Symfony\AI\Platform\Message\Message; use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag; use Symfony\AI\Platform\Message\MessageBag;
use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\DomCrawler\UriResolver;
use Symfony\Component\HttpClient\NoPrivateNetworkHttpClient; use Symfony\Component\HttpClient\NoPrivateNetworkHttpClient;
use Symfony\Component\Intl\Languages; use Symfony\Component\Intl\Languages;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
@ -147,7 +146,7 @@ final class AIWebProvider implements InfoProviderInterface
$html = $response->getContent(); $html = $response->getContent();
//Convert html to markdown, to provide a cleaner input to the LLM. //Convert html to markdown, to provide a cleaner input to the LLM.
$markdown = $this->htmlToMarkdown($html, $url); $markdown = $this->htmlToMarkdown($html);
//Truncate markdown to max content length, if needed //Truncate markdown to max content length, if needed
$markdown = u($markdown)->truncate($this->settings->maxContentLength, '... [truncated]')->toString(); $markdown = u($markdown)->truncate($this->settings->maxContentLength, '... [truncated]')->toString();
@ -183,34 +182,10 @@ final class AIWebProvider implements InfoProviderInterface
return json_encode($items->toObject(), JSON_THROW_ON_ERROR); return json_encode($items->toObject(), JSON_THROW_ON_ERROR);
} }
private function htmlToMarkdown(string $html, string $url): string private function htmlToMarkdown(string $html): string
{ {
$crawler = new Crawler($html);
//Replace relative URLs with absolute URLs, to ensure that the LLM has full context and can access the links if needed.
$baseUrl = $crawler->getBaseHref() ?? $url;
//Replace all relative links with their absolute counterparts, to provide more context to the LLM and to ensure that any links included in the markdown are valid and can be accessed if needed.
$crawler->filter('a')->each(function (Crawler $node) use ($baseUrl) {
$href = $node->attr('href');
if ($href) {
$absoluteUrl = UriResolver::resolve($href, $baseUrl);
//@phpstan-ignore-next-line we know that getNode(0) will always return a DOMElement, because the crawler is initialized with valid HTML and we are filtering for 'a' tags, which are always DOMElements.
$node->getNode(0)->setAttribute('href', $absoluteUrl);
}
});
$crawler->filter('img')->each(function (Crawler $node) use ($baseUrl) {
$src = $node->attr('src');
if ($src) {
$absoluteUrl = UriResolver::resolve($src, $baseUrl);
//@phpstan-ignore-next-line we know that getNode(0) will always return a DOMElement, because the crawler is initialized with valid HTML and we are filtering for 'img' tags, which are always DOMElements.
$node->getNode(0)->setAttribute('src', $absoluteUrl);
}
});
//Extract only the main content of the page to avoid overwhelming the LLM with irrelevant information. //Extract only the main content of the page to avoid overwhelming the LLM with irrelevant information.
$crawler = new Crawler($html);
$mainContent = $crawler->filter('main, article, #content'); $mainContent = $crawler->filter('main, article, #content');
// If we found a specific content area, get its HTML; otherwise, use the whole body. // If we found a specific content area, get its HTML; otherwise, use the whole body.
@ -223,7 +198,7 @@ final class AIWebProvider implements InfoProviderInterface
} }
} else { } else {
//Use the whole body content, as it might contain relevant information, especially for simpler pages that don't have a clear main/content section. //Use the whole body content, as it might contain relevant information, especially for simpler pages that don't have a clear main/content section.
$htmlToConvert = $crawler->outerHtml(); $htmlToConvert = $html;
} }

View file

@ -296,21 +296,17 @@
<p>{% trans %}update_manager.docker.setup_description{% endtrans %}</p> <p>{% trans %}update_manager.docker.setup_description{% endtrans %}</p>
<h6>{% trans %}update_manager.docker.setup_step1{% endtrans %}</h6> <h6>{% trans %}update_manager.docker.setup_step1{% endtrans %}</h6>
<pre class="bg-dark text-light p-3 rounded"><code> <pre class="bg-dark text-light p-3 rounded"><code>services:
# See documentation for full example: https://docs.part-db.de/installation/installation_docker.html
services:
watchtower: watchtower:
image: ghcr.io/nicholas-fedor/watchtower:latest image: containrrr/watchtower
container_name: watchtower
restart: unless-stopped
volumes: volumes:
- /var/run/docker.sock:/var/run/docker.sock - /var/run/docker.sock:/var/run/docker.sock
environment: environment:
- WATCHTOWER_HTTP_API_UPDATE=true - WATCHTOWER_HTTP_API_UPDATE=true
- WATCHTOWER_HTTP_API_TOKEN=your-secret-token - WATCHTOWER_HTTP_API_TOKEN=your-secret-token
- WATCHTOWER_LABEL_ENABLE=true - WATCHTOWER_LABEL_ENABLE=true
- WATCHTOWER_CLEANUP=true ports:
</code></pre> - "8080:8080"</code></pre>
<h6>{% trans %}update_manager.docker.setup_step2{% endtrans %}</h6> <h6>{% trans %}update_manager.docker.setup_step2{% endtrans %}</h6>
<pre class="bg-dark text-light p-3 rounded"><code>WATCHTOWER_API_URL=http://watchtower:8080 <pre class="bg-dark text-light p-3 rounded"><code>WATCHTOWER_API_URL=http://watchtower:8080