Randomize User-Agent and prevent access to private networks for AI extractor

This commit is contained in:
Jan Böhmer 2026-04-26 23:18:09 +02:00
parent ad096aa6ff
commit 5edcc60d41

View file

@ -25,6 +25,7 @@ declare(strict_types=1);
namespace App\Services\InfoProviderSystem\Providers; namespace App\Services\InfoProviderSystem\Providers;
use App\Exceptions\ProviderIDNotSupportedException; use App\Exceptions\ProviderIDNotSupportedException;
use App\Helpers\RandomizeUseragentHttpClient;
use App\Services\AI\AIPlatformRegistry; use App\Services\AI\AIPlatformRegistry;
use App\Services\InfoProviderSystem\DTOJsonSchemaConverter; use App\Services\InfoProviderSystem\DTOJsonSchemaConverter;
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
@ -35,6 +36,7 @@ use League\HTMLToMarkdown\HtmlConverter;
use Symfony\AI\Platform\Message\Message; use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag; use Symfony\AI\Platform\Message\MessageBag;
use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\HttpClient\NoPrivateNetworkHttpClient;
use Symfony\Component\Intl\Languages; use Symfony\Component\Intl\Languages;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
@ -55,12 +57,12 @@ final class AIInfoExtractor implements InfoProviderInterface
private readonly AIPlatformRegistry $AIPlatformRegistry, private readonly AIPlatformRegistry $AIPlatformRegistry,
private readonly DTOJsonSchemaConverter $jsonSchemaConverter, private readonly DTOJsonSchemaConverter $jsonSchemaConverter,
) { ) {
$this->httpClient = $httpClient->withOptions([ //Use NoPrivateNetworkHttpClient to prevent SSRF vulnerabilities, and RandomizeUseragentHttpClient to make it harder for servers to block us
'timeout' => 30, $this->httpClient = (new RandomizeUseragentHttpClient(new NoPrivateNetworkHttpClient($httpClient)))->withOptions(
'headers' => [ [
'User-Agent' => 'Mozilla/5.0 (compatible; Part-DB AI-Extractor/1.0)', 'timeout' => 15,
], ]
]); );
} }
public function getProviderInfo(): array public function getProviderInfo(): array
@ -199,7 +201,7 @@ final class AIInfoExtractor implements InfoProviderInterface
$result = $aiPlatform->invoke($this->settings->model ?? throw new \RuntimeException('No model selected'), $input, [ $result = $aiPlatform->invoke($this->settings->model ?? throw new \RuntimeException('No model selected'), $input, [
'response_format' => [ 'response_format' => [
'type' => 'json_schema', 'type' => 'json_schema',
'json_schema' => $this->jsonSchemaConverter->getJSONSchema(), 'json_schema' => $this->jsonSchemaConverter->getJSONSchema(),
] ]
]); ]);
} catch (\Throwable $e) { } catch (\Throwable $e) {