From 889aa08b4e1abb4323e94db1dc4aa74f369753f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sat, 2 May 2026 23:42:26 +0200 Subject: [PATCH] Added URL delegation feature to AI provider and added option to skip that delegation --- src/Controller/InfoProviderController.php | 5 ++ src/Controller/PartController.php | 6 +- .../InfoProviderSystem/FromURLFormType.php | 5 ++ .../CreateFromUrlHelper.php | 59 +++++++++++++++++- .../Providers/AIWebProvider.php | 28 ++++++++- .../Providers/GenericWebProvider.php | 62 +++++-------------- .../Providers/InfoProviderInterface.php | 1 + .../from_url/from_url.html.twig | 1 + translations/messages.en.xlf | 6 ++ 9 files changed, 121 insertions(+), 52 deletions(-) diff --git a/src/Controller/InfoProviderController.php b/src/Controller/InfoProviderController.php index 074d3894..817a6651 100644 --- a/src/Controller/InfoProviderController.php +++ b/src/Controller/InfoProviderController.php @@ -240,12 +240,16 @@ class InfoProviderController extends AbstractController $method = $form->get('method')->getData(); $no_cache = $form->get('no_cache')->getData(); + $skip_delegation = $form->get('skip_delegation')->getData(); try { //It's okay if we use the cached results here, as its just for convenience $searchResult = $this->infoRetriever->searchByKeyword( keyword: $url, providers: [$method], + options: [ + InfoProviderInterface::OPTION_SKIP_DELEGATION => $skip_delegation, + ] ); if (count($searchResult) === 0) { @@ -257,6 +261,7 @@ class InfoProviderController extends AbstractController 'providerKey' => $searchResult->provider_key, 'providerId' => $searchResult->provider_id, 'no_cache' => $no_cache ? 1 : null, + 'skip_delegation' => $skip_delegation ? 1 : null, ]); } } catch (ExceptionInterface $e) { diff --git a/src/Controller/PartController.php b/src/Controller/PartController.php index c80afdb7..ab424f50 100644 --- a/src/Controller/PartController.php +++ b/src/Controller/PartController.php @@ -286,8 +286,12 @@ final class PartController extends AbstractController //Force info providers to not use cache, when retrieving part details for creating a new part, because otherwise we might end up with outdated information $no_cache = $request->query->getBoolean('no_cache', false); + $skip_delegation = $request->query->getBoolean('skip_delegation', false); - $dto = $infoRetriever->getDetails($providerKey, $providerId, [InfoProviderInterface::OPTION_NO_CACHE => $no_cache]); + $dto = $infoRetriever->getDetails($providerKey, $providerId, [ + InfoProviderInterface::OPTION_NO_CACHE => $no_cache, + InfoProviderInterface::OPTION_SKIP_DELEGATION => $skip_delegation, + ]); $new_part = $infoRetriever->dtoToPart($dto); if ($new_part->getCategory() === null || $new_part->getCategory()->getID() === null) { diff --git a/src/Form/InfoProviderSystem/FromURLFormType.php b/src/Form/InfoProviderSystem/FromURLFormType.php index cad7a0f5..39ef50f4 100644 --- a/src/Form/InfoProviderSystem/FromURLFormType.php +++ b/src/Form/InfoProviderSystem/FromURLFormType.php @@ -75,6 +75,11 @@ class FromURLFormType extends AbstractType 'required' => false, ]); + $builder->add('skip_delegation', CheckboxType::class, [ + 'label' => 'info_providers.from_url.skip_delegation', + 'required' => false, + ]); + $builder->add('submit', SubmitType::class, [ 'label' => 'info_providers.search.submit', ]); diff --git a/src/Services/InfoProviderSystem/CreateFromUrlHelper.php b/src/Services/InfoProviderSystem/CreateFromUrlHelper.php index 46a2f767..0291142f 100644 --- a/src/Services/InfoProviderSystem/CreateFromUrlHelper.php +++ b/src/Services/InfoProviderSystem/CreateFromUrlHelper.php @@ -24,11 +24,18 @@ declare(strict_types=1); namespace App\Services\InfoProviderSystem; use App\Entity\UserSystem\User; +use App\Exceptions\ProviderIDNotSupportedException; +use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; +use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; +use App\Services\InfoProviderSystem\Providers\InfoProviderInterface; use Symfony\Bundle\SecurityBundle\Security; final readonly class CreateFromUrlHelper { - public function __construct(private Security $security, private ProviderRegistry $providerRegistry) + public function __construct(private Security $security, + private ProviderRegistry $providerRegistry, + private PartInfoRetriever $infoRetriever, + ) { } @@ -49,4 +56,54 @@ final readonly class CreateFromUrlHelper return $genericWebProvider->isActive() || $aiWebProvider->isActive(); } + + /** + * Delegates the URL to another provider if possible, otherwise return null + * @param string $url + * @return SearchResultDTO|null + */ + public function delegateToOtherProvider(string $url, InfoProviderInterface $callingInfoProvider): ?SearchResultDTO + { + //Extract domain from url: + $host = parse_url($url, PHP_URL_HOST); + if ($host === false || $host === null) { + return null; + } + + $provider = $this->providerRegistry->getProviderHandlingDomain($host); + + if ($provider !== null && $provider->isActive() && $provider->getProviderKey() !== $callingInfoProvider->getProviderKey()) { + try { + $id = $provider->getIDFromURL($url); + if ($id !== null) { + $results = $this->infoRetriever->searchByKeyword($id, [$provider]); + if (count($results) > 0) { + return $results[0]; + } + } + return null; + } catch (ProviderIDNotSupportedException $e) { + //Ignore and continue + return null; + } + } + + return null; + } + + /** + * Delegates the URL to another provider if possible and returns the details, otherwise return null + * @param string $url + * @param InfoProviderInterface $callingInfoProvider + * @return PartDetailDTO|null + */ + public function delegateToOtherProviderDetails(string $url, InfoProviderInterface $callingInfoProvider): ?PartDetailDTO + { + $delegatedResult = $this->delegateToOtherProvider($url, $callingInfoProvider); + if ($delegatedResult !== null) { + return $this->infoRetriever->getDetailsForSearchResult($delegatedResult); + } + + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/AIWebProvider.php b/src/Services/InfoProviderSystem/Providers/AIWebProvider.php index d53201cc..8fb7e4ec 100644 --- a/src/Services/InfoProviderSystem/Providers/AIWebProvider.php +++ b/src/Services/InfoProviderSystem/Providers/AIWebProvider.php @@ -27,6 +27,7 @@ namespace App\Services\InfoProviderSystem\Providers; use App\Exceptions\ProviderIDNotSupportedException; use App\Helpers\RandomizeUseragentHttpClient; use App\Services\AI\AIPlatformRegistry; +use App\Services\InfoProviderSystem\CreateFromUrlHelper; use App\Services\InfoProviderSystem\DTOJsonSchemaConverter; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Settings\InfoProviderSystem\AIExtractorSettings; @@ -57,7 +58,8 @@ final class AIWebProvider implements InfoProviderInterface private readonly AIExtractorSettings $settings, private readonly AIPlatformRegistry $AIPlatformRegistry, private readonly DTOJsonSchemaConverter $jsonSchemaConverter, - private readonly CacheItemPoolInterface $partInfoCache + private readonly CacheItemPoolInterface $partInfoCache, + private readonly CreateFromUrlHelper $createFromUrlHelper, ) { //Use NoPrivateNetworkHttpClient to prevent SSRF vulnerabilities, and RandomizeUseragentHttpClient to make it harder for servers to block us $this->httpClient = (new RandomizeUseragentHttpClient(new NoPrivateNetworkHttpClient($httpClient)))->withOptions( @@ -90,9 +92,23 @@ final class AIWebProvider implements InfoProviderInterface public function searchByKeyword(string $keyword, array $options = []): array { + $url = $this->fixAndValidateURL($keyword); + + if (!($options[self::OPTION_SKIP_DELEGATION] ?? false)) { + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->createFromUrlHelper->delegateToOtherProvider($url, $this); + if ($delegatedPart !== null) { + return [$delegatedPart]; + } + } + try { + + $new_options = $options; + $new_options[self::OPTION_SKIP_DELEGATION] = true; //Skip delegation for the getDetails call to prevent infinite loops + return [ - $this->getDetails($keyword, $options) + $this->getDetails($keyword, $new_options) ]; } catch (ProviderIDNotSupportedException $e) { return []; } @@ -102,6 +118,14 @@ final class AIWebProvider implements InfoProviderInterface { $url = $this->fixAndValidateURL($id); + if (!($options[self::OPTION_SKIP_DELEGATION] ?? false)) { + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->createFromUrlHelper->delegateToOtherProviderDetails($url, $this); + if ($delegatedPart !== null) { + return $delegatedPart; + } + } + //Check if we have a cached result for this URL, to avoid unnecessary LLM calls, which can be slow and costly. $cacheKey = 'ai_web_'.hash('xxh3', $url); diff --git a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php index 23eee528..06a9d4c1 100644 --- a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php +++ b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php @@ -25,6 +25,7 @@ namespace App\Services\InfoProviderSystem\Providers; use App\Exceptions\ProviderIDNotSupportedException; use App\Helpers\RandomizeUseragentHttpClient; +use App\Services\InfoProviderSystem\CreateFromUrlHelper; use App\Services\InfoProviderSystem\DTOs\ParameterDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\PriceDTO; @@ -50,14 +51,12 @@ class GenericWebProvider implements InfoProviderInterface use FixAndValidateUrlTrait; - public const OPTION_CHECK_FOR_DELEGATION = 'check_for_delegation'; - public const DISTRIBUTOR_NAME = 'Website'; private readonly HttpClientInterface $httpClient; public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings, - private readonly ProviderRegistry $providerRegistry, private readonly PartInfoRetriever $infoRetriever, + private readonly CreateFromUrlHelper $createFromUrlHelper, ) { //Use NoPrivateNetworkHttpClient to prevent SSRF vulnerabilities, and RandomizeUseragentHttpClient to make it harder for servers to block us @@ -93,15 +92,19 @@ class GenericWebProvider implements InfoProviderInterface { $url = $this->fixAndValidateURL($keyword); - //Before loading the page, try to delegate to another provider - $delegatedPart = $this->delegateToOtherProvider($url); - if ($delegatedPart !== null) { - return [$delegatedPart]; + if (!($options[self::OPTION_SKIP_DELEGATION] ?? false)) { + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->createFromUrlHelper->delegateToOtherProvider($url, $this); + if ($delegatedPart !== null) { + return [$delegatedPart]; + } } try { + $new_options = $options; + $new_options[self::OPTION_SKIP_DELEGATION] = true; //Skip delegation for the getDetails call to prevent infinite loops return [ - $this->getDetails($keyword, [self::OPTION_CHECK_FOR_DELEGATION => false]) //We already tried delegation + $this->getDetails($keyword, $new_options) ]; } catch (ProviderIDNotSupportedException $e) { return []; } @@ -278,53 +281,16 @@ class GenericWebProvider implements InfoProviderInterface return null; } - /** - * Delegates the URL to another provider if possible, otherwise return null - * @param string $url - * @return SearchResultDTO|null - */ - private function delegateToOtherProvider(string $url): ?SearchResultDTO - { - //Extract domain from url: - $host = parse_url($url, PHP_URL_HOST); - if ($host === false || $host === null) { - return null; - } - - $provider = $this->providerRegistry->getProviderHandlingDomain($host); - - if ($provider !== null && $provider->isActive() && $provider->getProviderKey() !== $this->getProviderKey()) { - try { - $id = $provider->getIDFromURL($url); - if ($id !== null) { - $results = $this->infoRetriever->searchByKeyword($id, [$provider]); - if (count($results) > 0) { - return $results[0]; - } - } - return null; - } catch (ProviderIDNotSupportedException $e) { - //Ignore and continue - return null; - } - } - - return null; - } - - public function getDetails(string $id, array $options = []): PartDetailDTO { - //We check for delegation by default - $check_for_delegation = $options[self::OPTION_CHECK_FOR_DELEGATION] ?? true; $url = $this->fixAndValidateURL($id); - if ($check_for_delegation) { + if (!($options[self::OPTION_SKIP_DELEGATION] ?? false)) { //Before loading the page, try to delegate to another provider - $delegatedPart = $this->delegateToOtherProvider($url); + $delegatedPart = $this->createFromUrlHelper->delegateToOtherProviderDetails($url, $this); if ($delegatedPart !== null) { - return $this->infoRetriever->getDetailsForSearchResult($delegatedPart); + return $delegatedPart; } } diff --git a/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php b/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php index 8896d94b..a6e073a5 100644 --- a/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php +++ b/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php @@ -29,6 +29,7 @@ use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; interface InfoProviderInterface { public const OPTION_NO_CACHE = 'no_cache'; // if set to true, the provider should not use any cache and retrieve fresh data from the source + public const OPTION_SKIP_DELEGATION = 'skip_delegation'; // if set to true, the provider should not delegate the request to other providers, even if it supports delegation. /** * Get information about this provider diff --git a/templates/info_providers/from_url/from_url.html.twig b/templates/info_providers/from_url/from_url.html.twig index 15aa225f..49d4b116 100644 --- a/templates/info_providers/from_url/from_url.html.twig +++ b/templates/info_providers/from_url/from_url.html.twig @@ -28,6 +28,7 @@
{{ form_row(form.no_cache) }} + {{ form_row(form.skip_delegation) }}
diff --git a/translations/messages.en.xlf b/translations/messages.en.xlf index cb071128..fc42c414 100644 --- a/translations/messages.en.xlf +++ b/translations/messages.en.xlf @@ -13181,5 +13181,11 @@ Buerklin-API Authentication server: Ignore cache / Force fresh info retrieval + + + info_providers.from_url.skip_delegation + Do not delegate to specialized info providers + +