From e77b67445c0a27a16d8dd33a340ca426126cc506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sat, 2 May 2026 22:08:25 +0200 Subject: [PATCH 1/9] Added cache to AIWebProvider --- ...{AIInfoExtractor.php => AIWebProvider.php} | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) rename src/Services/InfoProviderSystem/Providers/{AIInfoExtractor.php => AIWebProvider.php} (87%) diff --git a/src/Services/InfoProviderSystem/Providers/AIInfoExtractor.php b/src/Services/InfoProviderSystem/Providers/AIWebProvider.php similarity index 87% rename from src/Services/InfoProviderSystem/Providers/AIInfoExtractor.php rename to src/Services/InfoProviderSystem/Providers/AIWebProvider.php index bf1ce37c..d53201cc 100644 --- a/src/Services/InfoProviderSystem/Providers/AIInfoExtractor.php +++ b/src/Services/InfoProviderSystem/Providers/AIWebProvider.php @@ -33,6 +33,7 @@ use App\Settings\InfoProviderSystem\AIExtractorSettings; use Brick\Schema\SchemaReader; use Jkphl\Micrometa; use League\HTMLToMarkdown\HtmlConverter; +use Psr\Cache\CacheItemPoolInterface; use Symfony\AI\Platform\Message\Message; use Symfony\AI\Platform\Message\MessageBag; use Symfony\Component\DomCrawler\Crawler; @@ -43,11 +44,11 @@ use Symfony\Contracts\HttpClient\HttpClientInterface; use function Symfony\Component\String\u; -final class AIInfoExtractor implements InfoProviderInterface +final class AIWebProvider implements InfoProviderInterface { use FixAndValidateUrlTrait; - private const DISTRIBUTOR_NAME = 'AI Extracted'; + private const DISTRIBUTOR_NAME = 'Website'; private readonly HttpClientInterface $httpClient; @@ -56,6 +57,7 @@ final class AIInfoExtractor implements InfoProviderInterface private readonly AIExtractorSettings $settings, private readonly AIPlatformRegistry $AIPlatformRegistry, private readonly DTOJsonSchemaConverter $jsonSchemaConverter, + private readonly CacheItemPoolInterface $partInfoCache ) { //Use NoPrivateNetworkHttpClient to prevent SSRF 
vulnerabilities, and RandomizeUseragentHttpClient to make it harder for servers to block us $this->httpClient = (new RandomizeUseragentHttpClient(new NoPrivateNetworkHttpClient($httpClient)))->withOptions( @@ -68,17 +70,17 @@ final class AIInfoExtractor implements InfoProviderInterface public function getProviderInfo(): array { return [ - 'name' => 'AI Information Extractor', - 'description' => 'Extract part info from any URL using OpenRouter LLM', + 'name' => 'AI Web Extractor', + 'description' => 'Extract part info from any URL using LLM', //'url' => 'https://openrouter.ai', - 'disabled_help' => 'Configure OpenRouter API key in settings', + 'disabled_help' => 'Configure AI settings', 'settings_class' => AIExtractorSettings::class, ]; } public function getProviderKey(): string { - return 'ai_extractor'; + return 'ai_web'; } public function isActive(): bool @@ -90,7 +92,7 @@ final class AIInfoExtractor implements InfoProviderInterface { try { return [ - $this->getDetails($keyword) + $this->getDetails($keyword, $options) ]; } catch (ProviderIDNotSupportedException $e) { return []; } @@ -100,16 +102,24 @@ final class AIInfoExtractor implements InfoProviderInterface { $url = $this->fixAndValidateURL($id); + //Check if we have a cached result for this URL, to avoid unnecessary LLM calls, which can be slow and costly. + $cacheKey = 'ai_web_'.hash('xxh3', $url); + + //If ignore cache option is set, skip cache and fetch fresh data + if ($options[self::OPTION_NO_CACHE] ?? 
false) { + $this->partInfoCache->deleteItem($cacheKey); + } + + //Return cached result if available + $cacheItem = $this->partInfoCache->getItem($cacheKey); + if ($cacheItem->isHit()) { + return $cacheItem->get(); + } + // Fetch HTML content $response = $this->httpClient->request('GET', $url); $html = $response->getContent(); - // Clean HTML - /*$cleanedHtml = $this->cleanHTML($html); - - // Truncate to max content length - $truncatedHtml = $this->truncateHTML($cleanedHtml, $this->settings->maxContentLength);*/ - //Convert html to markdown, to provide a cleaner input to the LLM. $markdown = $this->htmlToMarkdown($html); //Truncate markdown to max content length, if needed @@ -124,6 +134,11 @@ final class AIInfoExtractor implements InfoProviderInterface // Build and return PartDetailDTO $result = $this->jsonSchemaConverter->jsonToDTO($llmResponse, $this->getProviderKey(), $url, $url, self::DISTRIBUTOR_NAME); + // Cache the result for future use, to improve performance and reduce costs. + $cacheItem->set($result); + $cacheItem->expiresAfter(3600 * 2); //Cache for 2 hours, as web content can change frequently, but we still want to benefit from caching for repeated accesses. 
+ $this->partInfoCache->save($cacheItem); + return $result; } From a2b9ee764d2890c975d58a59973b7b96cc654bc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sat, 2 May 2026 22:12:36 +0200 Subject: [PATCH 2/9] Added tests for AIPlatformRegistry --- src/Services/AI/AIPlatformRegistry.php | 1 - tests/Services/AI/AIPlatformRegistryTest.php | 99 ++++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 tests/Services/AI/AIPlatformRegistryTest.php diff --git a/src/Services/AI/AIPlatformRegistry.php b/src/Services/AI/AIPlatformRegistry.php index bf1d355c..408bb181 100644 --- a/src/Services/AI/AIPlatformRegistry.php +++ b/src/Services/AI/AIPlatformRegistry.php @@ -43,7 +43,6 @@ final readonly class AIPlatformRegistry public function __construct( SettingsManagerInterface $settingsManager, - #[AutowireIterator(tag: 'ai.platform', indexAttribute: 'name')] iterable $platforms, ) { diff --git a/tests/Services/AI/AIPlatformRegistryTest.php b/tests/Services/AI/AIPlatformRegistryTest.php new file mode 100644 index 00000000..1577f9b5 --- /dev/null +++ b/tests/Services/AI/AIPlatformRegistryTest.php @@ -0,0 +1,99 @@ +. 
+ */ + +/** + * Tests for App\Services\AI\AIPlatformRegistry + */ +declare(strict_types=1); + +namespace App\Tests\Services\AI; + +use App\Services\AI\AIPlatformRegistry; +use App\Services\AI\AIPlatforms; +use App\Services\AI\AIPlatformSettingsInterface; +use Jbtronics\SettingsBundle\Manager\SettingsManagerInterface; +use PHPUnit\Framework\TestCase; +use Symfony\AI\Platform\PlatformInterface; + +class AIPlatformRegistryTest extends TestCase +{ + public function testRegistersEnabledPlatformsAndReturnsPlatform(): void + { + // Create a platform mock and expose it under the service tag name (openrouter) + $platformMock = $this->createMock(PlatformInterface::class); + + // Settings for OpenRouter -> enabled + $openRouterSettings = $this->createMock(AIPlatformSettingsInterface::class); + $openRouterSettings->method('isAIPlatformEnabled')->willReturn(true); + + // Settings for LMStudio -> disabled + $lmSettings = $this->createMock(AIPlatformSettingsInterface::class); + $lmSettings->method('isAIPlatformEnabled')->willReturn(false); + + // Settings manager should return the corresponding settings object depending on the requested class name + $settingsManager = $this->createMock(SettingsManagerInterface::class); + $settingsManager->method('get')->willReturnMap([ + [AIPlatforms::OPENROUTER->toSettingsClass(), $openRouterSettings], + [AIPlatforms::LMSTUDIO->toSettingsClass(), $lmSettings], + ]); + + $platforms = new \ArrayIterator([ + AIPlatforms::OPENROUTER->toServiceTagName() => $platformMock, + ]); + + $registry = new AIPlatformRegistry($settingsManager, $platforms); + + // OPENROUTER should be enabled and retrievable + $this->assertTrue($registry->isEnabled(AIPlatforms::OPENROUTER)); + $this->assertSame($platformMock, $registry->getPlatform(AIPlatforms::OPENROUTER)); + + // LMSTUDIO is either not registered or disabled -> should not be enabled + $this->assertFalse($registry->isEnabled(AIPlatforms::LMSTUDIO)); + $this->expectException(\InvalidArgumentException::class); + 
$registry->getPlatform(AIPlatforms::LMSTUDIO); + } + + public function testGetEnabledPlatformsReturnsIndexedArray(): void + { + $platformMock = $this->createMock(PlatformInterface::class); + + $openRouterSettings = $this->createMock(AIPlatformSettingsInterface::class); + $openRouterSettings->method('isAIPlatformEnabled')->willReturn(true); + + $settingsManager = $this->createMock(SettingsManagerInterface::class); + $settingsManager->method('get')->willReturnMap([ + [AIPlatforms::OPENROUTER->toSettingsClass(), $openRouterSettings], + [AIPlatforms::LMSTUDIO->toSettingsClass(), $this->createMock(AIPlatformSettingsInterface::class)], + ]); + + $platforms = new \ArrayIterator([ + AIPlatforms::OPENROUTER->toServiceTagName() => $platformMock, + // lmstudio not registered + ]); + + $registry = new AIPlatformRegistry($settingsManager, $platforms); + + $enabled = $registry->getEnabledPlatforms(); + + $this->assertArrayHasKey(AIPlatforms::OPENROUTER->value, $enabled); + $this->assertSame($platformMock, $enabled[AIPlatforms::OPENROUTER->value]); + } +} + From aac5b8e0becf1d87b764e5a76082a9f561d53221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sat, 2 May 2026 23:23:20 +0200 Subject: [PATCH 3/9] Allow to select which method should be used in "Create from URL feature" --- src/Controller/InfoProviderController.php | 25 +++--- .../InfoProviderSystem/FromURLFormType.php | 82 +++++++++++++++++++ .../CreateFromUrlHelper.php | 52 ++++++++++++ src/Twig/MiscExtension.php | 13 ++- templates/_navbar.html.twig | 2 +- .../from_url/from_url.html.twig | 15 ++++ translations/messages.en.xlf | 24 ++++++ 7 files changed, 198 insertions(+), 15 deletions(-) create mode 100644 src/Form/InfoProviderSystem/FromURLFormType.php create mode 100644 src/Services/InfoProviderSystem/CreateFromUrlHelper.php diff --git a/src/Controller/InfoProviderController.php b/src/Controller/InfoProviderController.php index cd076d67..074d3894 100644 --- a/src/Controller/InfoProviderController.php
+++ b/src/Controller/InfoProviderController.php @@ -26,8 +26,10 @@ namespace App\Controller; use App\Entity\Parts\Manufacturer; use App\Entity\Parts\Part; use App\Exceptions\OAuthReconnectRequiredException; +use App\Form\InfoProviderSystem\FromURLFormType; use App\Form\InfoProviderSystem\PartSearchType; use App\Services\InfoProviderSystem\ExistingPartFinder; +use App\Services\InfoProviderSystem\CreateFromUrlHelper; use App\Services\InfoProviderSystem\PartInfoRetriever; use App\Services\InfoProviderSystem\ProviderRegistry; use App\Services\InfoProviderSystem\Providers\GenericWebProvider; @@ -219,35 +221,31 @@ class InfoProviderController extends AbstractController } #[Route('/from_url', name: 'info_providers_from_url')] - public function fromURL(Request $request, GenericWebProvider $provider): Response + public function fromURL(Request $request, GenericWebProvider $provider, CreateFromUrlHelper $fromUrlHelper): Response { $this->denyAccessUnlessGranted('@info_providers.create_parts'); - if (!$provider->isActive()) { + if (!$fromUrlHelper->canCreateFromUrl()) { $this->addFlash('error', "Generic Web Provider is not active. 
Please enable it in the provider settings."); return $this->redirectToRoute('info_providers_list'); } - $formBuilder = $this->createFormBuilder(); - $formBuilder->add('url', UrlType::class, [ - 'label' => 'info_providers.from_url.url.label', - 'required' => true, - ]); - $formBuilder->add('submit', SubmitType::class, [ - 'label' => 'info_providers.search.submit', - ]); - - $form = $formBuilder->getForm(); + $form = $this->createForm(FromURLFormType::class); $form->handleRequest($request); $partDetail = null; if ($form->isSubmitted() && $form->isValid()) { //Try to retrieve the part detail from the given URL $url = $form->get('url')->getData(); + + $method = $form->get('method')->getData(); + $no_cache = $form->get('no_cache')->getData(); + try { + //It's okay if we use the cached results here, as its just for convenience $searchResult = $this->infoRetriever->searchByKeyword( keyword: $url, - providers: [$provider] + providers: [$method], ); if (count($searchResult) === 0) { @@ -258,6 +256,7 @@ class InfoProviderController extends AbstractController return $this->redirectToRoute('info_providers_create_part', [ 'providerKey' => $searchResult->provider_key, 'providerId' => $searchResult->provider_id, + 'no_cache' => $no_cache ? 1 : null, ]); } } catch (ExceptionInterface $e) { diff --git a/src/Form/InfoProviderSystem/FromURLFormType.php b/src/Form/InfoProviderSystem/FromURLFormType.php new file mode 100644 index 00000000..cad7a0f5 --- /dev/null +++ b/src/Form/InfoProviderSystem/FromURLFormType.php @@ -0,0 +1,82 @@ +. 
+ */ + +declare(strict_types=1); + + +namespace App\Form\InfoProviderSystem; + +use App\Services\InfoProviderSystem\ProviderRegistry; +use Symfony\Component\Form\AbstractType; +use Symfony\Component\Form\Extension\Core\Type\CheckboxType; +use Symfony\Component\Form\Extension\Core\Type\ChoiceType; +use Symfony\Component\Form\Extension\Core\Type\SubmitType; +use Symfony\Component\Form\Extension\Core\Type\UrlType; +use Symfony\Component\Form\FormBuilderInterface; + +class FromURLFormType extends AbstractType +{ + public function __construct(private readonly ProviderRegistry $providerRegistry) + { + + } + + public function buildForm(FormBuilderInterface $builder, array $options): void + { + $builder->add('url', UrlType::class, [ + 'label' => 'info_providers.from_url.url.label', + 'required' => true, + ]); + + + $builder->add('method', ChoiceType::class, [ + 'expanded' => true, + 'data' => 'generic_web', //Default value + 'label' => 'info_providers.from_url.method', + 'choices' => [ + 'info_providers.from_url.method.generic_web' => 'generic_web', + 'info_providers.from_url.method.ai_web' => 'ai_web', + ], + 'choice_attr' => function ($choice, $key, $value) { + //Disable all providers that are not active + $provider = $this->providerRegistry->getProviderByKey($value); + if (!$provider->isActive()) { + return ['disabled' => 'disabled']; + } + + return []; + }, + + //Render the choices as inline radio buttons + 'label_attr' => [ + 'class' => 'radio-inline', + ], + ]); + + $builder->add('no_cache', CheckboxType::class, [ + 'label' => 'info_providers.from_url.no_cache', + 'required' => false, + ]); + + $builder->add('submit', SubmitType::class, [ + 'label' => 'info_providers.search.submit', + ]); + } +} diff --git a/src/Services/InfoProviderSystem/CreateFromUrlHelper.php b/src/Services/InfoProviderSystem/CreateFromUrlHelper.php new file mode 100644 index 00000000..46a2f767 --- /dev/null +++ b/src/Services/InfoProviderSystem/CreateFromUrlHelper.php @@ -0,0 +1,52 @@ +. 
+ */ + +declare(strict_types=1); + + +namespace App\Services\InfoProviderSystem; + +use App\Entity\UserSystem\User; +use Symfony\Bundle\SecurityBundle\Security; + +final readonly class CreateFromUrlHelper +{ + public function __construct(private Security $security, private ProviderRegistry $providerRegistry) + { + } + + /** + * Checks if at least one provider can create parts from an URL and the current user is allowed to use it. + * This is used to determine if the "From URL" feature should be shown to the user. + * @return bool + */ + public function canCreateFromUrl(): bool + { + if (!$this->security->isGranted('@info_providers.create_parts')) { + return false; + } + + //Check if either the generic web provider or the ai web provider is active + $genericWebProvider = $this->providerRegistry->getProviderByKey('generic_web'); + $aiWebProvider = $this->providerRegistry->getProviderByKey('ai_web'); + + return $genericWebProvider->isActive() || $aiWebProvider->isActive(); + } +} diff --git a/src/Twig/MiscExtension.php b/src/Twig/MiscExtension.php index 390ad084..565d56f2 100644 --- a/src/Twig/MiscExtension.php +++ b/src/Twig/MiscExtension.php @@ -22,6 +22,7 @@ declare(strict_types=1); */ namespace App\Twig; +use App\Services\InfoProviderSystem\CreateFromUrlHelper; use Twig\Attribute\AsTwigFunction; use App\Settings\SettingsIcon; use Symfony\Component\HttpFoundation\Request; @@ -34,7 +35,7 @@ use Twig\Extension\AbstractExtension; final readonly class MiscExtension { - public function __construct(private EventCommentNeededHelper $eventCommentNeededHelper) + public function __construct(private EventCommentNeededHelper $eventCommentNeededHelper, private CreateFromUrlHelper $fromUrlHelper) { } @@ -84,4 +85,14 @@ final readonly class MiscExtension return $request->getBaseUrl().$request->getPathInfo().$qs; } + + /** + * Returns true if the from url provider is active, false otherwise. 
+ * @return bool + */ + #[AsTwigFunction(name: 'create_from_url_active')] + public function create_from_url_active(): bool + { + return $this->fromUrlHelper->canCreateFromUrl(); + } } diff --git a/templates/_navbar.html.twig b/templates/_navbar.html.twig index 57331370..7719ab2b 100644 --- a/templates/_navbar.html.twig +++ b/templates/_navbar.html.twig @@ -52,7 +52,7 @@ {% trans %}info_providers.search.title{% endtrans %} - {% if settings_instance('generic_web_provider').enabled %} + {% if create_from_url_active() %}
  • diff --git a/templates/info_providers/from_url/from_url.html.twig b/templates/info_providers/from_url/from_url.html.twig index 3370a94c..15aa225f 100644 --- a/templates/info_providers/from_url/from_url.html.twig +++ b/templates/info_providers/from_url/from_url.html.twig @@ -16,6 +16,21 @@ {{ form_start(form) }} {{ form_row(form.url) }} + + {{ form_row(form.method) }} + + + +
    +
    + {{ form_row(form.no_cache) }} +
    +
    + {{ form_row(form.submit) }} {{ form_end(form) }} {% endblock %} diff --git a/translations/messages.en.xlf b/translations/messages.en.xlf index f30ffb9f..cb071128 100644 --- a/translations/messages.en.xlf +++ b/translations/messages.en.xlf @@ -13157,5 +13157,29 @@ Buerklin-API Authentication server: Do not cache result details / Force fresh part detail retrieval + + + info_providers.from_url.method.generic_web + Classic Web Scraper + + + + + info_providers.from_url.method.ai_web + AI Web Scraper + + + + + info_providers.from_url.method + Method + + + + + info_providers.from_url.no_cache + Ignore cache / Force fresh info retrieval + + From 889aa08b4e1abb4323e94db1dc4aa74f369753f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sat, 2 May 2026 23:42:26 +0200 Subject: [PATCH 4/9] Added URL delegation feature to AI provider and added option to skip that delegation --- src/Controller/InfoProviderController.php | 5 ++ src/Controller/PartController.php | 6 +- .../InfoProviderSystem/FromURLFormType.php | 5 ++ .../CreateFromUrlHelper.php | 59 +++++++++++++++++- .../Providers/AIWebProvider.php | 28 ++++++++- .../Providers/GenericWebProvider.php | 62 +++++-------------- .../Providers/InfoProviderInterface.php | 1 + .../from_url/from_url.html.twig | 1 + translations/messages.en.xlf | 6 ++ 9 files changed, 121 insertions(+), 52 deletions(-) diff --git a/src/Controller/InfoProviderController.php b/src/Controller/InfoProviderController.php index 074d3894..817a6651 100644 --- a/src/Controller/InfoProviderController.php +++ b/src/Controller/InfoProviderController.php @@ -240,12 +240,16 @@ class InfoProviderController extends AbstractController $method = $form->get('method')->getData(); $no_cache = $form->get('no_cache')->getData(); + $skip_delegation = $form->get('skip_delegation')->getData(); try { //It's okay if we use the cached results here, as its just for convenience $searchResult = $this->infoRetriever->searchByKeyword( keyword: $url, providers: 
[$method], + options: [ + InfoProviderInterface::OPTION_SKIP_DELEGATION => $skip_delegation, + ] ); if (count($searchResult) === 0) { @@ -257,6 +261,7 @@ class InfoProviderController extends AbstractController 'providerKey' => $searchResult->provider_key, 'providerId' => $searchResult->provider_id, 'no_cache' => $no_cache ? 1 : null, + 'skip_delegation' => $skip_delegation ? 1 : null, ]); } } catch (ExceptionInterface $e) { diff --git a/src/Controller/PartController.php b/src/Controller/PartController.php index c80afdb7..ab424f50 100644 --- a/src/Controller/PartController.php +++ b/src/Controller/PartController.php @@ -286,8 +286,12 @@ final class PartController extends AbstractController //Force info providers to not use cache, when retrieving part details for creating a new part, because otherwise we might end up with outdated information $no_cache = $request->query->getBoolean('no_cache', false); + $skip_delegation = $request->query->getBoolean('skip_delegation', false); - $dto = $infoRetriever->getDetails($providerKey, $providerId, [InfoProviderInterface::OPTION_NO_CACHE => $no_cache]); + $dto = $infoRetriever->getDetails($providerKey, $providerId, [ + InfoProviderInterface::OPTION_NO_CACHE => $no_cache, + InfoProviderInterface::OPTION_SKIP_DELEGATION => $skip_delegation, + ]); $new_part = $infoRetriever->dtoToPart($dto); if ($new_part->getCategory() === null || $new_part->getCategory()->getID() === null) { diff --git a/src/Form/InfoProviderSystem/FromURLFormType.php b/src/Form/InfoProviderSystem/FromURLFormType.php index cad7a0f5..39ef50f4 100644 --- a/src/Form/InfoProviderSystem/FromURLFormType.php +++ b/src/Form/InfoProviderSystem/FromURLFormType.php @@ -75,6 +75,11 @@ class FromURLFormType extends AbstractType 'required' => false, ]); + $builder->add('skip_delegation', CheckboxType::class, [ + 'label' => 'info_providers.from_url.skip_delegation', + 'required' => false, + ]); + $builder->add('submit', SubmitType::class, [ 'label' => 
'info_providers.search.submit', ]); diff --git a/src/Services/InfoProviderSystem/CreateFromUrlHelper.php b/src/Services/InfoProviderSystem/CreateFromUrlHelper.php index 46a2f767..0291142f 100644 --- a/src/Services/InfoProviderSystem/CreateFromUrlHelper.php +++ b/src/Services/InfoProviderSystem/CreateFromUrlHelper.php @@ -24,11 +24,18 @@ declare(strict_types=1); namespace App\Services\InfoProviderSystem; use App\Entity\UserSystem\User; +use App\Exceptions\ProviderIDNotSupportedException; +use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; +use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; +use App\Services\InfoProviderSystem\Providers\InfoProviderInterface; use Symfony\Bundle\SecurityBundle\Security; final readonly class CreateFromUrlHelper { - public function __construct(private Security $security, private ProviderRegistry $providerRegistry) + public function __construct(private Security $security, + private ProviderRegistry $providerRegistry, + private PartInfoRetriever $infoRetriever, + ) { } @@ -49,4 +56,54 @@ final readonly class CreateFromUrlHelper return $genericWebProvider->isActive() || $aiWebProvider->isActive(); } + + /** + * Delegates the URL to another provider if possible, otherwise return null + * @param string $url + * @return SearchResultDTO|null + */ + public function delegateToOtherProvider(string $url, InfoProviderInterface $callingInfoProvider): ?SearchResultDTO + { + //Extract domain from url: + $host = parse_url($url, PHP_URL_HOST); + if ($host === false || $host === null) { + return null; + } + + $provider = $this->providerRegistry->getProviderHandlingDomain($host); + + if ($provider !== null && $provider->isActive() && $provider->getProviderKey() !== $callingInfoProvider->getProviderKey()) { + try { + $id = $provider->getIDFromURL($url); + if ($id !== null) { + $results = $this->infoRetriever->searchByKeyword($id, [$provider]); + if (count($results) > 0) { + return $results[0]; + } + } + return null; + } catch 
(ProviderIDNotSupportedException $e) { + //Ignore and continue + return null; + } + } + + return null; + } + + /** + * Delegates the URL to another provider if possible and returns the details, otherwise return null + * @param string $url + * @param InfoProviderInterface $callingInfoProvider + * @return PartDetailDTO|null + */ + public function delegateToOtherProviderDetails(string $url, InfoProviderInterface $callingInfoProvider): ?PartDetailDTO + { + $delegatedResult = $this->delegateToOtherProvider($url, $callingInfoProvider); + if ($delegatedResult !== null) { + return $this->infoRetriever->getDetailsForSearchResult($delegatedResult); + } + + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/AIWebProvider.php b/src/Services/InfoProviderSystem/Providers/AIWebProvider.php index d53201cc..8fb7e4ec 100644 --- a/src/Services/InfoProviderSystem/Providers/AIWebProvider.php +++ b/src/Services/InfoProviderSystem/Providers/AIWebProvider.php @@ -27,6 +27,7 @@ namespace App\Services\InfoProviderSystem\Providers; use App\Exceptions\ProviderIDNotSupportedException; use App\Helpers\RandomizeUseragentHttpClient; use App\Services\AI\AIPlatformRegistry; +use App\Services\InfoProviderSystem\CreateFromUrlHelper; use App\Services\InfoProviderSystem\DTOJsonSchemaConverter; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Settings\InfoProviderSystem\AIExtractorSettings; @@ -57,7 +58,8 @@ final class AIWebProvider implements InfoProviderInterface private readonly AIExtractorSettings $settings, private readonly AIPlatformRegistry $AIPlatformRegistry, private readonly DTOJsonSchemaConverter $jsonSchemaConverter, - private readonly CacheItemPoolInterface $partInfoCache + private readonly CacheItemPoolInterface $partInfoCache, + private readonly CreateFromUrlHelper $createFromUrlHelper, ) { //Use NoPrivateNetworkHttpClient to prevent SSRF vulnerabilities, and RandomizeUseragentHttpClient to make it harder for servers to block us $this->httpClient = 
(new RandomizeUseragentHttpClient(new NoPrivateNetworkHttpClient($httpClient)))->withOptions( @@ -90,9 +92,23 @@ final class AIWebProvider implements InfoProviderInterface public function searchByKeyword(string $keyword, array $options = []): array { + $url = $this->fixAndValidateURL($keyword); + + if (!($options[self::OPTION_SKIP_DELEGATION] ?? false)) { + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->createFromUrlHelper->delegateToOtherProvider($url, $this); + if ($delegatedPart !== null) { + return [$delegatedPart]; + } + } + try { + + $new_options = $options; + $new_options[self::OPTION_SKIP_DELEGATION] = true; //Skip delegation for the getDetails call to prevent infinite loops + return [ - $this->getDetails($keyword, $options) + $this->getDetails($keyword, $new_options) ]; } catch (ProviderIDNotSupportedException $e) { return []; } @@ -102,6 +118,14 @@ final class AIWebProvider implements InfoProviderInterface { $url = $this->fixAndValidateURL($id); + if (!($options[self::OPTION_SKIP_DELEGATION] ?? false)) { + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->createFromUrlHelper->delegateToOtherProviderDetails($url, $this); + if ($delegatedPart !== null) { + return $delegatedPart; + } + } + //Check if we have a cached result for this URL, to avoid unnecessary LLM calls, which can be slow and costly. 
$cacheKey = 'ai_web_'.hash('xxh3', $url); diff --git a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php index 23eee528..06a9d4c1 100644 --- a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php +++ b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php @@ -25,6 +25,7 @@ namespace App\Services\InfoProviderSystem\Providers; use App\Exceptions\ProviderIDNotSupportedException; use App\Helpers\RandomizeUseragentHttpClient; +use App\Services\InfoProviderSystem\CreateFromUrlHelper; use App\Services\InfoProviderSystem\DTOs\ParameterDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\PriceDTO; @@ -50,14 +51,12 @@ class GenericWebProvider implements InfoProviderInterface use FixAndValidateUrlTrait; - public const OPTION_CHECK_FOR_DELEGATION = 'check_for_delegation'; - public const DISTRIBUTOR_NAME = 'Website'; private readonly HttpClientInterface $httpClient; public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings, - private readonly ProviderRegistry $providerRegistry, private readonly PartInfoRetriever $infoRetriever, + private readonly CreateFromUrlHelper $createFromUrlHelper, ) { //Use NoPrivateNetworkHttpClient to prevent SSRF vulnerabilities, and RandomizeUseragentHttpClient to make it harder for servers to block us @@ -93,15 +92,19 @@ class GenericWebProvider implements InfoProviderInterface { $url = $this->fixAndValidateURL($keyword); - //Before loading the page, try to delegate to another provider - $delegatedPart = $this->delegateToOtherProvider($url); - if ($delegatedPart !== null) { - return [$delegatedPart]; + if (!($options[self::OPTION_SKIP_DELEGATION] ?? 
false)) { + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->createFromUrlHelper->delegateToOtherProvider($url, $this); + if ($delegatedPart !== null) { + return [$delegatedPart]; + } } try { + $new_options = $options; + $new_options[self::OPTION_SKIP_DELEGATION] = true; //Skip delegation for the getDetails call to prevent infinite loops return [ - $this->getDetails($keyword, [self::OPTION_CHECK_FOR_DELEGATION => false]) //We already tried delegation + $this->getDetails($keyword, $new_options) ]; } catch (ProviderIDNotSupportedException $e) { return []; } @@ -278,53 +281,16 @@ class GenericWebProvider implements InfoProviderInterface return null; } - /** - * Delegates the URL to another provider if possible, otherwise return null - * @param string $url - * @return SearchResultDTO|null - */ - private function delegateToOtherProvider(string $url): ?SearchResultDTO - { - //Extract domain from url: - $host = parse_url($url, PHP_URL_HOST); - if ($host === false || $host === null) { - return null; - } - - $provider = $this->providerRegistry->getProviderHandlingDomain($host); - - if ($provider !== null && $provider->isActive() && $provider->getProviderKey() !== $this->getProviderKey()) { - try { - $id = $provider->getIDFromURL($url); - if ($id !== null) { - $results = $this->infoRetriever->searchByKeyword($id, [$provider]); - if (count($results) > 0) { - return $results[0]; - } - } - return null; - } catch (ProviderIDNotSupportedException $e) { - //Ignore and continue - return null; - } - } - - return null; - } - - public function getDetails(string $id, array $options = []): PartDetailDTO { - //We check for delegation by default - $check_for_delegation = $options[self::OPTION_CHECK_FOR_DELEGATION] ?? true; $url = $this->fixAndValidateURL($id); - if ($check_for_delegation) { + if (!($options[self::OPTION_SKIP_DELEGATION] ?? 
false)) { //Before loading the page, try to delegate to another provider - $delegatedPart = $this->delegateToOtherProvider($url); + $delegatedPart = $this->createFromUrlHelper->delegateToOtherProviderDetails($url, $this); if ($delegatedPart !== null) { - return $this->infoRetriever->getDetailsForSearchResult($delegatedPart); + return $delegatedPart; } } diff --git a/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php b/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php index 8896d94b..a6e073a5 100644 --- a/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php +++ b/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php @@ -29,6 +29,7 @@ use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; interface InfoProviderInterface { public const OPTION_NO_CACHE = 'no_cache'; // if set to true, the provider should not use any cache and retrieve fresh data from the source + public const OPTION_SKIP_DELEGATION = 'skip_delegation'; // if set to true, the provider should not delegate the request to other providers, even if it supports delegation. /** * Get information about this provider diff --git a/templates/info_providers/from_url/from_url.html.twig b/templates/info_providers/from_url/from_url.html.twig index 15aa225f..49d4b116 100644 --- a/templates/info_providers/from_url/from_url.html.twig +++ b/templates/info_providers/from_url/from_url.html.twig @@ -28,6 +28,7 @@
    {{ form_row(form.no_cache) }} + {{ form_row(form.skip_delegation) }}
    diff --git a/translations/messages.en.xlf b/translations/messages.en.xlf index cb071128..fc42c414 100644 --- a/translations/messages.en.xlf +++ b/translations/messages.en.xlf @@ -13181,5 +13181,11 @@ Buerklin-API Authentication server: Ignore cache / Force fresh info retrieval + + + info_providers.from_url.skip_delegation + Do not delegate to specialized info providers + + From e437bb0b7bc75c72d62a175171bb0305555131d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sat, 2 May 2026 23:49:07 +0200 Subject: [PATCH 5/9] Improved translations of AI related stuff in settings --- src/Settings/AISettings/LMStudioSettings.php | 2 ++ translations/messages.en.xlf | 24 ++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/Settings/AISettings/LMStudioSettings.php b/src/Settings/AISettings/LMStudioSettings.php index 627961a9..2bdad06e 100644 --- a/src/Settings/AISettings/LMStudioSettings.php +++ b/src/Settings/AISettings/LMStudioSettings.php @@ -31,6 +31,7 @@ use Jbtronics\SettingsBundle\Settings\Settings; use Jbtronics\SettingsBundle\Settings\SettingsParameter; use Jbtronics\SettingsBundle\Settings\SettingsTrait; use Symfony\Component\Form\Extension\Core\Type\UrlType; +use Symfony\Component\Translation\StaticMessage; use Symfony\Component\Translation\TranslatableMessage as TM; #[Settings(name: 'ai_lmstudio', label: new TM("settings.ai.lmstudio"))] @@ -41,6 +42,7 @@ class LMStudioSettings implements AIPlatformSettingsInterface #[SettingsParameter(label: new TM("settings.ai.lmstudio.hosturl"), formType: UrlType::class, + formOptions: ["attr" => ["placeholder" => new StaticMessage("http://localhost:1234")]], envVar: "AI_LMSTUDIO_HOSTURL", envVarMode: EnvVarMode::OVERWRITE)] public ?string $hostURL = null; diff --git a/translations/messages.en.xlf b/translations/messages.en.xlf index fc42c414..8af54745 100644 --- a/translations/messages.en.xlf +++ b/translations/messages.en.xlf @@ -13187,5 +13187,29 @@ Buerklin-API Authentication server: Do 
not delegate to specialized info providers + + + settings.ips.ai_extractor + AI Web Extractor + + + + + settings.ips.ai_extractor.description + This info provider uses a large language model (LLM) to extract detailed part information from arbitrary shop URLs. + + + + + settings.ai.openrouter.help + Access to many AI models via openrouter.ai + + + + + settings.ai.lmstudio.hosturl + Host URL + + From 9c317db260d8ee0c49cbd5575659fc28b996b647 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sat, 2 May 2026 23:51:34 +0200 Subject: [PATCH 6/9] Do not translate domain canopy domain settings choices This removes clutter from the translation panel --- src/Settings/InfoProviderSystem/CanopySettings.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Settings/InfoProviderSystem/CanopySettings.php b/src/Settings/InfoProviderSystem/CanopySettings.php index 0858871b..3c97a80e 100644 --- a/src/Settings/InfoProviderSystem/CanopySettings.php +++ b/src/Settings/InfoProviderSystem/CanopySettings.php @@ -72,7 +72,7 @@ class CanopySettings /** * @var string The domain used internally for the API requests.
This is not necessarily the same as the domain shown to the user, which is determined by the keys of the ALLOWED_DOMAINS constant */ - #[SettingsParameter(label: new TM("settings.ips.tme.country"), formType: ChoiceType::class, formOptions: ["choices" => self::ALLOWED_DOMAINS])] + #[SettingsParameter(label: new TM("settings.ips.tme.country"), formType: ChoiceType::class, formOptions: ["choices" => self::ALLOWED_DOMAINS, 'translation_domain' => false])] public string $domain = "DE"; /** From db86b8c33001bc1d871d4e33a830394d33635d59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sun, 3 May 2026 00:08:00 +0200 Subject: [PATCH 7/9] Accept all models for openrouter ai provider --- src/Services/AI/AcceptAllModelsCatalog.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Services/AI/AcceptAllModelsCatalog.php b/src/Services/AI/AcceptAllModelsCatalog.php index bf590128..a2f5c33a 100644 --- a/src/Services/AI/AcceptAllModelsCatalog.php +++ b/src/Services/AI/AcceptAllModelsCatalog.php @@ -34,6 +34,7 @@ use Symfony\Component\DependencyInjection\Attribute\AsDecorator; * This is a workaround for outdated/incomplete model catalogs provided by AI platforms, which do not contain all available models, or do not update their catalogs frequently enough. 
*/ #[AsDecorator('ai.platform.model_catalog.lmstudio')] +#[AsDecorator('ai.platform.model_catalog.openrouter')] final readonly class AcceptAllModelsCatalog implements ModelCatalogInterface { From 21bad8126237a10831dd0e40a037bc691323c47d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sun, 3 May 2026 00:18:38 +0200 Subject: [PATCH 8/9] Fixed phpstan issues --- src/Controller/TypeaheadController.php | 4 ++-- src/Form/Settings/AiPlatformChoiceType.php | 2 +- src/Services/AI/AIPlatforms.php | 2 -- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Controller/TypeaheadController.php b/src/Controller/TypeaheadController.php index c4cd5607..f7e15b6d 100644 --- a/src/Controller/TypeaheadController.php +++ b/src/Controller/TypeaheadController.php @@ -244,7 +244,7 @@ class TypeaheadController extends AbstractController $capability_filter = $request->query->getEnum('capability', Capability::class); - $models = $cache->get('ai_models_'.$platform->value.'_'.($capability_filter?->value ?? 'all'), + $models = $cache->get('ai_models_'.$platform->value.'_'.($capability_filter->value ?? 'all'), function (ItemInterface $item) use ($platformRegistry, $platform, $capability_filter) { $item->expiresAfter(3600); //Cache for 1 hour if ($capability_filter === null) { @@ -253,7 +253,7 @@ class TypeaheadController extends AbstractController //Otherwise filter the models by the capability return array_filter($platformRegistry->getPlatform($platform)->getModelCatalog()->getModels(), - static fn(array $model) => in_array($capability_filter, $model['capabilities'] ?? 
[], true) + static fn(array $model) => in_array($capability_filter, $model['capabilities'], true) ); }); diff --git a/src/Form/Settings/AiPlatformChoiceType.php b/src/Form/Settings/AiPlatformChoiceType.php index eb48d933..82ea66b2 100644 --- a/src/Form/Settings/AiPlatformChoiceType.php +++ b/src/Form/Settings/AiPlatformChoiceType.php @@ -41,7 +41,7 @@ final class AiPlatformChoiceType extends AbstractType { } - public function getParent(): ?string + public function getParent(): string { return EnumType::class; } diff --git a/src/Services/AI/AIPlatforms.php b/src/Services/AI/AIPlatforms.php index ec772cf3..2f4d6317 100644 --- a/src/Services/AI/AIPlatforms.php +++ b/src/Services/AI/AIPlatforms.php @@ -52,8 +52,6 @@ enum AIPlatforms: string implements TranslatableInterface return match ($this) { self::LMSTUDIO => LMStudioSettings::class, self::OPENROUTER => OpenRouterSettings::class, - - default => throw new \InvalidArgumentException(sprintf('No settings class defined for AI platform "%s".', $this->name)), }; } From a15a5efdcee1a3b0a21af8d2d1348b096996b857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sun, 3 May 2026 00:35:49 +0200 Subject: [PATCH 9/9] Added documentation about AI features --- README.md | 1 + docs/index.md | 1 + docs/usage/ai.md | 27 +++++++++++++++++++++++ docs/usage/information_provider_system.md | 13 +++++++++++ 4 files changed, 42 insertions(+) create mode 100644 docs/usage/ai.md diff --git a/README.md b/README.md index ad37e9c6..b857711f 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ for the first time. 
* Automatic thumbnail generation for pictures * Use cloud providers (like Octopart, Digikey, Farnell, LCSC or TME) to automatically get part information, datasheets, and prices for parts +* Retrieve part information from arbitrary shop websites, using either conventional data extraction from structured metadata, or AI-based data extraction * API to access Part-DB from other applications/scripts * [Integration with KiCad](https://docs.part-db.de/usage/eda_integration.html): Use Part-DB as the central datasource for your KiCad and see available parts from Part-DB directly inside KiCad. diff --git a/docs/index.md b/docs/index.md index c2128946..700937f4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -47,6 +47,7 @@ It is installed on a web server and so can be accessed with any browser without * Easy migration from an existing PartKeepr instance (see [here]({%link partkeepr_migration.md %})) * Use cloud providers (like Octopart, Digikey, Farnell, Mouser, or TME) to automatically get part information, datasheets, and prices for parts (see [here]({% link usage/information_provider_system.md %})) +* Retrieve part information from arbitrary shop websites, using either conventional data extraction from structured metadata, or AI-based data extraction * API to access Part-DB from other applications/scripts * [Integration with KiCad]({%link usage/eda_integration.md %}): Use Part-DB as the central datasource for your KiCad and see available parts from Part-DB directly inside KiCad. diff --git a/docs/usage/ai.md b/docs/usage/ai.md new file mode 100644 index 00000000..3a1fb419 --- /dev/null +++ b/docs/usage/ai.md @@ -0,0 +1,27 @@ +--- +layout: default +title: AI features +nav_order: 6 +parent: Usage +--- + +# AI features + +Part-DB can utilize large language models (LLMs) to provide AI-powered features that can assist you in managing your parts and projects. +For now, this is mostly the ability to extract part information from websites without any structured data.
+ +## AI platforms + +Part-DB is platform agnostic and can work with different AI platforms, both locally and in the cloud. They can be configured in the "AI" tab in the system settings. +Currently, the following platforms are supported: + +### OpenRouter + +[OpenRouter](https://openrouter.ai/) is a platform that provides access to various LLMs, including models from OpenAI, Anthropic, and more. +You can use OpenRouter to connect to different LLMs and use them for Part-DB's AI features. +You need to supply an API key for OpenRouter to use it as an AI platform in Part-DB. + +### LMStudio + +[LMStudio](https://lmstudio.ai/) is a local LLM hosting solution that allows you to run LLMs on your own hardware. You can use LMStudio to host your own LLM and connect it to Part-DB for AI features. +Currently only LMStudio without any authentication is supported. Supply your LMStudio instance URL (including the port) to use it as an AI platform in Part-DB. diff --git a/docs/usage/information_provider_system.md b/docs/usage/information_provider_system.md index 1600d76f..7cac6328 100644 --- a/docs/usage/information_provider_system.md +++ b/docs/usage/information_provider_system.md @@ -111,6 +111,19 @@ may have privacy and security implications. Following env configuration options are available: * `PROVIDER_GENERIC_WEB_ENABLED`: Set this to `1` to enable the Generic Web URL Provider (optional, default: `0`) +### AI Web Extractor +The AI web extractor provider can extract part information from any webpage using AI-based techniques. It is designed to handle unstructured data and can extract relevant information even from websites that do not use structured data formats like Schema.org. +This provider can be particularly useful for extracting information from websites that have complex layouts or do not follow standard e-commerce practices. 
+It also potentially extracts more detailed information than the Generic Web URL Provider, as it is not limited to the fields defined in the Schema.org format. + +To use the AI Web Extractor, you need to set up an AI platform in the AI settings tab and choose a model that supports structured output. +For many use cases, a small and cheap model like `google/gemini-2.5-flash-lite` will be sufficient, at a cost of roughly $0.003 per request. +For more complex websites, or if you also want to use the LLM for translation purposes, you should consider a more powerful model. + +You can add some additional instructions for the model, which are added to the system prompt, to tweak the output of the model. + +The provider will download the HTML of the given URL, convert it to Markdown, and send it to the LLM together with structured data extracted from the webpage via conventional methods. + ### Octopart The Octopart provider uses the [Octopart / Nexar API](https://nexar.com/api) to search for parts and get information.