From 10acc2e1300cad57766ba82c7e0ad9042817b232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sun, 1 Feb 2026 20:49:50 +0100 Subject: [PATCH 1/3] Added logic to delegate the info retrieval logic to another provider when giving an URL --- .../InfoProviderSystem/ProviderRegistry.php | 38 +++++++++++++++- .../Providers/GenericWebProvider.php | 44 ++++++++++++++++++- .../Providers/PollinProvider.php | 37 +++++++++++++--- .../URLHandlerInfoProviderInterface.php | 43 ++++++++++++++++++ .../ProviderRegistryTest.php | 18 +++++++- 5 files changed, 169 insertions(+), 11 deletions(-) create mode 100644 src/Services/InfoProviderSystem/Providers/URLHandlerInfoProviderInterface.php diff --git a/src/Services/InfoProviderSystem/ProviderRegistry.php b/src/Services/InfoProviderSystem/ProviderRegistry.php index f6c398d2..18b8a37a 100644 --- a/src/Services/InfoProviderSystem/ProviderRegistry.php +++ b/src/Services/InfoProviderSystem/ProviderRegistry.php @@ -24,6 +24,7 @@ declare(strict_types=1); namespace App\Services\InfoProviderSystem; use App\Services\InfoProviderSystem\Providers\InfoProviderInterface; +use App\Services\InfoProviderSystem\Providers\URLHandlerInfoProviderInterface; /** * This class keeps track of all registered info providers and allows to find them by their key @@ -47,6 +48,8 @@ final class ProviderRegistry */ private array $providers_disabled = []; + private array $providers_by_domain = []; + /** * @var bool Whether the registry has been initialized */ @@ -78,6 +81,14 @@ final class ProviderRegistry $this->providers_by_name[$key] = $provider; if ($provider->isActive()) { $this->providers_active[$key] = $provider; + if ($provider instanceof URLHandlerInfoProviderInterface) { + foreach ($provider->getHandledDomains() as $domain) { + if (isset($this->providers_by_domain[$domain])) { + throw new \LogicException("Domain $domain is already handled by another provider"); + } + $this->providers_by_domain[$domain] = $provider; + } + } } else { $this->providers_disabled[$key] = $provider; } @@ -139,4 +150,29 @@ final class ProviderRegistry return $this->providers_disabled; } -} \ No newline at end of file + + public function getProviderHandlingDomain(string $domain): (InfoProviderInterface&URLHandlerInfoProviderInterface)|null + { + if (!$this->initialized) { + $this->initStructures(); + } + + //Check if the domain is directly existing: + if (isset($this->providers_by_domain[$domain])) { + return $this->providers_by_domain[$domain]; + } + + //Otherwise check for subdomains: + $parts = explode('.', $domain); + while (count($parts) > 2) { + array_shift($parts); + $check_domain = implode('.', $parts); + if (isset($this->providers_by_domain[$check_domain])) { + return $this->providers_by_domain[$check_domain]; + } + } + + //If we found nothing, return null + return null; + } +} diff --git a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php index 4b73ad6e..bca3d7cb 100644 --- a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php +++ b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php @@ -28,8 +28,9 @@ use App\Services\InfoProviderSystem\DTOs\ParameterDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\PriceDTO; use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; +use App\Services\InfoProviderSystem\PartInfoRetriever; +use App\Services\InfoProviderSystem\ProviderRegistry; use App\Settings\InfoProviderSystem\GenericWebProviderSettings; -use PhpOffice\PhpSpreadsheet\Calculation\Financial\Securities\Price; use Symfony\Component\DomCrawler\Crawler; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -40,7 +41,9 @@ class GenericWebProvider implements InfoProviderInterface private readonly HttpClientInterface $httpClient; - public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings) + public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings, + private readonly ProviderRegistry $providerRegistry, private readonly PartInfoRetriever $infoRetriever, + ) { $this->httpClient = $httpClient->withOptions( [ @@ -228,6 +231,37 @@ class GenericWebProvider implements InfoProviderInterface return null; } + /** + * Delegates the URL to another provider if possible, otherwise return null + * @param string $url + * @return PartDetailDTO|null + */ + private function delegateToOtherProvider(string $url): ?PartDetailDTO + { + //Extract domain from url: + $host = parse_url($url, PHP_URL_HOST); + if ($host === false || $host === null) { + return null; + } + + $provider = $this->providerRegistry->getProviderHandlingDomain($host); + + if ($provider !== null && $provider->isActive() && $provider->getProviderKey() !== $this->getProviderKey()) { + try { + $id = $provider->getIDFromURL($url); + if ($id !== null) { + return $this->infoRetriever->getDetails($provider->getProviderKey(), $id); + } + return null; + } catch (ProviderIDNotSupportedException $e) { + //Ignore and continue + return null; + } + } + + return null; + } + public function getDetails(string $id): PartDetailDTO { //Add scheme if missing @@ -247,6 +281,12 @@ class GenericWebProvider implements InfoProviderInterface throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$id); } + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->delegateToOtherProvider($url); + if ($delegatedPart !== null) { + return $delegatedPart; + } + //Try to get the webpage content $response = $this->httpClient->request('GET', $url); $content = $response->getContent(); diff --git a/src/Services/InfoProviderSystem/Providers/PollinProvider.php b/src/Services/InfoProviderSystem/Providers/PollinProvider.php index 2c5d68a3..6ac969d3 100644 --- a/src/Services/InfoProviderSystem/Providers/PollinProvider.php +++ b/src/Services/InfoProviderSystem/Providers/PollinProvider.php @@ -36,7 +36,7 @@ use Symfony\Component\DependencyInjection\Attribute\Autowire; use Symfony\Component\DomCrawler\Crawler; use Symfony\Contracts\HttpClient\HttpClientInterface; -class PollinProvider implements InfoProviderInterface +class PollinProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface { public function __construct(private readonly HttpClientInterface $client, @@ -141,11 +141,16 @@ class PollinProvider implements InfoProviderInterface $orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here //Calculate the mass - $massStr = $dom->filter('meta[itemprop="weight"]')->attr('content'); - //Remove the unit - $massStr = str_replace('kg', '', $massStr); - //Convert to float and convert to grams - $mass = (float) $massStr * 1000; + $massDom = $dom->filter('meta[itemprop="weight"]'); + if ($massDom->count() > 0) { + $massStr = $massDom->attr('content'); + $massStr = str_replace('kg', '', $massStr); + //Convert to float and convert to grams + $mass = (float) $massStr * 1000; + } else { + $mass = null; + } + //Parse purchase info $purchaseInfo = new PurchaseInfoDTO('Pollin', $orderId, $this->parsePrices($dom), $productPageUrl); @@ -248,4 +253,22 @@ class PollinProvider implements InfoProviderInterface ProviderCapabilities::DATASHEET ]; } -} \ No newline at end of file + + public function getHandledDomains(): array + { + return ['pollin.de']; + } + + public function getIDFromURL(string $url): ?string + { + //URL like: https://www.pollin.de/p/shelly-bluetooth-schalter-und-dimmer-blu-zb-button-plug-play-mocha-592325 + + //Extract the 6-digit number at the end of the URL + $matches = []; + if (preg_match('/-(\d{6})(?:\/|$)/', $url, $matches)) { + return $matches[1]; + } + + return null; + } +} diff --git a/src/Services/InfoProviderSystem/Providers/URLHandlerInfoProviderInterface.php b/src/Services/InfoProviderSystem/Providers/URLHandlerInfoProviderInterface.php new file mode 100644 index 00000000..c0506648 --- /dev/null +++ b/src/Services/InfoProviderSystem/Providers/URLHandlerInfoProviderInterface.php @@ -0,0 +1,43 @@ +. + */ + +declare(strict_types=1); + + +namespace App\Services\InfoProviderSystem\Providers; + +/** + * If an interface + */ +interface URLHandlerInfoProviderInterface +{ + /** + * Returns a list of supported domains (e.g. ["digikey.com"]) + * @return array An array of supported domains + */ + public function getHandledDomains(): array; + + /** + * Extracts the unique ID of a part from a given URL. It is okay if this is not a canonical ID, as long as it can be used to uniquely identify the part within this provider. + * @param string $url The URL to extract the ID from + * @return string|null The extracted ID, or null if the URL is not valid for this provider + */ + public function getIDFromURL(string $url): ?string; +} diff --git a/tests/Services/InfoProviderSystem/ProviderRegistryTest.php b/tests/Services/InfoProviderSystem/ProviderRegistryTest.php index 9026c5bf..48a1847f 100644 --- a/tests/Services/InfoProviderSystem/ProviderRegistryTest.php +++ b/tests/Services/InfoProviderSystem/ProviderRegistryTest.php @@ -24,6 +24,7 @@ namespace App\Tests\Services\InfoProviderSystem; use App\Services\InfoProviderSystem\ProviderRegistry; use App\Services\InfoProviderSystem\Providers\InfoProviderInterface; +use App\Services\InfoProviderSystem\Providers\URLHandlerInfoProviderInterface; use PHPUnit\Framework\TestCase; class ProviderRegistryTest extends TestCase @@ -44,9 +45,10 @@ class ProviderRegistryTest extends TestCase public function getMockProvider(string $key, bool $active = true): InfoProviderInterface { - $mock = $this->createMock(InfoProviderInterface::class); + $mock = $this->createMockForIntersectionOfInterfaces([InfoProviderInterface::class, URLHandlerInfoProviderInterface::class]); $mock->method('getProviderKey')->willReturn($key); $mock->method('isActive')->willReturn($active); + $mock->method('getHandledDomains')->willReturn(["$key.com", "test.$key.de"]); return $mock; } @@ -109,4 +111,18 @@ class ProviderRegistryTest extends TestCase $registry->getProviders(); } + + public function testGetProviderHandlingDomain(): void + { + $registry = new ProviderRegistry($this->providers); + + $this->assertEquals($this->providers[0], $registry->getProviderHandlingDomain('test1.com')); + $this->assertEquals($this->providers[0], $registry->getProviderHandlingDomain('www.test1.com')); //Subdomain should also work + + $this->assertEquals( + $this->providers[1], + $registry->getProviderHandlingDomain('test.test2.de') + ); + } + } From 24f0f0d23c325a23ab4d7d000b90e36e1fd80975 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sun, 1 Feb 2026 21:18:06 +0100 Subject: [PATCH 2/3] Added URL handling to a few more existing info providers --- .../Providers/ConradProvider.php | 25 ++++++++- .../Providers/Element14Provider.php | 19 ++++++- .../Providers/GenericWebProvider.php | 51 +++++++++++++------ .../Providers/LCSCProvider.php | 19 ++++++- .../Providers/TMEProvider.php | 20 +++++++- 5 files changed, 115 insertions(+), 19 deletions(-) diff --git a/src/Services/InfoProviderSystem/Providers/ConradProvider.php b/src/Services/InfoProviderSystem/Providers/ConradProvider.php index 6212f148..32434dee 100644 --- a/src/Services/InfoProviderSystem/Providers/ConradProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ConradProvider.php @@ -30,9 +30,10 @@ use App\Services\InfoProviderSystem\DTOs\PriceDTO; use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use App\Settings\InfoProviderSystem\ConradSettings; +use App\Settings\InfoProviderSystem\ConradShopIDs; use Symfony\Contracts\HttpClient\HttpClientInterface; -readonly class ConradProvider implements InfoProviderInterface +readonly class ConradProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface { private const SEARCH_ENDPOINT = '/search/1/v3/facetSearch'; @@ -317,4 +318,26 @@ readonly class ConradProvider implements InfoProviderInterface ProviderCapabilities::PRICE, ]; } + + public function getHandledDomains(): array + { + $domains = []; + foreach (ConradShopIDs::cases() as $shopID) { + $domains[] = $shopID->getDomain(); + } + return array_unique($domains); + } + + public function getIDFromURL(string $url): ?string + { + //Input: https://www.conrad.de/de/p/apple-iphone-air-wolkenweiss-256-gb-eek-a-a-g-16-5-cm-6-5-zoll-3475299.html + //The numbers before the optional .html are the product ID + + $matches = []; + if (preg_match('/-(\d+)(\.html)?$/', $url, $matches) === 1) { + return $matches[1]; + } + + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/Element14Provider.php b/src/Services/InfoProviderSystem/Providers/Element14Provider.php index 27dfb908..9ae45728 100644 --- a/src/Services/InfoProviderSystem/Providers/Element14Provider.php +++ b/src/Services/InfoProviderSystem/Providers/Element14Provider.php @@ -33,7 +33,7 @@ use App\Settings\InfoProviderSystem\Element14Settings; use Composer\CaBundle\CaBundle; use Symfony\Contracts\HttpClient\HttpClientInterface; -class Element14Provider implements InfoProviderInterface +class Element14Provider implements InfoProviderInterface, URLHandlerInfoProviderInterface { private const ENDPOINT_URL = 'https://api.element14.com/catalog/products'; @@ -309,4 +309,21 @@ class Element14Provider implements InfoProviderInterface ProviderCapabilities::DATASHEET, ]; } + + public function getHandledDomains(): array + { + return ['element14.com', 'farnell.com', 'newark.com']; + } + + public function getIDFromURL(string $url): ?string + { + //Input URL example: https://de.farnell.com/on-semiconductor/bc547b/transistor-npn-to-92/dp/1017673 + //The digits after the /dp/ are the part ID + $matches = []; + if (preg_match('#/dp/(\d+)#', $url, $matches) === 1) { + return $matches[1]; + } + + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php index bca3d7cb..d06a6105 100644 --- a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php +++ b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php @@ -28,6 +28,7 @@ use App\Services\InfoProviderSystem\DTOs\ParameterDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\PriceDTO; use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; +use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use App\Services\InfoProviderSystem\PartInfoRetriever; use App\Services\InfoProviderSystem\ProviderRegistry; use App\Settings\InfoProviderSystem\GenericWebProviderSettings; @@ -78,9 +79,17 @@ class GenericWebProvider implements InfoProviderInterface public function searchByKeyword(string $keyword): array { + $url = $this->fixAndValidateURL($keyword); + + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->delegateToOtherProvider($url); + if ($delegatedPart !== null) { + return [$delegatedPart]; + } + try { return [ - $this->getDetails($keyword) + $this->getDetails($keyword, false) //We already tried delegation ]; } catch (ProviderIDNotSupportedException $e) { return []; } @@ -234,9 +243,9 @@ class GenericWebProvider implements InfoProviderInterface /** * Delegates the URL to another provider if possible, otherwise return null * @param string $url - * @return PartDetailDTO|null + * @return SearchResultDTO|null */ - private function delegateToOtherProvider(string $url): ?PartDetailDTO + private function delegateToOtherProvider(string $url): ?SearchResultDTO { //Extract domain from url: $host = parse_url($url, PHP_URL_HOST); @@ -250,7 +259,10 @@ class GenericWebProvider implements InfoProviderInterface try { $id = $provider->getIDFromURL($url); if ($id !== null) { - return $this->infoRetriever->getDetails($provider->getProviderKey(), $id); + $results = $this->infoRetriever->searchByKeyword($id, [$provider]); + if (count($results) > 0) { + return $results[0]; + } } return null; } catch (ProviderIDNotSupportedException $e) { @@ -262,29 +274,38 @@ class GenericWebProvider implements InfoProviderInterface return null; } - public function getDetails(string $id): PartDetailDTO + private function fixAndValidateURL(string $url): string { + $originalUrl = $url; + //Add scheme if missing - if (!preg_match('/^https?:\/\//', $id)) { + if (!preg_match('/^https?:\/\//', $url)) { //Remove any leading slashes - $id = ltrim($id, '/'); + $url = ltrim($url, '/'); - $id = 'https://'.$id; + $url = 'https://'.$url; } - $url = $id; - //If this is not a valid URL with host, domain and path, throw an exception if (filter_var($url, FILTER_VALIDATE_URL) === false || parse_url($url, PHP_URL_HOST) === null || parse_url($url, PHP_URL_PATH) === null) { - throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$id); + throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$originalUrl); } - //Before loading the page, try to delegate to another provider - $delegatedPart = $this->delegateToOtherProvider($url); - if ($delegatedPart !== null) { - return $delegatedPart; + return $url; + } + + public function getDetails(string $id, bool $check_for_delegation = true): PartDetailDTO + { + $url = $this->fixAndValidateURL($id); + + if ($check_for_delegation) { + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->delegateToOtherProvider($url); + if ($delegatedPart !== null) { + return $delegatedPart; + } } //Try to get the webpage content diff --git a/src/Services/InfoProviderSystem/Providers/LCSCProvider.php b/src/Services/InfoProviderSystem/Providers/LCSCProvider.php index ede34eb8..1b807eff 100755 --- a/src/Services/InfoProviderSystem/Providers/LCSCProvider.php +++ b/src/Services/InfoProviderSystem/Providers/LCSCProvider.php @@ -33,7 +33,7 @@ use App\Settings\InfoProviderSystem\LCSCSettings; use Symfony\Component\HttpFoundation\Cookie; use Symfony\Contracts\HttpClient\HttpClientInterface; -class LCSCProvider implements BatchInfoProviderInterface +class LCSCProvider implements BatchInfoProviderInterface, URLHandlerInfoProviderInterface { private const ENDPOINT_URL = 'https://wmsc.lcsc.com/ftps/wm'; @@ -452,4 +452,21 @@ class LCSCProvider implements BatchInfoProviderInterface ProviderCapabilities::FOOTPRINT, ]; } + + public function getHandledDomains(): array + { + return ['lcsc.com']; + } + + public function getIDFromURL(string $url): ?string + { + //Input example: https://www.lcsc.com/product-detail/C258144.html?s_z=n_BC547 + //The part between the "C" and the ".html" is the unique ID + + $matches = []; + if (preg_match("#/product-detail/(\w+)\.html#", $url, $matches) > 0) { + return $matches[1]; + } + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/TMEProvider.php b/src/Services/InfoProviderSystem/Providers/TMEProvider.php index 9bc73f09..938bc7b3 100644 --- a/src/Services/InfoProviderSystem/Providers/TMEProvider.php +++ b/src/Services/InfoProviderSystem/Providers/TMEProvider.php @@ -32,7 +32,7 @@ use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use App\Settings\InfoProviderSystem\TMESettings; -class TMEProvider implements InfoProviderInterface +class TMEProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface { private const VENDOR_NAME = 'TME'; @@ -296,4 +296,22 @@ class TMEProvider implements InfoProviderInterface ProviderCapabilities::PRICE, ]; } + + public function getHandledDomains(): array + { + return ['tme.eu']; + } + + public function getIDFromURL(string $url): ?string + { + //Input: https://www.tme.eu/de/details/fi321_se/kuhler/alutronic/ + //The ID is the part after the details segment and before the next slash + + $matches = []; + if (preg_match('#/details/([^/]+)/#', $url, $matches) === 1) { + return $matches[1]; + } + + return null; + } } From a1396c6696f4a9b6421a400b529a42176b3ea9cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Sun, 1 Feb 2026 21:19:11 +0100 Subject: [PATCH 3/3] Fixed delegation logic for PartDetailDTO --- .../InfoProviderSystem/Providers/GenericWebProvider.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php index d06a6105..66d45707 100644 --- a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php +++ b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php @@ -224,6 +224,12 @@ class GenericWebProvider implements InfoProviderInterface return json_decode($json, true, 512, JSON_THROW_ON_ERROR); } + /** + * Gets the content of a meta tag by its name or property attribute, or null if not found + * @param Crawler $dom + * @param string $name + * @return string|null + */ private function getMetaContent(Crawler $dom, string $name): ?string { $meta = $dom->filter('meta[property="'.$name.'"]'); @@ -304,7 +310,7 @@ class GenericWebProvider implements InfoProviderInterface //Before loading the page, try to delegate to another provider $delegatedPart = $this->delegateToOtherProvider($url); if ($delegatedPart !== null) { - return $delegatedPart; + return $this->infoRetriever->getDetailsForSearchResult($delegatedPart); } }