diff --git a/src/Services/InfoProviderSystem/ProviderRegistry.php b/src/Services/InfoProviderSystem/ProviderRegistry.php index f6c398d2..18b8a37a 100644 --- a/src/Services/InfoProviderSystem/ProviderRegistry.php +++ b/src/Services/InfoProviderSystem/ProviderRegistry.php @@ -24,6 +24,7 @@ declare(strict_types=1); namespace App\Services\InfoProviderSystem; use App\Services\InfoProviderSystem\Providers\InfoProviderInterface; +use App\Services\InfoProviderSystem\Providers\URLHandlerInfoProviderInterface; /** * This class keeps track of all registered info providers and allows to find them by their key @@ -47,6 +48,8 @@ final class ProviderRegistry */ private array $providers_disabled = []; + private array $providers_by_domain = []; + /** * @var bool Whether the registry has been initialized */ @@ -78,6 +81,14 @@ final class ProviderRegistry $this->providers_by_name[$key] = $provider; if ($provider->isActive()) { $this->providers_active[$key] = $provider; + if ($provider instanceof URLHandlerInfoProviderInterface) { + foreach (array_map(strtolower(...), $provider->getHandledDomains()) as $domain) { + if (isset($this->providers_by_domain[$domain])) { + throw new \LogicException("Domain $domain is already handled by another provider"); + } + $this->providers_by_domain[$domain] = $provider; + } + } } else { $this->providers_disabled[$key] = $provider; } @@ -139,4 +150,38 @@ final class ProviderRegistry return $this->providers_disabled; } -} \ No newline at end of file + + /** + * Returns the active provider which handles the given domain or one of its parent domains, or null if there is none. + * Parent domains are tried by stripping leading labels down to two remaining labels (so "www.example.com" also matches a provider registered for "example.com"). + * @param string $domain The host name to look up; it is compared case-insensitively + * @return (InfoProviderInterface&URLHandlerInfoProviderInterface)|null The matching provider, or null if no active provider handles the domain + */ + public function getProviderHandlingDomain(string $domain): (InfoProviderInterface&URLHandlerInfoProviderInterface)|null + { + if (!$this->initialized) { + $this->initStructures(); + } + + //Host names are case-insensitive and the domains are registered in lowercase, so normalize before the lookup + $domain = strtolower($domain); + + //Check if the domain is directly existing: + if (isset($this->providers_by_domain[$domain])) { + return $this->providers_by_domain[$domain]; + } + + //Otherwise check for subdomains: + $parts = explode('.', $domain); + while (count($parts) > 2) { + array_shift($parts); + $check_domain = implode('.', $parts); + if 
(isset($this->providers_by_domain[$check_domain])) { + return $this->providers_by_domain[$check_domain]; + } + } + + //If we found nothing, return null + return null; + } +} diff --git a/src/Services/InfoProviderSystem/Providers/ConradProvider.php b/src/Services/InfoProviderSystem/Providers/ConradProvider.php index 6212f148..32434dee 100644 --- a/src/Services/InfoProviderSystem/Providers/ConradProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ConradProvider.php @@ -30,9 +30,10 @@ use App\Services\InfoProviderSystem\DTOs\PriceDTO; use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use App\Settings\InfoProviderSystem\ConradSettings; +use App\Settings\InfoProviderSystem\ConradShopIDs; use Symfony\Contracts\HttpClient\HttpClientInterface; -readonly class ConradProvider implements InfoProviderInterface +readonly class ConradProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface { private const SEARCH_ENDPOINT = '/search/1/v3/facetSearch'; @@ -317,4 +318,26 @@ readonly class ConradProvider implements InfoProviderInterface ProviderCapabilities::PRICE, ]; } + + public function getHandledDomains(): array + { + $domains = []; + foreach (ConradShopIDs::cases() as $shopID) { + $domains[] = $shopID->getDomain(); + } + return array_unique($domains); + } + + public function getIDFromURL(string $url): ?string + { + //Input: https://www.conrad.de/de/p/apple-iphone-air-wolkenweiss-256-gb-eek-a-a-g-16-5-cm-6-5-zoll-3475299.html + //The numbers before the optional .html are the product ID + + $matches = []; + if (preg_match('/-(\d+)(\.html)?$/', $url, $matches) === 1) { + return $matches[1]; + } + + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/Element14Provider.php b/src/Services/InfoProviderSystem/Providers/Element14Provider.php index 27dfb908..9ae45728 100644 --- a/src/Services/InfoProviderSystem/Providers/Element14Provider.php +++ 
b/src/Services/InfoProviderSystem/Providers/Element14Provider.php @@ -33,7 +33,7 @@ use App\Settings\InfoProviderSystem\Element14Settings; use Composer\CaBundle\CaBundle; use Symfony\Contracts\HttpClient\HttpClientInterface; -class Element14Provider implements InfoProviderInterface +class Element14Provider implements InfoProviderInterface, URLHandlerInfoProviderInterface { private const ENDPOINT_URL = 'https://api.element14.com/catalog/products'; @@ -309,4 +309,21 @@ class Element14Provider implements InfoProviderInterface ProviderCapabilities::DATASHEET, ]; } + + public function getHandledDomains(): array + { + return ['element14.com', 'farnell.com', 'newark.com']; + } + + public function getIDFromURL(string $url): ?string + { + //Input URL example: https://de.farnell.com/on-semiconductor/bc547b/transistor-npn-to-92/dp/1017673 + //The digits after the /dp/ are the part ID + $matches = []; + if (preg_match('#/dp/(\d+)#', $url, $matches) === 1) { + return $matches[1]; + } + + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php index 4b73ad6e..66d45707 100644 --- a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php +++ b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php @@ -28,8 +28,10 @@ use App\Services\InfoProviderSystem\DTOs\ParameterDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\PriceDTO; use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; +use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; +use App\Services\InfoProviderSystem\PartInfoRetriever; +use App\Services\InfoProviderSystem\ProviderRegistry; use App\Settings\InfoProviderSystem\GenericWebProviderSettings; -use PhpOffice\PhpSpreadsheet\Calculation\Financial\Securities\Price; use Symfony\Component\DomCrawler\Crawler; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -40,7 +42,9 @@ class 
GenericWebProvider implements InfoProviderInterface private readonly HttpClientInterface $httpClient; - public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings) + public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings, + private readonly ProviderRegistry $providerRegistry, private readonly PartInfoRetriever $infoRetriever, + ) { $this->httpClient = $httpClient->withOptions( [ @@ -75,9 +79,17 @@ class GenericWebProvider implements InfoProviderInterface public function searchByKeyword(string $keyword): array { try { + //Validate inside the try block: a keyword that is no valid URL must yield an empty result, not an uncaught exception + $url = $this->fixAndValidateURL($keyword); + + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->delegateToOtherProvider($url); + if ($delegatedPart !== null) { + return [$delegatedPart]; + } return [ - $this->getDetails($keyword) + $this->getDetails($keyword, false) //We already tried delegation ]; } catch (ProviderIDNotSupportedException $e) { return []; } @@ -212,6 +224,12 @@ class GenericWebProvider implements InfoProviderInterface return json_decode($json, true, 512, JSON_THROW_ON_ERROR); } + /** + * Gets the content of a meta tag by its name or property attribute, or null if not found + * @param Crawler $dom + * @param string $name + * @return string|null + */ private function getMetaContent(Crawler $dom, string $name): ?string { $meta = $dom->filter('meta[property="'.$name.'"]'); @@ -228,23 +246,72 @@ class GenericWebProvider implements InfoProviderInterface return null; } - public function getDetails(string $id): PartDetailDTO + /** + * Delegates the URL to another provider if possible, otherwise return null + * @param string $url + * @return SearchResultDTO|null + */ + private function delegateToOtherProvider(string $url): ?SearchResultDTO { - //Add scheme if missing - if (!preg_match('/^https?:\/\//', $id)) { - //Remove any leading slashes - $id = ltrim($id, '/'); - - $id = 
'https://'.$id; + //Extract domain from url: + $host = parse_url($url, PHP_URL_HOST); + if ($host === false || $host === null) { + return null; } - $url = $id; + $provider = $this->providerRegistry->getProviderHandlingDomain($host); + + if ($provider !== null && $provider->isActive() && $provider->getProviderKey() !== $this->getProviderKey()) { + try { + $id = $provider->getIDFromURL($url); + if ($id !== null) { + $results = $this->infoRetriever->searchByKeyword($id, [$provider]); + if (count($results) > 0) { + return $results[0]; + } + } + return null; + } catch (ProviderIDNotSupportedException $e) { + //Ignore and continue + return null; + } + } + + return null; + } + + private function fixAndValidateURL(string $url): string + { + $originalUrl = $url; + + //Add scheme if missing + if (!preg_match('/^https?:\/\//', $url)) { + //Remove any leading slashes + $url = ltrim($url, '/'); + + $url = 'https://'.$url; + } //If this is not a valid URL with host, domain and path, throw an exception if (filter_var($url, FILTER_VALIDATE_URL) === false || parse_url($url, PHP_URL_HOST) === null || parse_url($url, PHP_URL_PATH) === null) { - throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$id); + throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$originalUrl); + } + + return $url; + } + + public function getDetails(string $id, bool $check_for_delegation = true): PartDetailDTO + { + $url = $this->fixAndValidateURL($id); + + if ($check_for_delegation) { + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->delegateToOtherProvider($url); + if ($delegatedPart !== null) { + return $this->infoRetriever->getDetailsForSearchResult($delegatedPart); + } } //Try to get the webpage content diff --git a/src/Services/InfoProviderSystem/Providers/LCSCProvider.php b/src/Services/InfoProviderSystem/Providers/LCSCProvider.php index ede34eb8..1b807eff 100755 --- 
a/src/Services/InfoProviderSystem/Providers/LCSCProvider.php +++ b/src/Services/InfoProviderSystem/Providers/LCSCProvider.php @@ -33,7 +33,7 @@ use App\Settings\InfoProviderSystem\LCSCSettings; use Symfony\Component\HttpFoundation\Cookie; use Symfony\Contracts\HttpClient\HttpClientInterface; -class LCSCProvider implements BatchInfoProviderInterface +class LCSCProvider implements BatchInfoProviderInterface, URLHandlerInfoProviderInterface { private const ENDPOINT_URL = 'https://wmsc.lcsc.com/ftps/wm'; @@ -452,4 +452,21 @@ class LCSCProvider implements BatchInfoProviderInterface ProviderCapabilities::FOOTPRINT, ]; } + + public function getHandledDomains(): array + { + return ['lcsc.com']; + } + + public function getIDFromURL(string $url): ?string + { + //Input example: https://www.lcsc.com/product-detail/C258144.html?s_z=n_BC547 + //The path segment between "/product-detail/" and ".html" (including the leading "C") is the unique ID + + $matches = []; + if (preg_match("#/product-detail/(\w+)\.html#", $url, $matches) > 0) { + return $matches[1]; + } + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/PollinProvider.php b/src/Services/InfoProviderSystem/Providers/PollinProvider.php index 2c5d68a3..6ac969d3 100644 --- a/src/Services/InfoProviderSystem/Providers/PollinProvider.php +++ b/src/Services/InfoProviderSystem/Providers/PollinProvider.php @@ -36,7 +36,7 @@ use Symfony\Component\DependencyInjection\Attribute\Autowire; use Symfony\Component\DomCrawler\Crawler; use Symfony\Contracts\HttpClient\HttpClientInterface; -class PollinProvider implements InfoProviderInterface +class PollinProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface { public function __construct(private readonly HttpClientInterface $client, @@ -141,11 +141,16 @@ class PollinProvider implements InfoProviderInterface $orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here //Calculate the mass - $massStr = $dom->filter('meta[itemprop="weight"]')->attr('content'); - 
//Remove the unit - $massStr = str_replace('kg', '', $massStr); - //Convert to float and convert to grams - $mass = (float) $massStr * 1000; + $massDom = $dom->filter('meta[itemprop="weight"]'); + if ($massDom->count() > 0) { + $massStr = $massDom->attr('content'); + $massStr = str_replace('kg', '', $massStr); + //Convert to float and convert to grams + $mass = (float) $massStr * 1000; + } else { + $mass = null; + } + //Parse purchase info $purchaseInfo = new PurchaseInfoDTO('Pollin', $orderId, $this->parsePrices($dom), $productPageUrl); @@ -248,4 +253,22 @@ class PollinProvider implements InfoProviderInterface ProviderCapabilities::DATASHEET ]; } -} \ No newline at end of file + + public function getHandledDomains(): array + { + return ['pollin.de']; + } + + public function getIDFromURL(string $url): ?string + { + //URL like: https://www.pollin.de/p/shelly-bluetooth-schalter-und-dimmer-blu-zb-button-plug-play-mocha-592325 + + //Extract the 6-digit number at the end of the URL path (a trailing slash, query string or fragment may follow) + $matches = []; + if (preg_match('/-(\d{6})(?:[\/?#]|$)/', $url, $matches) === 1) { + return $matches[1]; + } + + return null; + } +} diff --git a/src/Services/InfoProviderSystem/Providers/TMEProvider.php b/src/Services/InfoProviderSystem/Providers/TMEProvider.php index 9bc73f09..938bc7b3 100644 --- a/src/Services/InfoProviderSystem/Providers/TMEProvider.php +++ b/src/Services/InfoProviderSystem/Providers/TMEProvider.php @@ -32,7 +32,7 @@ use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use App\Settings\InfoProviderSystem\TMESettings; -class TMEProvider implements InfoProviderInterface +class TMEProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface { private const VENDOR_NAME = 'TME'; @@ -296,4 +296,22 @@ class TMEProvider implements InfoProviderInterface ProviderCapabilities::PRICE, ]; } + + public function getHandledDomains(): array + { + return ['tme.eu']; + } + + public function getIDFromURL(string $url): 
?string + { + //Input: https://www.tme.eu/de/details/fi321_se/kuhler/alutronic/ + //The ID is the part after the details segment and before the next slash + + $matches = []; + if (preg_match('#/details/([^/]+)/#', $url, $matches) === 1) { + return $matches[1]; + } + + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/URLHandlerInfoProviderInterface.php b/src/Services/InfoProviderSystem/Providers/URLHandlerInfoProviderInterface.php new file mode 100644 index 00000000..c0506648 --- /dev/null +++ b/src/Services/InfoProviderSystem/Providers/URLHandlerInfoProviderInterface.php @@ -0,0 +1,43 @@ +. + */ + +declare(strict_types=1); + + +namespace App\Services\InfoProviderSystem\Providers; + +/** + * Implemented by info providers which handle URLs of certain domains and can extract the ID of a part from such an URL + */ +interface URLHandlerInfoProviderInterface +{ + /** + * Returns a list of supported domains (e.g. ["digikey.com"]) + * @return string[] An array of supported domains + */ + public function getHandledDomains(): array; + + /** + * Extracts the unique ID of a part from a given URL. It is okay if this is not a canonical ID, as long as it can be used to uniquely identify the part within this provider. 
+ * @param string $url The URL to extract the ID from + * @return string|null The extracted ID, or null if the URL is not valid for this provider + */ + public function getIDFromURL(string $url): ?string; +} diff --git a/tests/Services/InfoProviderSystem/ProviderRegistryTest.php b/tests/Services/InfoProviderSystem/ProviderRegistryTest.php index 9026c5bf..48a1847f 100644 --- a/tests/Services/InfoProviderSystem/ProviderRegistryTest.php +++ b/tests/Services/InfoProviderSystem/ProviderRegistryTest.php @@ -24,6 +24,7 @@ namespace App\Tests\Services\InfoProviderSystem; use App\Services\InfoProviderSystem\ProviderRegistry; use App\Services\InfoProviderSystem\Providers\InfoProviderInterface; +use App\Services\InfoProviderSystem\Providers\URLHandlerInfoProviderInterface; use PHPUnit\Framework\TestCase; class ProviderRegistryTest extends TestCase @@ -44,9 +45,10 @@ class ProviderRegistryTest extends TestCase public function getMockProvider(string $key, bool $active = true): InfoProviderInterface { - $mock = $this->createMock(InfoProviderInterface::class); + $mock = $this->createMockForIntersectionOfInterfaces([InfoProviderInterface::class, URLHandlerInfoProviderInterface::class]); $mock->method('getProviderKey')->willReturn($key); $mock->method('isActive')->willReturn($active); + $mock->method('getHandledDomains')->willReturn(["$key.com", "test.$key.de"]); return $mock; } @@ -109,4 +111,18 @@ class ProviderRegistryTest extends TestCase $registry->getProviders(); } + + public function testGetProviderHandlingDomain(): void + { + $registry = new ProviderRegistry($this->providers); + + $this->assertEquals($this->providers[0], $registry->getProviderHandlingDomain('test1.com')); + $this->assertEquals($this->providers[0], $registry->getProviderHandlingDomain('www.test1.com')); //Subdomain should also work + + $this->assertEquals( + $this->providers[1], + $registry->getProviderHandlingDomain('test.test2.de') + ); + } + } diff --git 
a/tests/Services/LabelSystem/BarcodeScanner/BarcodeRedirectorTest.php b/tests/Services/LabelSystem/BarcodeScanner/BarcodeRedirectorTest.php index c40e141d..c5bdb02d 100644 --- a/tests/Services/LabelSystem/BarcodeScanner/BarcodeRedirectorTest.php +++ b/tests/Services/LabelSystem/BarcodeScanner/BarcodeRedirectorTest.php @@ -64,7 +64,7 @@ final class BarcodeRedirectorTest extends KernelTestCase { yield [new LocalBarcodeScanResult(LabelSupportedElement::PART, 1, BarcodeSourceType::INTERNAL), '/en/part/1']; //Part lot redirects to Part info page (Part lot 1 is associated with part 3) - yield [new LocalBarcodeScanResult(LabelSupportedElement::PART_LOT, 1, BarcodeSourceType::INTERNAL), '/en/part/3']; + yield [new LocalBarcodeScanResult(LabelSupportedElement::PART_LOT, 1, BarcodeSourceType::INTERNAL), '/en/part/3?highlightLot=1']; yield [new LocalBarcodeScanResult(LabelSupportedElement::STORELOCATION, 1, BarcodeSourceType::INTERNAL), '/en/store_location/1/parts']; }