diff --git a/docs/usage/information_provider_system.md b/docs/usage/information_provider_system.md index da8ea32b..6cdb5183 100644 --- a/docs/usage/information_provider_system.md +++ b/docs/usage/information_provider_system.md @@ -96,6 +96,21 @@ The following providers are currently available and shipped with Part-DB: (All trademarks are property of their respective owners. Part-DB is not affiliated with any of the companies.) +### Generic Web URL Provider +The Generic Web URL Provider can extract part information from any webpage that contains structured data in the +[Schema.org](https://schema.org/) format. Many e-commerce websites use this format to provide detailed product information +for search engines and other services. Therefore, it allows Part-DB to retrieve rudimentary part information (like name, image and price) +from a wide range of websites without the need for a dedicated API integration. +To use the Generic Web URL Provider, simply enable it in the information provider settings. No additional configuration +is required. Afterwards, you can enter any product URL in the search field, and Part-DB will attempt to extract the relevant part information +from the webpage. + +Please note that if this provider is enabled, Part-DB will make HTTP requests to external websites to fetch product data, which +may have privacy and security implications. + +The following env configuration options are available: +* `PROVIDER_GENERIC_WEB_ENABLED`: Set this to `1` to enable the Generic Web URL Provider (optional, default: `0`) + ### Octopart The Octopart provider uses the [Octopart / Nexar API](https://nexar.com/api) to search for parts and get information. 
diff --git a/src/Controller/InfoProviderController.php b/src/Controller/InfoProviderController.php index e5a5d87b..deec8a57 100644 --- a/src/Controller/InfoProviderController.php +++ b/src/Controller/InfoProviderController.php @@ -30,6 +30,7 @@ use App\Form\InfoProviderSystem\PartSearchType; use App\Services\InfoProviderSystem\ExistingPartFinder; use App\Services\InfoProviderSystem\PartInfoRetriever; use App\Services\InfoProviderSystem\ProviderRegistry; +use App\Services\InfoProviderSystem\Providers\GenericWebProvider; use App\Settings\AppSettings; use App\Settings\InfoProviderSystem\InfoProviderGeneralSettings; use Doctrine\ORM\EntityManagerInterface; @@ -39,6 +40,7 @@ use Psr\Log\LoggerInterface; use Symfony\Bridge\Doctrine\Attribute\MapEntity; use Symfony\Bundle\FrameworkBundle\Controller\AbstractController; use Symfony\Component\Form\Extension\Core\Type\SubmitType; +use Symfony\Component\Form\Extension\Core\Type\UrlType; use Symfony\Component\HttpClient\Exception\ClientException; use Symfony\Component\HttpClient\Exception\TransportException; use Symfony\Component\HttpFoundation\Request; @@ -208,4 +210,58 @@ class InfoProviderController extends AbstractController 'update_target' => $update_target ]); } + + #[Route('/from_url', name: 'info_providers_from_url')] + public function fromURL(Request $request, GenericWebProvider $provider): Response + { + $this->denyAccessUnlessGranted('@info_providers.create_parts'); + + if (!$provider->isActive()) { + $this->addFlash('error', "Generic Web Provider is not active. 
Please enable it in the provider settings."); + return $this->redirectToRoute('info_providers_list'); + } + + $formBuilder = $this->createFormBuilder(); + $formBuilder->add('url', UrlType::class, [ + 'label' => 'info_providers.from_url.url.label', + 'required' => true, + ]); + $formBuilder->add('submit', SubmitType::class, [ + 'label' => 'info_providers.search.submit', + ]); + + $form = $formBuilder->getForm(); + $form->handleRequest($request); + + $partDetail = null; + if ($form->isSubmitted() && $form->isValid()) { + //Try to retrieve the part detail from the given URL + $url = $form->get('url')->getData(); + try { + $searchResult = $this->infoRetriever->searchByKeyword( + keyword: $url, + providers: [$provider] + ); + + if (count($searchResult) === 0) { + $this->addFlash('warning', t('info_providers.from_url.no_part_found')); + } else { + $searchResult = $searchResult[0]; + //Redirect to the part creation page with the found part detail + return $this->redirectToRoute('info_providers_create_part', [ + 'providerKey' => $searchResult->provider_key, + 'providerId' => $searchResult->provider_id, + ]); + } + } catch (ExceptionInterface $e) { + $this->addFlash('error', t('info_providers.search.error.general_exception', ['%type%' => (new \ReflectionClass($e))->getShortName()])); + } + } + + return $this->render('info_providers/from_url/from_url.html.twig', [ + 'form' => $form, + 'partDetail' => $partDetail, + ]); + + } } diff --git a/src/Exceptions/ProviderIDNotSupportedException.php b/src/Exceptions/ProviderIDNotSupportedException.php new file mode 100644 index 00000000..429f43ea --- /dev/null +++ b/src/Exceptions/ProviderIDNotSupportedException.php @@ -0,0 +1,32 @@ +. 
+ */ + +declare(strict_types=1); + + +namespace App\Exceptions; + +class ProviderIDNotSupportedException extends \RuntimeException +{ + public static function fromProvider(string $providerKey, string $id): self //Named constructor: must be static to be callable without an existing instance + { + return new self(sprintf('The given ID %s is not supported by the provider %s.', $id, $providerKey)); + } +} diff --git a/src/Services/InfoProviderSystem/ProviderRegistry.php b/src/Services/InfoProviderSystem/ProviderRegistry.php index f6c398d2..18b8a37a 100644 --- a/src/Services/InfoProviderSystem/ProviderRegistry.php +++ b/src/Services/InfoProviderSystem/ProviderRegistry.php @@ -24,6 +24,7 @@ declare(strict_types=1); namespace App\Services\InfoProviderSystem; use App\Services\InfoProviderSystem\Providers\InfoProviderInterface; +use App\Services\InfoProviderSystem\Providers\URLHandlerInfoProviderInterface; /** * This class keeps track of all registered info providers and allows to find them by their key @@ -47,6 +48,8 @@ final class ProviderRegistry */ private array $providers_disabled = []; + private array $providers_by_domain = []; + /** * @var bool Whether the registry has been initialized */ @@ -78,6 +81,14 @@ final class ProviderRegistry $this->providers_by_name[$key] = $provider; if ($provider->isActive()) { $this->providers_active[$key] = $provider; + if ($provider instanceof URLHandlerInfoProviderInterface) { + foreach ($provider->getHandledDomains() as $domain) { + if (isset($this->providers_by_domain[$domain])) { + throw new \LogicException("Domain $domain is already handled by another provider"); + } + $this->providers_by_domain[$domain] = $provider; + } + } } else { $this->providers_disabled[$key] = $provider; } @@ -139,4 +150,29 @@ final class ProviderRegistry return $this->providers_disabled; } -} \ No newline at end of file + + public function getProviderHandlingDomain(string $domain): (InfoProviderInterface&URLHandlerInfoProviderInterface)|null + { + if (!$this->initialized) { + $this->initStructures(); + } + + //Check if the domain is 
directly existing: + if (isset($this->providers_by_domain[$domain])) { + return $this->providers_by_domain[$domain]; + } + + //Otherwise check for subdomains: + $parts = explode('.', $domain); + while (count($parts) > 2) { + array_shift($parts); + $check_domain = implode('.', $parts); + if (isset($this->providers_by_domain[$check_domain])) { + return $this->providers_by_domain[$check_domain]; + } + } + + //If we found nothing, return null + return null; + } +} diff --git a/src/Services/InfoProviderSystem/Providers/ConradProvider.php b/src/Services/InfoProviderSystem/Providers/ConradProvider.php index 6212f148..32434dee 100644 --- a/src/Services/InfoProviderSystem/Providers/ConradProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ConradProvider.php @@ -30,9 +30,10 @@ use App\Services\InfoProviderSystem\DTOs\PriceDTO; use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use App\Settings\InfoProviderSystem\ConradSettings; +use App\Settings\InfoProviderSystem\ConradShopIDs; use Symfony\Contracts\HttpClient\HttpClientInterface; -readonly class ConradProvider implements InfoProviderInterface +readonly class ConradProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface { private const SEARCH_ENDPOINT = '/search/1/v3/facetSearch'; @@ -317,4 +318,26 @@ readonly class ConradProvider implements InfoProviderInterface ProviderCapabilities::PRICE, ]; } + + public function getHandledDomains(): array + { + $domains = []; + foreach (ConradShopIDs::cases() as $shopID) { + $domains[] = $shopID->getDomain(); + } + return array_unique($domains); + } + + public function getIDFromURL(string $url): ?string + { + //Input: https://www.conrad.de/de/p/apple-iphone-air-wolkenweiss-256-gb-eek-a-a-g-16-5-cm-6-5-zoll-3475299.html + //The numbers before the optional .html (and an optional query string or fragment) are the product ID + + $matches = []; + if (preg_match('/-(\d+)(?:\.html)?(?:[?#].*)?$/', $url, $matches) === 1) { + return $matches[1]; + } + + 
return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/Element14Provider.php b/src/Services/InfoProviderSystem/Providers/Element14Provider.php index 27dfb908..9ae45728 100644 --- a/src/Services/InfoProviderSystem/Providers/Element14Provider.php +++ b/src/Services/InfoProviderSystem/Providers/Element14Provider.php @@ -33,7 +33,7 @@ use App\Settings\InfoProviderSystem\Element14Settings; use Composer\CaBundle\CaBundle; use Symfony\Contracts\HttpClient\HttpClientInterface; -class Element14Provider implements InfoProviderInterface +class Element14Provider implements InfoProviderInterface, URLHandlerInfoProviderInterface { private const ENDPOINT_URL = 'https://api.element14.com/catalog/products'; @@ -309,4 +309,21 @@ class Element14Provider implements InfoProviderInterface ProviderCapabilities::DATASHEET, ]; } + + public function getHandledDomains(): array + { + return ['element14.com', 'farnell.com', 'newark.com']; + } + + public function getIDFromURL(string $url): ?string + { + //Input URL example: https://de.farnell.com/on-semiconductor/bc547b/transistor-npn-to-92/dp/1017673 + //The digits after the /dp/ are the part ID + $matches = []; + if (preg_match('#/dp/(\d+)#', $url, $matches) === 1) { + return $matches[1]; + } + + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php new file mode 100644 index 00000000..66d45707 --- /dev/null +++ b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php @@ -0,0 +1,403 @@ +. 
+ */ + +declare(strict_types=1); + + +namespace App\Services\InfoProviderSystem\Providers; + +use App\Exceptions\ProviderIDNotSupportedException; +use App\Services\InfoProviderSystem\DTOs\ParameterDTO; +use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; +use App\Services\InfoProviderSystem\DTOs\PriceDTO; +use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; +use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; +use App\Services\InfoProviderSystem\PartInfoRetriever; +use App\Services\InfoProviderSystem\ProviderRegistry; +use App\Settings\InfoProviderSystem\GenericWebProviderSettings; +use Symfony\Component\DomCrawler\Crawler; +use Symfony\Contracts\HttpClient\HttpClientInterface; + +class GenericWebProvider implements InfoProviderInterface +{ + + public const DISTRIBUTOR_NAME = 'Website'; + + private readonly HttpClientInterface $httpClient; + + public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings, + private readonly ProviderRegistry $providerRegistry, private readonly PartInfoRetriever $infoRetriever, + ) + { + $this->httpClient = $httpClient->withOptions( + [ + 'headers' => [ + 'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36', + ], + 'timeout' => 15, + ] + ); + } + + public function getProviderInfo(): array + { + return [ + 'name' => 'Generic Web URL', + 'description' => 'Tries to extract a part from a given product webpage URL using common metadata standards like JSON-LD and OpenGraph.', + //'url' => 'https://example.com', + 'disabled_help' => 'Enable in settings to use this provider', + 'settings_class' => GenericWebProviderSettings::class, + ]; + } + + public function getProviderKey(): string + { + return 'generic_web'; + } + + public function isActive(): bool + { + return $this->settings->enabled; + } + + public function searchByKeyword(string $keyword): array + { + $url = 
$this->fixAndValidateURL($keyword); + + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->delegateToOtherProvider($url); + if ($delegatedPart !== null) { + return [$delegatedPart]; + } + + try { + return [ + $this->getDetails($keyword, false) //We already tried delegation + ]; } catch (ProviderIDNotSupportedException $e) { + return []; + } + } + + private function extractShopName(string $url): string + { + $host = parse_url($url, PHP_URL_HOST); + if ($host === false || $host === null) { + return self::DISTRIBUTOR_NAME; + } + return $host; + } + + private function productJsonLdToPart(array $jsonLd, string $url, Crawler $dom): PartDetailDTO + { + $notes = $jsonLd['description'] ?? ""; + if (isset($jsonLd['disambiguatingDescription'])) { + if (!empty($notes)) { + $notes .= "\n\n"; + } + $notes .= $jsonLd['disambiguatingDescription']; + } + + $vendor_infos = null; + if (isset($jsonLd['offers'])) { + + if (array_is_list($jsonLd['offers'])) { + $offer = $jsonLd['offers'][0]; + } else { + $offer = $jsonLd['offers']; + } + + //Make $jsonLd['url'] absolute if it's relative + if (isset($jsonLd['url']) && parse_url($jsonLd['url'], PHP_URL_SCHEME) === null) { + $parsedUrl = parse_url($url); + $scheme = $parsedUrl['scheme'] ?? 'https'; + $host = $parsedUrl['host'] ?? ''; + $jsonLd['url'] = $scheme.'://'.$host.$jsonLd['url']; + } + + $prices = []; + if (isset($offer['price'])) { + $prices[] = new PriceDTO( + minimum_discount_amount: 1, + price: (string) $offer['price'], + currency_iso_code: $offer['priceCurrency'] ?? null + ); + } else if (isset($offer['offers']) && array_is_list($offer['offers'])) { + //Some sites nest offers + foreach ($offer['offers'] as $subOffer) { + if (isset($subOffer['price'])) { + $prices[] = new PriceDTO( + minimum_discount_amount: 1, + price: (string) $subOffer['price'], + currency_iso_code: $subOffer['priceCurrency'] ?? 
null + ); + } + } + } + + $vendor_infos = [new PurchaseInfoDTO( + distributor_name: $this->extractShopName($url), + order_number: (string) ($jsonLd['sku'] ?? $jsonLd['@id'] ?? $jsonLd['gtin'] ?? 'Unknown'), + prices: $prices, + product_url: $jsonLd['url'] ?? $url, + )]; + } + + $image = null; + if (isset($jsonLd['image'])) { + if (is_array($jsonLd['image'])) { + //A list of images yields its first entry, anything else is a single image object + $image = array_is_list($jsonLd['image']) + ? ($jsonLd['image'][0] ?? null) : $jsonLd['image']; + } elseif (is_string($jsonLd['image'])) { + $image = $jsonLd['image']; + } + } + //If image is an object (e.g. with @type ImageObject), extract the url + if (is_array($image)) { + $image = $image['contentUrl'] ?? $image['url'] ?? null; + } + + //Try to extract parameters from additionalProperty + $parameters = []; + if (isset($jsonLd['additionalProperty']) && array_is_list($jsonLd['additionalProperty'])) { + foreach ($jsonLd['additionalProperty'] as $property) { //TODO: Handle minValue and maxValue + if (isset ($property['unitText'])) { + $parameters[] = ParameterDTO::parseValueField( + name: $property['name'] ?? 'Unknown', + value: $property['value'] ?? '', + unit: $property['unitText'] + ); + } else { + $parameters[] = ParameterDTO::parseValueIncludingUnit( + name: $property['name'] ?? 'Unknown', + value: $property['value'] ?? '' + ); + } + } + } + + + return new PartDetailDTO( + provider_key: $this->getProviderKey(), + provider_id: $url, + name: $jsonLd ['name'] ?? 'Unknown Name', + description: $this->getMetaContent($dom, 'og:description') ?? $this->getMetaContent($dom, 'description') ?? '', + category: isset($jsonLd['category']) && is_string($jsonLd['category']) ? $jsonLd['category'] : null, + manufacturer: $jsonLd['manufacturer']['name'] ?? $jsonLd['brand']['name'] ?? null, + mpn: $jsonLd['mpn'] ?? 
null, + preview_image_url: $image, + provider_url: $url, + notes: $notes, + parameters: $parameters, + vendor_infos: $vendor_infos, + mass: isset($jsonLd['weight']['value']) ? (float)$jsonLd['weight']['value'] : null, + ); + } + + /** + * Decodes JSON in a forgiving way, trying to fix common issues. + * @param string $json + * @return array + * @throws \JsonException + */ + private function json_decode_forgiving(string $json): array + { + //Sanitize common issues + $json = preg_replace("/[\r\n]+/", " ", $json); + return json_decode($json, true, 512, JSON_THROW_ON_ERROR); + } + + /** + * Gets the content of a meta tag by its name or property attribute, or null if not found + * @param Crawler $dom + * @param string $name + * @return string|null + */ + private function getMetaContent(Crawler $dom, string $name): ?string + { + $meta = $dom->filter('meta[property="'.$name.'"]'); + if ($meta->count() > 0) { + return $meta->attr('content'); + } + + //Try name attribute + $meta = $dom->filter('meta[name="'.$name.'"]'); + if ($meta->count() > 0) { + return $meta->attr('content'); + } + + return null; + } + + /** + * Delegates the URL to another provider if possible, otherwise return null + * @param string $url + * @return SearchResultDTO|null + */ + private function delegateToOtherProvider(string $url): ?SearchResultDTO + { + //Extract domain from url: + $host = parse_url($url, PHP_URL_HOST); + if ($host === false || $host === null) { + return null; + } + + $provider = $this->providerRegistry->getProviderHandlingDomain($host); + + if ($provider !== null && $provider->isActive() && $provider->getProviderKey() !== $this->getProviderKey()) { + try { + $id = $provider->getIDFromURL($url); + if ($id !== null) { + $results = $this->infoRetriever->searchByKeyword($id, [$provider]); + if (count($results) > 0) { + return $results[0]; + } + } + return null; + } catch (ProviderIDNotSupportedException $e) { + //Ignore and continue + return null; + } + } + + return null; + } + + private 
function fixAndValidateURL(string $url): string + { + $originalUrl = $url; + + //Add scheme if missing + if (!preg_match('/^https?:\/\//', $url)) { + //Remove any leading slashes + $url = ltrim($url, '/'); + + $url = 'https://'.$url; + } + + //If this is not a valid URL with host, domain and path, throw an exception + if (filter_var($url, FILTER_VALIDATE_URL) === false || + parse_url($url, PHP_URL_HOST) === null || + parse_url($url, PHP_URL_PATH) === null) { + throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$originalUrl); + } + + return $url; + } + + public function getDetails(string $id, bool $check_for_delegation = true): PartDetailDTO + { + $url = $this->fixAndValidateURL($id); + + if ($check_for_delegation) { + //Before loading the page, try to delegate to another provider + $delegatedPart = $this->delegateToOtherProvider($url); + if ($delegatedPart !== null) { + return $this->infoRetriever->getDetailsForSearchResult($delegatedPart); + } + } + + //Try to get the webpage content + $response = $this->httpClient->request('GET', $url); + $content = $response->getContent(); + + $dom = new Crawler($content); + + //Try to determine a canonical URL + $canonicalURL = $url; + if ($dom->filter('link[rel="canonical"]')->count() > 0) { + $canonicalURL = $dom->filter('link[rel="canonical"]')->attr('href'); + } else if ($dom->filter('meta[property="og:url"]')->count() > 0) { + $canonicalURL = $dom->filter('meta[property="og:url"]')->attr('content'); + } + + //If the canonical URL is relative, make it absolute + if (parse_url($canonicalURL, PHP_URL_SCHEME) === null) { + $parsedUrl = parse_url($url); + $scheme = $parsedUrl['scheme'] ?? 'https'; + $host = $parsedUrl['host'] ?? 
''; + $canonicalURL = $scheme.'://'.$host.$canonicalURL; + } + + //Try to find json-ld data in the head + $jsonLdNodes = $dom->filter('script[type="application/ld+json"]'); + foreach ($jsonLdNodes as $node) { + $jsonLd = $this->json_decode_forgiving($node->textContent); + //If the content of json-ld is an array, try to find a product inside + if (!array_is_list($jsonLd)) { + $jsonLd = [$jsonLd]; + } + foreach ($jsonLd as $item) { + if (isset($item['@type']) && $item['@type'] === 'Product') { + return $this->productJsonLdToPart($item, $canonicalURL, $dom); + } + } + } + + //If no JSON-LD data is found, try to extract basic data from meta tags + $pageTitle = $dom->filter('title')->count() > 0 ? $dom->filter('title')->text() : 'Unknown'; + + $prices = []; + if ($price = $this->getMetaContent($dom, 'product:price:amount')) { + $prices[] = new PriceDTO( + minimum_discount_amount: 1, + price: $price, + currency_iso_code: $this->getMetaContent($dom, 'product:price:currency'), + ); + } else { + //Amazon fallback + $amazonAmount = $dom->filter('input[type="hidden"][name*="amount"]'); + if ($amazonAmount->count() > 0) { + $prices[] = new PriceDTO( + minimum_discount_amount: 1, + price: $amazonAmount->first()->attr('value'), + currency_iso_code: $dom->filter('input[type="hidden"][name*="currencyCode"]')->first()->attr('value'), + ); + } + } + + $vendor_infos = [new PurchaseInfoDTO( + distributor_name: $this->extractShopName($canonicalURL), + order_number: 'Unknown', + prices: $prices, + product_url: $canonicalURL, + )]; + + return new PartDetailDTO( + provider_key: $this->getProviderKey(), + provider_id: $canonicalURL, + name: $this->getMetaContent($dom, 'og:title') ?? $pageTitle, + description: $this->getMetaContent($dom, 'og:description') ?? $this->getMetaContent($dom, 'description') ?? 
'', + manufacturer: $this->getMetaContent($dom, 'product:brand'), + preview_image_url: $this->getMetaContent($dom, 'og:image'), + provider_url: $canonicalURL, + vendor_infos: $vendor_infos, + ); + } + + public function getCapabilities(): array + { + return [ + ProviderCapabilities::BASIC, + ProviderCapabilities::PICTURE, + ProviderCapabilities::PRICE + ]; + } +} diff --git a/src/Services/InfoProviderSystem/Providers/LCSCProvider.php b/src/Services/InfoProviderSystem/Providers/LCSCProvider.php index ede34eb8..1b807eff 100755 --- a/src/Services/InfoProviderSystem/Providers/LCSCProvider.php +++ b/src/Services/InfoProviderSystem/Providers/LCSCProvider.php @@ -33,7 +33,7 @@ use App\Settings\InfoProviderSystem\LCSCSettings; use Symfony\Component\HttpFoundation\Cookie; use Symfony\Contracts\HttpClient\HttpClientInterface; -class LCSCProvider implements BatchInfoProviderInterface +class LCSCProvider implements BatchInfoProviderInterface, URLHandlerInfoProviderInterface { private const ENDPOINT_URL = 'https://wmsc.lcsc.com/ftps/wm'; @@ -452,4 +452,21 @@ class LCSCProvider implements BatchInfoProviderInterface ProviderCapabilities::FOOTPRINT, ]; } + + public function getHandledDomains(): array + { + return ['lcsc.com']; + } + + public function getIDFromURL(string $url): ?string + { + //Input example: https://www.lcsc.com/product-detail/C258144.html?s_z=n_BC547 + //The part between the "C" and the ".html" is the unique ID + + $matches = []; + if (preg_match("#/product-detail/(\w+)\.html#", $url, $matches) > 0) { + return $matches[1]; + } + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/PollinProvider.php b/src/Services/InfoProviderSystem/Providers/PollinProvider.php index 2c5d68a3..6ac969d3 100644 --- a/src/Services/InfoProviderSystem/Providers/PollinProvider.php +++ b/src/Services/InfoProviderSystem/Providers/PollinProvider.php @@ -36,7 +36,7 @@ use Symfony\Component\DependencyInjection\Attribute\Autowire; use Symfony\Component\DomCrawler\Crawler; 
use Symfony\Contracts\HttpClient\HttpClientInterface; -class PollinProvider implements InfoProviderInterface +class PollinProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface { public function __construct(private readonly HttpClientInterface $client, @@ -141,11 +141,16 @@ class PollinProvider implements InfoProviderInterface $orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here //Calculate the mass - $massStr = $dom->filter('meta[itemprop="weight"]')->attr('content'); - //Remove the unit - $massStr = str_replace('kg', '', $massStr); - //Convert to float and convert to grams - $mass = (float) $massStr * 1000; + $massDom = $dom->filter('meta[itemprop="weight"]'); + if ($massDom->count() > 0) { + $massStr = $massDom->attr('content'); + $massStr = str_replace('kg', '', $massStr); + //Convert to float and convert to grams + $mass = (float) $massStr * 1000; + } else { + $mass = null; + } + //Parse purchase info $purchaseInfo = new PurchaseInfoDTO('Pollin', $orderId, $this->parsePrices($dom), $productPageUrl); @@ -248,4 +253,22 @@ class PollinProvider implements InfoProviderInterface ProviderCapabilities::DATASHEET ]; } -} \ No newline at end of file + + public function getHandledDomains(): array + { + return ['pollin.de']; + } + + public function getIDFromURL(string $url): ?string + { + //URL like: https://www.pollin.de/p/shelly-bluetooth-schalter-und-dimmer-blu-zb-button-plug-play-mocha-592325 + + //Extract the 6-digit number at the end of the URL + $matches = []; + if (preg_match('/-(\d{6})(?:\/|$)/', $url, $matches)) { + return $matches[1]; + } + + return null; + } +} diff --git a/src/Services/InfoProviderSystem/Providers/TMEProvider.php b/src/Services/InfoProviderSystem/Providers/TMEProvider.php index 9bc73f09..938bc7b3 100644 --- a/src/Services/InfoProviderSystem/Providers/TMEProvider.php +++ b/src/Services/InfoProviderSystem/Providers/TMEProvider.php @@ -32,7 +32,7 @@ use 
App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use App\Settings\InfoProviderSystem\TMESettings; -class TMEProvider implements InfoProviderInterface +class TMEProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface { private const VENDOR_NAME = 'TME'; @@ -296,4 +296,22 @@ class TMEProvider implements InfoProviderInterface ProviderCapabilities::PRICE, ]; } + + public function getHandledDomains(): array + { + return ['tme.eu']; + } + + public function getIDFromURL(string $url): ?string + { + //Input: https://www.tme.eu/de/details/fi321_se/kuhler/alutronic/ + //The ID is the part after the details segment and before the next slash + + $matches = []; + if (preg_match('#/details/([^/]+)/#', $url, $matches) === 1) { + return $matches[1]; + } + + return null; + } } diff --git a/src/Services/InfoProviderSystem/Providers/URLHandlerInfoProviderInterface.php b/src/Services/InfoProviderSystem/Providers/URLHandlerInfoProviderInterface.php new file mode 100644 index 00000000..c0506648 --- /dev/null +++ b/src/Services/InfoProviderSystem/Providers/URLHandlerInfoProviderInterface.php @@ -0,0 +1,43 @@ +. + */ + +declare(strict_types=1); + + +namespace App\Services\InfoProviderSystem\Providers; + +/** + * Info providers implementing this interface declare the domains they handle and can extract their own part ID from a URL on these domains. + */ +interface URLHandlerInfoProviderInterface +{ + /** + * Returns a list of supported domains (e.g. ["digikey.com"]) + * @return array An array of supported domains + */ + public function getHandledDomains(): array; + + /** + * Extracts the unique ID of a part from a given URL. It is okay if this is not a canonical ID, as long as it can be used to uniquely identify the part within this provider. 
+ * @param string $url The URL to extract the ID from + * @return string|null The extracted ID, or null if the URL is not valid for this provider + */ + public function getIDFromURL(string $url): ?string; +} diff --git a/src/Services/Trees/ToolsTreeBuilder.php b/src/Services/Trees/ToolsTreeBuilder.php index 37a09b09..c8afac12 100644 --- a/src/Services/Trees/ToolsTreeBuilder.php +++ b/src/Services/Trees/ToolsTreeBuilder.php @@ -39,6 +39,8 @@ use App\Entity\UserSystem\User; use App\Helpers\Trees\TreeViewNode; use App\Services\Cache\UserCacheKeyGenerator; use App\Services\ElementTypeNameGenerator; +use App\Services\InfoProviderSystem\Providers\GenericWebProvider; +use App\Settings\InfoProviderSystem\GenericWebProviderSettings; use Symfony\Bundle\SecurityBundle\Security; use Symfony\Component\Routing\Generator\UrlGeneratorInterface; use Symfony\Contracts\Cache\ItemInterface; @@ -58,6 +60,7 @@ class ToolsTreeBuilder protected UserCacheKeyGenerator $keyGenerator, protected Security $security, private readonly ElementTypeNameGenerator $elementTypeNameGenerator, + private readonly GenericWebProviderSettings $genericWebProviderSettings ) { } @@ -147,6 +150,13 @@ class ToolsTreeBuilder $this->urlGenerator->generate('info_providers_search') ))->setIcon('fa-treeview fa-fw fa-solid fa-cloud-arrow-down'); + if ($this->genericWebProviderSettings->enabled) { + $nodes[] = (new TreeViewNode( + $this->translator->trans('info_providers.from_url.title'), + $this->urlGenerator->generate('info_providers_from_url') + ))->setIcon('fa-treeview fa-fw fa-solid fa-book-atlas'); + } + $nodes[] = (new TreeViewNode( $this->translator->trans('info_providers.bulk_import.manage_jobs'), $this->urlGenerator->generate('bulk_info_provider_manage') diff --git a/src/Settings/InfoProviderSystem/GenericWebProviderSettings.php b/src/Settings/InfoProviderSystem/GenericWebProviderSettings.php new file mode 100644 index 00000000..07972141 --- /dev/null +++ 
b/src/Settings/InfoProviderSystem/GenericWebProviderSettings.php @@ -0,0 +1,43 @@ +. + */ + +declare(strict_types=1); + + +namespace App\Settings\InfoProviderSystem; + +use App\Settings\SettingsIcon; +use Jbtronics\SettingsBundle\Metadata\EnvVarMode; +use Jbtronics\SettingsBundle\Settings\Settings; +use Jbtronics\SettingsBundle\Settings\SettingsParameter; +use Jbtronics\SettingsBundle\Settings\SettingsTrait; +use Symfony\Component\Translation\TranslatableMessage as TM; + +#[Settings(name: "generic_web_provider", label: new TM("settings.ips.generic_web_provider"), description: new TM("settings.ips.generic_web_provider.description"))] +#[SettingsIcon("fa-plug")] +class GenericWebProviderSettings +{ + use SettingsTrait; + + #[SettingsParameter(label: new TM("settings.ips.lcsc.enabled"), description: new TM("settings.ips.generic_web_provider.enabled.help"), + envVar: "bool:PROVIDER_GENERIC_WEB_ENABLED", envVarMode: EnvVarMode::OVERWRITE + )] + public bool $enabled = false; +} diff --git a/src/Settings/InfoProviderSystem/InfoProviderSettings.php b/src/Settings/InfoProviderSystem/InfoProviderSettings.php index fb31bdb9..3e78233f 100644 --- a/src/Settings/InfoProviderSystem/InfoProviderSettings.php +++ b/src/Settings/InfoProviderSystem/InfoProviderSettings.php @@ -37,6 +37,9 @@ class InfoProviderSettings #[EmbeddedSettings] public ?InfoProviderGeneralSettings $general = null; + #[EmbeddedSettings] + public ?GenericWebProviderSettings $genericWebProvider = null; + #[EmbeddedSettings] public ?DigikeySettings $digikey = null; diff --git a/templates/_navbar.html.twig b/templates/_navbar.html.twig index 446ccdab..c4dfbe0f 100644 --- a/templates/_navbar.html.twig +++ b/templates/_navbar.html.twig @@ -10,9 +10,9 @@ - {% if is_granted("@tools.label_scanner") %} + {% if is_granted("@tools.label_scanner") %} - + {% endif %} @@ -52,6 +52,14 @@ {% trans %}info_providers.search.title{% endtrans %} + {% if settings_instance('generic_web_provider').enabled %} +
  • + + + {% trans %}info_providers.from_url.title{% endtrans %} + +
  • + {% endif %} {% endif %} {% if is_granted('@parts.import') %} @@ -69,7 +77,7 @@ {% if is_granted('@parts.read') %} {{ search.search_form("navbar") }} - {# {% include "_navbar_search.html.twig" %} #} + {# {% include "_navbar_search.html.twig" %} #} {% endif %} @@ -145,4 +153,4 @@ - \ No newline at end of file + diff --git a/templates/info_providers/from_url/from_url.html.twig b/templates/info_providers/from_url/from_url.html.twig new file mode 100644 index 00000000..5aad1a03 --- /dev/null +++ b/templates/info_providers/from_url/from_url.html.twig @@ -0,0 +1,21 @@ +{% extends "main_card.html.twig" %} + +{% import "info_providers/providers.macro.html.twig" as providers_macro %} +{% import "helper.twig" as helper %} + +{% block title %} + {% trans %}info_providers.from_url.title{% endtrans %} +{% endblock %} + +{% block card_title %} + {% trans %}info_providers.from_url.title{% endtrans %} +{% endblock %} + +{% block card_content %} +

    {% trans %}info_providers.from_url.help{% endtrans %}

    + + {{ form_start(form) }} + {{ form_row(form.url) }} + {{ form_row(form.submit) }} + {{ form_end(form) }} +{% endblock %} diff --git a/templates/info_providers/settings/provider_settings.html.twig b/templates/info_providers/settings/provider_settings.html.twig index 1876c2eb..86e5bc9b 100644 --- a/templates/info_providers/settings/provider_settings.html.twig +++ b/templates/info_providers/settings/provider_settings.html.twig @@ -10,7 +10,7 @@ {% block card_content %}

    - {% if info_provider_info.url %} + {% if info_provider_info.url is defined %} {{ info_provider_info.name }} {% else %} {{ info_provider_info.name }} diff --git a/tests/Services/InfoProviderSystem/ProviderRegistryTest.php b/tests/Services/InfoProviderSystem/ProviderRegistryTest.php index 9026c5bf..48a1847f 100644 --- a/tests/Services/InfoProviderSystem/ProviderRegistryTest.php +++ b/tests/Services/InfoProviderSystem/ProviderRegistryTest.php @@ -24,6 +24,7 @@ namespace App\Tests\Services\InfoProviderSystem; use App\Services\InfoProviderSystem\ProviderRegistry; use App\Services\InfoProviderSystem\Providers\InfoProviderInterface; +use App\Services\InfoProviderSystem\Providers\URLHandlerInfoProviderInterface; use PHPUnit\Framework\TestCase; class ProviderRegistryTest extends TestCase @@ -44,9 +45,10 @@ class ProviderRegistryTest extends TestCase public function getMockProvider(string $key, bool $active = true): InfoProviderInterface { - $mock = $this->createMock(InfoProviderInterface::class); + $mock = $this->createMockForIntersectionOfInterfaces([InfoProviderInterface::class, URLHandlerInfoProviderInterface::class]); $mock->method('getProviderKey')->willReturn($key); $mock->method('isActive')->willReturn($active); + $mock->method('getHandledDomains')->willReturn(["$key.com", "test.$key.de"]); return $mock; } @@ -109,4 +111,18 @@ class ProviderRegistryTest extends TestCase $registry->getProviders(); } + + public function testGetProviderHandlingDomain(): void + { + $registry = new ProviderRegistry($this->providers); + + $this->assertEquals($this->providers[0], $registry->getProviderHandlingDomain('test1.com')); + $this->assertEquals($this->providers[0], $registry->getProviderHandlingDomain('www.test1.com')); //Subdomain should also work + + $this->assertEquals( + $this->providers[1], + $registry->getProviderHandlingDomain('test.test2.de') + ); + } + } diff --git a/tests/Services/LabelSystem/BarcodeScanner/BarcodeRedirectorTest.php 
b/tests/Services/LabelSystem/BarcodeScanner/BarcodeRedirectorTest.php index c40e141d..c5bdb02d 100644 --- a/tests/Services/LabelSystem/BarcodeScanner/BarcodeRedirectorTest.php +++ b/tests/Services/LabelSystem/BarcodeScanner/BarcodeRedirectorTest.php @@ -64,7 +64,7 @@ final class BarcodeRedirectorTest extends KernelTestCase { yield [new LocalBarcodeScanResult(LabelSupportedElement::PART, 1, BarcodeSourceType::INTERNAL), '/en/part/1']; //Part lot redirects to Part info page (Part lot 1 is associated with part 3) - yield [new LocalBarcodeScanResult(LabelSupportedElement::PART_LOT, 1, BarcodeSourceType::INTERNAL), '/en/part/3']; + yield [new LocalBarcodeScanResult(LabelSupportedElement::PART_LOT, 1, BarcodeSourceType::INTERNAL), '/en/part/3?highlightLot=1']; yield [new LocalBarcodeScanResult(LabelSupportedElement::STORELOCATION, 1, BarcodeSourceType::INTERNAL), '/en/store_location/1/parts']; } diff --git a/translations/messages.en.xlf b/translations/messages.en.xlf index b2bd908e..87f6c2f6 100644 --- a/translations/messages.en.xlf +++ b/translations/messages.en.xlf @@ -14316,5 +14316,47 @@ Buerklin-API Authentication server: Only includes attachments in the selected languages in the results. + + + settings.ips.generic_web_provider + Generic Web URL Provider + + + + + settings.ips.generic_web_provider.description + This info provider allows retrieving basic part information from many shop page URLs. + + + + + settings.ips.generic_web_provider.enabled.help + When the provider is enabled, users can make requests to arbitrary websites on behalf of the Part-DB server. Only enable this if you are aware of the potential consequences. + + + + + info_providers.from_url.title + Create [part] from URL + + + + + info_providers.from_url.url.label + URL + + + + + info_providers.from_url.no_part_found + No part found from the given URL. Are you sure this is a valid shop URL? + + + + + info_providers.from_url.help + Creates a part based on the given URL. 
It tries to delegate the request to an existing info provider if possible; otherwise it tries to extract rudimentary data from the webpage's metadata. + +