Merge branch 'master' into feature/update-manager

This commit is contained in:
Jan Böhmer 2026-02-02 16:43:02 +01:00
commit 599145886b
10 changed files with 286 additions and 26 deletions

View file

@ -24,6 +24,7 @@ declare(strict_types=1);
namespace App\Services\InfoProviderSystem; namespace App\Services\InfoProviderSystem;
use App\Services\InfoProviderSystem\Providers\InfoProviderInterface; use App\Services\InfoProviderSystem\Providers\InfoProviderInterface;
use App\Services\InfoProviderSystem\Providers\URLHandlerInfoProviderInterface;
/** /**
* This class keeps track of all registered info providers and allows to find them by their key * This class keeps track of all registered info providers and allows to find them by their key
@ -47,6 +48,8 @@ final class ProviderRegistry
*/ */
private array $providers_disabled = []; private array $providers_disabled = [];
private array $providers_by_domain = [];
/** /**
* @var bool Whether the registry has been initialized * @var bool Whether the registry has been initialized
*/ */
@ -78,6 +81,14 @@ final class ProviderRegistry
$this->providers_by_name[$key] = $provider; $this->providers_by_name[$key] = $provider;
if ($provider->isActive()) { if ($provider->isActive()) {
$this->providers_active[$key] = $provider; $this->providers_active[$key] = $provider;
if ($provider instanceof URLHandlerInfoProviderInterface) {
foreach ($provider->getHandledDomains() as $domain) {
if (isset($this->providers_by_domain[$domain])) {
throw new \LogicException("Domain $domain is already handled by another provider");
}
$this->providers_by_domain[$domain] = $provider;
}
}
} else { } else {
$this->providers_disabled[$key] = $provider; $this->providers_disabled[$key] = $provider;
} }
@ -139,4 +150,29 @@ final class ProviderRegistry
return $this->providers_disabled; return $this->providers_disabled;
} }
}
/**
 * Finds the provider registered for the given host name.
 *
 * The host is first looked up exactly as given. Afterwards it is normalized (lowercased, surrounding
 * whitespace and a trailing root dot removed) and looked up again, then the left-most label is stripped
 * step by step so that a registration for "example.com" also answers for "www.example.com".
 * The walk stops once only two labels are left; a bare top-level label is never tried on its own
 * unless it was the input itself.
 *
 * @param string $domain The host name to look up (e.g. the host part of a product URL)
 * @return (InfoProviderInterface&URLHandlerInfoProviderInterface)|null The provider registered for the domain, or null if none matches
 */
public function getProviderHandlingDomain(string $domain): (InfoProviderInterface&URLHandlerInfoProviderInterface)|null
{
    if (!$this->initialized) {
        $this->initStructures();
    }

    //Exact match on the raw input first, so existing registrations keep resolving exactly as before
    if (isset($this->providers_by_domain[$domain])) {
        return $this->providers_by_domain[$domain];
    }

    //Host names are case-insensitive and may be written fully qualified with a trailing dot.
    //parse_url() does not normalize either, so do it here before comparing.
    $normalized = strtolower(rtrim(trim($domain), '.'));
    if ($normalized === '') {
        return null;
    }

    //Try the full host first, then strip sub domain labels one by one
    $labels = explode('.', $normalized);
    do {
        $candidate = implode('.', $labels);
        if (isset($this->providers_by_domain[$candidate])) {
            return $this->providers_by_domain[$candidate];
        }
        array_shift($labels);
    } while (count($labels) >= 2);

    //If we found nothing, return null
    return null;
}
}

View file

@ -30,9 +30,10 @@ use App\Services\InfoProviderSystem\DTOs\PriceDTO;
use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
use App\Settings\InfoProviderSystem\ConradSettings; use App\Settings\InfoProviderSystem\ConradSettings;
use App\Settings\InfoProviderSystem\ConradShopIDs;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
readonly class ConradProvider implements InfoProviderInterface readonly class ConradProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface
{ {
private const SEARCH_ENDPOINT = '/search/1/v3/facetSearch'; private const SEARCH_ENDPOINT = '/search/1/v3/facetSearch';
@ -317,4 +318,26 @@ readonly class ConradProvider implements InfoProviderInterface
ProviderCapabilities::PRICE, ProviderCapabilities::PRICE,
]; ];
} }
/**
 * Collects the domain of every Conrad shop ID, with duplicate domains removed.
 *
 * @return array The distinct shop domains (keys are those left by array_unique, so they may have gaps)
 */
public function getHandledDomains(): array
{
    $allShopDomains = array_map(
        static fn ($shop) => $shop->getDomain(),
        ConradShopIDs::cases()
    );

    return array_unique($allShopDomains);
}
/**
 * Extracts the Conrad product ID from a product page URL.
 *
 * Example: https://www.conrad.de/de/p/apple-iphone-air-wolkenweiss-256-gb-eek-a-a-g-16-5-cm-6-5-zoll-3475299.html
 * yields "3475299". The ID is the run of digits after the last dash of the path, directly before the
 * optional ".html" suffix. A query string or fragment after the path is ignored.
 *
 * @param string $url The URL to extract the ID from
 * @return string|null The product ID, or null if the URL does not end in a product ID
 */
public function getIDFromURL(string $url): ?string
{
    $matches = [];
    //The optional trailing group swallows "?query" / "#fragment", which would otherwise
    //sit between the ID and the end anchor and make the match fail
    if (preg_match('/-(\d+)(\.html)?(?:[?#].*)?$/', $url, $matches) === 1) {
        return $matches[1];
    }

    return null;
}
} }

View file

@ -33,7 +33,7 @@ use App\Settings\InfoProviderSystem\Element14Settings;
use Composer\CaBundle\CaBundle; use Composer\CaBundle\CaBundle;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
class Element14Provider implements InfoProviderInterface class Element14Provider implements InfoProviderInterface, URLHandlerInfoProviderInterface
{ {
private const ENDPOINT_URL = 'https://api.element14.com/catalog/products'; private const ENDPOINT_URL = 'https://api.element14.com/catalog/products';
@ -309,4 +309,21 @@ class Element14Provider implements InfoProviderInterface
ProviderCapabilities::DATASHEET, ProviderCapabilities::DATASHEET,
]; ];
} }
/**
 * Lists the domains whose URLs this provider handles.
 *
 * @return string[]
 */
public function getHandledDomains(): array
{
    $handledDomains = [
        'element14.com',
        'farnell.com',
        'newark.com',
    ];

    return $handledDomains;
}
/**
 * Extracts the order code from a product page URL.
 *
 * Example: https://de.farnell.com/on-semiconductor/bc547b/transistor-npn-to-92/dp/1017673 yields "1017673".
 * The order code is the path segment following "/dp/". Letters are accepted in addition to digits,
 * so that alphanumeric order codes are returned completely instead of being cut at the first letter
 * (NOTE(review): Newark-style codes such as "58K1699" are the motivating case - confirm against real URLs).
 *
 * @param string $url The URL to extract the ID from
 * @return string|null The order code, or null if the URL contains no "/dp/<code>" segment
 */
public function getIDFromURL(string $url): ?string
{
    $matches = [];
    if (preg_match('#/dp/([A-Za-z0-9]+)#', $url, $matches) === 1) {
        return $matches[1];
    }

    return null;
}
} }

View file

@ -28,8 +28,10 @@ use App\Services\InfoProviderSystem\DTOs\ParameterDTO;
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
use App\Services\InfoProviderSystem\DTOs\PriceDTO; use App\Services\InfoProviderSystem\DTOs\PriceDTO;
use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
use App\Services\InfoProviderSystem\PartInfoRetriever;
use App\Services\InfoProviderSystem\ProviderRegistry;
use App\Settings\InfoProviderSystem\GenericWebProviderSettings; use App\Settings\InfoProviderSystem\GenericWebProviderSettings;
use PhpOffice\PhpSpreadsheet\Calculation\Financial\Securities\Price;
use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\DomCrawler\Crawler;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
@ -40,7 +42,9 @@ class GenericWebProvider implements InfoProviderInterface
private readonly HttpClientInterface $httpClient; private readonly HttpClientInterface $httpClient;
public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings) public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings,
private readonly ProviderRegistry $providerRegistry, private readonly PartInfoRetriever $infoRetriever,
)
{ {
$this->httpClient = $httpClient->withOptions( $this->httpClient = $httpClient->withOptions(
[ [
@ -75,9 +79,17 @@ class GenericWebProvider implements InfoProviderInterface
public function searchByKeyword(string $keyword): array public function searchByKeyword(string $keyword): array
{ {
$url = $this->fixAndValidateURL($keyword);
//Before loading the page, try to delegate to another provider
$delegatedPart = $this->delegateToOtherProvider($url);
if ($delegatedPart !== null) {
return [$delegatedPart];
}
try { try {
return [ return [
$this->getDetails($keyword) $this->getDetails($keyword, false) //We already tried delegation
]; } catch (ProviderIDNotSupportedException $e) { ]; } catch (ProviderIDNotSupportedException $e) {
return []; return [];
} }
@ -212,6 +224,12 @@ class GenericWebProvider implements InfoProviderInterface
return json_decode($json, true, 512, JSON_THROW_ON_ERROR); return json_decode($json, true, 512, JSON_THROW_ON_ERROR);
} }
/**
* Gets the content of a meta tag by its name or property attribute, or null if not found
* @param Crawler $dom
* @param string $name
* @return string|null
*/
private function getMetaContent(Crawler $dom, string $name): ?string private function getMetaContent(Crawler $dom, string $name): ?string
{ {
$meta = $dom->filter('meta[property="'.$name.'"]'); $meta = $dom->filter('meta[property="'.$name.'"]');
@ -228,23 +246,72 @@ class GenericWebProvider implements InfoProviderInterface
return null; return null;
} }
public function getDetails(string $id): PartDetailDTO /**
* Delegates the URL to another provider if possible, otherwise return null
* @param string $url
* @return SearchResultDTO|null
*/
private function delegateToOtherProvider(string $url): ?SearchResultDTO
{ {
//Add scheme if missing //Extract domain from url:
if (!preg_match('/^https?:\/\//', $id)) { $host = parse_url($url, PHP_URL_HOST);
//Remove any leading slashes if ($host === false || $host === null) {
$id = ltrim($id, '/'); return null;
$id = 'https://'.$id;
} }
$url = $id; $provider = $this->providerRegistry->getProviderHandlingDomain($host);
if ($provider !== null && $provider->isActive() && $provider->getProviderKey() !== $this->getProviderKey()) {
try {
$id = $provider->getIDFromURL($url);
if ($id !== null) {
$results = $this->infoRetriever->searchByKeyword($id, [$provider]);
if (count($results) > 0) {
return $results[0];
}
}
return null;
} catch (ProviderIDNotSupportedException $e) {
//Ignore and continue
return null;
}
}
return null;
}
private function fixAndValidateURL(string $url): string
{
$originalUrl = $url;
//Add scheme if missing
if (!preg_match('/^https?:\/\//', $url)) {
//Remove any leading slashes
$url = ltrim($url, '/');
$url = 'https://'.$url;
}
//If this is not a valid URL with host, domain and path, throw an exception //If this is not a valid URL with host, domain and path, throw an exception
if (filter_var($url, FILTER_VALIDATE_URL) === false || if (filter_var($url, FILTER_VALIDATE_URL) === false ||
parse_url($url, PHP_URL_HOST) === null || parse_url($url, PHP_URL_HOST) === null ||
parse_url($url, PHP_URL_PATH) === null) { parse_url($url, PHP_URL_PATH) === null) {
throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$id); throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$originalUrl);
}
return $url;
}
public function getDetails(string $id, bool $check_for_delegation = true): PartDetailDTO
{
$url = $this->fixAndValidateURL($id);
if ($check_for_delegation) {
//Before loading the page, try to delegate to another provider
$delegatedPart = $this->delegateToOtherProvider($url);
if ($delegatedPart !== null) {
return $this->infoRetriever->getDetailsForSearchResult($delegatedPart);
}
} }
//Try to get the webpage content //Try to get the webpage content

View file

@ -33,7 +33,7 @@ use App\Settings\InfoProviderSystem\LCSCSettings;
use Symfony\Component\HttpFoundation\Cookie; use Symfony\Component\HttpFoundation\Cookie;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
class LCSCProvider implements BatchInfoProviderInterface class LCSCProvider implements BatchInfoProviderInterface, URLHandlerInfoProviderInterface
{ {
private const ENDPOINT_URL = 'https://wmsc.lcsc.com/ftps/wm'; private const ENDPOINT_URL = 'https://wmsc.lcsc.com/ftps/wm';
@ -452,4 +452,21 @@ class LCSCProvider implements BatchInfoProviderInterface
ProviderCapabilities::FOOTPRINT, ProviderCapabilities::FOOTPRINT,
]; ];
} }
/**
 * Lists the domains whose URLs this provider handles.
 *
 * @return string[]
 */
public function getHandledDomains(): array
{
    $handledDomains = ['lcsc.com'];

    return $handledDomains;
}
/**
 * Extracts the LCSC part number from a product detail URL.
 *
 * Example: https://www.lcsc.com/product-detail/C258144.html?s_z=n_BC547 yields "C258144"
 * (the leading "C" is part of the returned ID).
 * URLs that carry a descriptive slug in front of the part number, separated by an underscore
 * (".../product-detail/Some-Category_Some-Vendor-XYZ_C1525.html"), yield only the part number ("C1525").
 *
 * @param string $url The URL to extract the ID from
 * @return string|null The extracted ID, or null if the URL is not a product detail URL
 */
public function getIDFromURL(string $url): ?string
{
    $matches = [];

    //Preferred form: "C<digits>" directly before ".html", optionally preceded by a slug ending in "_"
    if (preg_match('~/product-detail/(?:[^/?#]*_)?(C\d+)\.html~', $url, $matches) === 1) {
        return $matches[1];
    }

    //Fallback: accept any word-character file name, as the original implementation did
    if (preg_match("#/product-detail/(\w+)\.html#", $url, $matches) > 0) {
        return $matches[1];
    }

    return null;
}
} }

View file

@ -36,7 +36,7 @@ use Symfony\Component\DependencyInjection\Attribute\Autowire;
use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\DomCrawler\Crawler;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
class PollinProvider implements InfoProviderInterface class PollinProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface
{ {
public function __construct(private readonly HttpClientInterface $client, public function __construct(private readonly HttpClientInterface $client,
@ -141,11 +141,16 @@ class PollinProvider implements InfoProviderInterface
$orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here $orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here
//Calculate the mass //Calculate the mass
$massStr = $dom->filter('meta[itemprop="weight"]')->attr('content'); $massDom = $dom->filter('meta[itemprop="weight"]');
//Remove the unit if ($massDom->count() > 0) {
$massStr = str_replace('kg', '', $massStr); $massStr = $massDom->attr('content');
//Convert to float and convert to grams $massStr = str_replace('kg', '', $massStr);
$mass = (float) $massStr * 1000; //Convert to float and convert to grams
$mass = (float) $massStr * 1000;
} else {
$mass = null;
}
//Parse purchase info //Parse purchase info
$purchaseInfo = new PurchaseInfoDTO('Pollin', $orderId, $this->parsePrices($dom), $productPageUrl); $purchaseInfo = new PurchaseInfoDTO('Pollin', $orderId, $this->parsePrices($dom), $productPageUrl);
@ -248,4 +253,22 @@ class PollinProvider implements InfoProviderInterface
ProviderCapabilities::DATASHEET ProviderCapabilities::DATASHEET
]; ];
} }
}
/**
 * Lists the domains whose URLs this provider handles.
 *
 * @return string[]
 */
public function getHandledDomains(): array
{
    $handledDomains = ['pollin.de'];

    return $handledDomains;
}
/**
 * Extracts the product number from a product page URL.
 *
 * Example: https://www.pollin.de/p/shelly-bluetooth-schalter-und-dimmer-blu-zb-button-plug-play-mocha-592325
 * yields "592325". The ID is a 6-digit number that follows a dash and is terminated by a slash,
 * a query string ("?"), a fragment ("#") or the end of the URL.
 *
 * @param string $url The URL to extract the ID from
 * @return string|null The 6-digit product number, or null if the URL does not contain one
 */
public function getIDFromURL(string $url): ?string
{
    $matches = [];
    //"?" and "#" are accepted as terminators so that tracking parameters or anchors do not hide the ID
    if (preg_match('/-(\d{6})(?:[\/?#]|$)/', $url, $matches) === 1) {
        return $matches[1];
    }

    return null;
}
}

View file

@ -32,7 +32,7 @@ use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
use App\Settings\InfoProviderSystem\TMESettings; use App\Settings\InfoProviderSystem\TMESettings;
class TMEProvider implements InfoProviderInterface class TMEProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface
{ {
private const VENDOR_NAME = 'TME'; private const VENDOR_NAME = 'TME';
@ -296,4 +296,22 @@ class TMEProvider implements InfoProviderInterface
ProviderCapabilities::PRICE, ProviderCapabilities::PRICE,
]; ];
} }
/**
 * Lists the domains whose URLs this provider handles.
 *
 * @return string[]
 */
public function getHandledDomains(): array
{
    $handledDomains = ['tme.eu'];

    return $handledDomains;
}
/**
 * Extracts the TME symbol from a product page URL.
 *
 * Example: https://www.tme.eu/de/details/fi321_se/kuhler/alutronic/ yields "fi321_se".
 * The ID is the path segment directly after "/details/". It ends at the next slash, at a query
 * string ("?"), at a fragment ("#") or at the end of the URL, so a URL that stops right after
 * the symbol is accepted as well.
 *
 * @param string $url The URL to extract the ID from
 * @return string|null The symbol, or null if the URL has no non-empty segment after "/details/"
 */
public function getIDFromURL(string $url): ?string
{
    $matches = [];
    if (preg_match('~/details/([^/?#]+)~', $url, $matches) === 1) {
        return $matches[1];
    }

    return null;
}
} }

View file

@ -0,0 +1,43 @@
<?php
/*
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
*
* Copyright (C) 2019 - 2026 Jan Böhmer (https://github.com/jbtronics)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace App\Services\InfoProviderSystem\Providers;
/**
 * Implemented by info providers that can recognize product page URLs of certain domains
 * and extract the ID of the part from such a URL.
 */
interface URLHandlerInfoProviderInterface
{
/**
 * Returns a list of supported domains (e.g. ["digikey.com"])
 * @return string[] An array of supported domains
 */
public function getHandledDomains(): array;
/**
 * Extracts the unique ID of a part from a given URL. It is okay if this is not a canonical ID, as long as it can be used to uniquely identify the part within this provider.
 * @param string $url The URL to extract the ID from
 * @return string|null The extracted ID, or null if the URL is not valid for this provider
 */
public function getIDFromURL(string $url): ?string;
}

View file

@ -24,6 +24,7 @@ namespace App\Tests\Services\InfoProviderSystem;
use App\Services\InfoProviderSystem\ProviderRegistry; use App\Services\InfoProviderSystem\ProviderRegistry;
use App\Services\InfoProviderSystem\Providers\InfoProviderInterface; use App\Services\InfoProviderSystem\Providers\InfoProviderInterface;
use App\Services\InfoProviderSystem\Providers\URLHandlerInfoProviderInterface;
use PHPUnit\Framework\TestCase; use PHPUnit\Framework\TestCase;
class ProviderRegistryTest extends TestCase class ProviderRegistryTest extends TestCase
@ -44,9 +45,10 @@ class ProviderRegistryTest extends TestCase
public function getMockProvider(string $key, bool $active = true): InfoProviderInterface public function getMockProvider(string $key, bool $active = true): InfoProviderInterface
{ {
$mock = $this->createMock(InfoProviderInterface::class); $mock = $this->createMockForIntersectionOfInterfaces([InfoProviderInterface::class, URLHandlerInfoProviderInterface::class]);
$mock->method('getProviderKey')->willReturn($key); $mock->method('getProviderKey')->willReturn($key);
$mock->method('isActive')->willReturn($active); $mock->method('isActive')->willReturn($active);
$mock->method('getHandledDomains')->willReturn(["$key.com", "test.$key.de"]);
return $mock; return $mock;
} }
@ -109,4 +111,18 @@ class ProviderRegistryTest extends TestCase
$registry->getProviders(); $registry->getProviders();
} }
/**
 * Checks the domain lookup of the registry: exact match, sub domain fallback,
 * a registered domain that itself has three labels, and a domain nobody handles.
 */
public function testGetProviderHandlingDomain(): void
{
    $registry = new ProviderRegistry($this->providers);

    //assertSame: the registry must hand back the very instance that was registered,
    //a loose object comparison between mocks would not prove that
    $this->assertSame($this->providers[0], $registry->getProviderHandlingDomain('test1.com'));
    $this->assertSame($this->providers[0], $registry->getProviderHandlingDomain('www.test1.com')); //Subdomain should also work

    $this->assertSame(
        $this->providers[1],
        $registry->getProviderHandlingDomain('test.test2.de')
    );

    //A domain that no provider registered must yield null
    $this->assertNull($registry->getProviderHandlingDomain('unknown-domain.invalid'));
}
} }

View file

@ -64,7 +64,7 @@ final class BarcodeRedirectorTest extends KernelTestCase
{ {
yield [new LocalBarcodeScanResult(LabelSupportedElement::PART, 1, BarcodeSourceType::INTERNAL), '/en/part/1']; yield [new LocalBarcodeScanResult(LabelSupportedElement::PART, 1, BarcodeSourceType::INTERNAL), '/en/part/1'];
//Part lot redirects to Part info page (Part lot 1 is associated with part 3) //Part lot redirects to Part info page (Part lot 1 is associated with part 3)
yield [new LocalBarcodeScanResult(LabelSupportedElement::PART_LOT, 1, BarcodeSourceType::INTERNAL), '/en/part/3']; yield [new LocalBarcodeScanResult(LabelSupportedElement::PART_LOT, 1, BarcodeSourceType::INTERNAL), '/en/part/3?highlightLot=1'];
yield [new LocalBarcodeScanResult(LabelSupportedElement::STORELOCATION, 1, BarcodeSourceType::INTERNAL), '/en/store_location/1/parts']; yield [new LocalBarcodeScanResult(LabelSupportedElement::STORELOCATION, 1, BarcodeSourceType::INTERNAL), '/en/store_location/1/parts'];
} }