mirror of
https://github.com/Part-DB/Part-DB-server.git
synced 2026-02-11 12:09:36 +00:00
Added logic to delegate the info retrieval to another provider when given a URL
This commit is contained in:
parent
47c7ee9f07
commit
10acc2e130
5 changed files with 169 additions and 11 deletions
|
|
@ -24,6 +24,7 @@ declare(strict_types=1);
|
||||||
namespace App\Services\InfoProviderSystem;
|
namespace App\Services\InfoProviderSystem;
|
||||||
|
|
||||||
use App\Services\InfoProviderSystem\Providers\InfoProviderInterface;
|
use App\Services\InfoProviderSystem\Providers\InfoProviderInterface;
|
||||||
|
use App\Services\InfoProviderSystem\Providers\URLHandlerInfoProviderInterface;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class keeps track of all registered info providers and allows to find them by their key
|
* This class keeps track of all registered info providers and allows to find them by their key
|
||||||
|
|
@ -47,6 +48,8 @@ final class ProviderRegistry
|
||||||
*/
|
*/
|
||||||
private array $providers_disabled = [];
|
private array $providers_disabled = [];
|
||||||
|
|
||||||
|
private array $providers_by_domain = [];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @var bool Whether the registry has been initialized
|
* @var bool Whether the registry has been initialized
|
||||||
*/
|
*/
|
||||||
|
|
@ -78,6 +81,14 @@ final class ProviderRegistry
|
||||||
$this->providers_by_name[$key] = $provider;
|
$this->providers_by_name[$key] = $provider;
|
||||||
if ($provider->isActive()) {
|
if ($provider->isActive()) {
|
||||||
$this->providers_active[$key] = $provider;
|
$this->providers_active[$key] = $provider;
|
||||||
|
if ($provider instanceof URLHandlerInfoProviderInterface) {
|
||||||
|
foreach ($provider->getHandledDomains() as $domain) {
|
||||||
|
if (isset($this->providers_by_domain[$domain])) {
|
||||||
|
throw new \LogicException("Domain $domain is already handled by another provider");
|
||||||
|
}
|
||||||
|
$this->providers_by_domain[$domain] = $provider;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
$this->providers_disabled[$key] = $provider;
|
$this->providers_disabled[$key] = $provider;
|
||||||
}
|
}
|
||||||
|
|
@ -139,4 +150,29 @@ final class ProviderRegistry
|
||||||
|
|
||||||
return $this->providers_disabled;
|
return $this->providers_disabled;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
/**
 * Returns the provider registered for the given domain, or null if none handles it.
 *
 * The domain is tried exactly as given first. If that fails, leading labels are stripped one
 * at a time (e.g. "www.shop.example.com" -> "shop.example.com" -> "example.com"); stripping
 * stops once only two labels are left.
 *
 * Host names are case-insensitive and may carry a trailing root dot, so a lower-cased form
 * without the trailing dot is tried as well. The original spelling is always tried first, so
 * every lookup that succeeded before still returns the same provider.
 * NOTE(review): the normalized lookup only helps if providers register their domains in lower
 * case (as "pollin.de" does) - confirm for new providers.
 *
 * @param string $domain The host name to look up (e.g. the host part of a URL)
 * @return (InfoProviderInterface&URLHandlerInfoProviderInterface)|null The handling provider, or null
 */
public function getProviderHandlingDomain(string $domain): (InfoProviderInterface&URLHandlerInfoProviderInterface)|null
{
    if (!$this->initialized) {
        $this->initStructures();
    }

    //Try the domain as given first, then its normalized form (lower case, no trailing root dot)
    $candidates = array_unique([$domain, rtrim(strtolower($domain), '.')]);

    foreach ($candidates as $candidate) {
        $parts = explode('.', $candidate);

        while (true) {
            //First iteration checks the full domain, later iterations check the parent domains
            $check_domain = implode('.', $parts);
            if (isset($this->providers_by_domain[$check_domain])) {
                return $this->providers_by_domain[$check_domain];
            }

            //Never strip below two labels (e.g. "example.com")
            if (count($parts) <= 2) {
                break;
            }
            array_shift($parts);
        }
    }

    //If we found nothing, return null
    return null;
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,8 +28,9 @@ use App\Services\InfoProviderSystem\DTOs\ParameterDTO;
|
||||||
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
|
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
|
||||||
use App\Services\InfoProviderSystem\DTOs\PriceDTO;
|
use App\Services\InfoProviderSystem\DTOs\PriceDTO;
|
||||||
use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
|
use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
|
||||||
|
use App\Services\InfoProviderSystem\PartInfoRetriever;
|
||||||
|
use App\Services\InfoProviderSystem\ProviderRegistry;
|
||||||
use App\Settings\InfoProviderSystem\GenericWebProviderSettings;
|
use App\Settings\InfoProviderSystem\GenericWebProviderSettings;
|
||||||
use PhpOffice\PhpSpreadsheet\Calculation\Financial\Securities\Price;
|
|
||||||
use Symfony\Component\DomCrawler\Crawler;
|
use Symfony\Component\DomCrawler\Crawler;
|
||||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||||
|
|
||||||
|
|
@ -40,7 +41,9 @@ class GenericWebProvider implements InfoProviderInterface
|
||||||
|
|
||||||
private readonly HttpClientInterface $httpClient;
|
private readonly HttpClientInterface $httpClient;
|
||||||
|
|
||||||
public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings)
|
public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings,
|
||||||
|
private readonly ProviderRegistry $providerRegistry, private readonly PartInfoRetriever $infoRetriever,
|
||||||
|
)
|
||||||
{
|
{
|
||||||
$this->httpClient = $httpClient->withOptions(
|
$this->httpClient = $httpClient->withOptions(
|
||||||
[
|
[
|
||||||
|
|
@ -228,6 +231,37 @@ class GenericWebProvider implements InfoProviderInterface
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tries to hand the given URL over to a dedicated provider registered for its host.
 *
 * Null is returned when the URL has no host, when no provider handles the host, when the
 * handling provider is inactive or is this provider itself, when the handler cannot derive an
 * ID from the URL, or when a ProviderIDNotSupportedException is thrown during the lookup.
 *
 * @param string $url The URL that should be resolved
 * @return PartDetailDTO|null The details retrieved via the other provider, or null if delegation is not possible
 */
private function delegateToOtherProvider(string $url): ?PartDetailDTO
{
    //Without a host there is nothing we could look up in the registry
    $host = parse_url($url, PHP_URL_HOST);
    if ($host === null || $host === false) {
        return null;
    }

    $delegate = $this->providerRegistry->getProviderHandlingDomain($host);

    //Bail out if nobody (usable) handles the host, or if we would delegate to ourselves
    if ($delegate === null || !$delegate->isActive() || $delegate->getProviderKey() === $this->getProviderKey()) {
        return null;
    }

    try {
        $partId = $delegate->getIDFromURL($url);

        return $partId === null
            ? null
            : $this->infoRetriever->getDetails($delegate->getProviderKey(), $partId);
    } catch (ProviderIDNotSupportedException) {
        //The other provider can not handle this ID, so the caller has to continue on its own
        return null;
    }
}
|
||||||
|
|
||||||
public function getDetails(string $id): PartDetailDTO
|
public function getDetails(string $id): PartDetailDTO
|
||||||
{
|
{
|
||||||
//Add scheme if missing
|
//Add scheme if missing
|
||||||
|
|
@ -247,6 +281,12 @@ class GenericWebProvider implements InfoProviderInterface
|
||||||
throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$id);
|
throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Before loading the page, try to delegate to another provider
|
||||||
|
$delegatedPart = $this->delegateToOtherProvider($url);
|
||||||
|
if ($delegatedPart !== null) {
|
||||||
|
return $delegatedPart;
|
||||||
|
}
|
||||||
|
|
||||||
//Try to get the webpage content
|
//Try to get the webpage content
|
||||||
$response = $this->httpClient->request('GET', $url);
|
$response = $this->httpClient->request('GET', $url);
|
||||||
$content = $response->getContent();
|
$content = $response->getContent();
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ use Symfony\Component\DependencyInjection\Attribute\Autowire;
|
||||||
use Symfony\Component\DomCrawler\Crawler;
|
use Symfony\Component\DomCrawler\Crawler;
|
||||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||||
|
|
||||||
class PollinProvider implements InfoProviderInterface
|
class PollinProvider implements InfoProviderInterface, URLHandlerInfoProviderInterface
|
||||||
{
|
{
|
||||||
|
|
||||||
public function __construct(private readonly HttpClientInterface $client,
|
public function __construct(private readonly HttpClientInterface $client,
|
||||||
|
|
@ -141,11 +141,16 @@ class PollinProvider implements InfoProviderInterface
|
||||||
$orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here
|
$orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here
|
||||||
|
|
||||||
//Calculate the mass
|
//Calculate the mass
|
||||||
$massStr = $dom->filter('meta[itemprop="weight"]')->attr('content');
|
$massDom = $dom->filter('meta[itemprop="weight"]');
|
||||||
//Remove the unit
|
if ($massDom->count() > 0) {
|
||||||
$massStr = str_replace('kg', '', $massStr);
|
$massStr = $massDom->attr('content');
|
||||||
//Convert to float and convert to grams
|
$massStr = str_replace('kg', '', $massStr);
|
||||||
$mass = (float) $massStr * 1000;
|
//Convert to float and convert to grams
|
||||||
|
$mass = (float) $massStr * 1000;
|
||||||
|
} else {
|
||||||
|
$mass = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//Parse purchase info
|
//Parse purchase info
|
||||||
$purchaseInfo = new PurchaseInfoDTO('Pollin', $orderId, $this->parsePrices($dom), $productPageUrl);
|
$purchaseInfo = new PurchaseInfoDTO('Pollin', $orderId, $this->parsePrices($dom), $productPageUrl);
|
||||||
|
|
@ -248,4 +253,22 @@ class PollinProvider implements InfoProviderInterface
|
||||||
ProviderCapabilities::DATASHEET
|
ProviderCapabilities::DATASHEET
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
/**
 * Lists the domains whose URLs this provider is able to resolve.
 *
 * @return string[]
 */
public function getHandledDomains(): array
{
    $domains = ['pollin.de'];

    return $domains;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the Pollin order number from a product page URL.
 *
 * The order number is the 6-digit number following the last dash of the product slug, e.g.
 * https://www.pollin.de/p/shelly-bluetooth-schalter-und-dimmer-blu-zb-button-plug-play-mocha-592325
 * Query string and fragment are ignored, so URLs carrying tracking parameters or anchors
 * (".../name-592325?utm_source=x", ".../name-592325#details") are resolved as well.
 *
 * @param string $url The URL to extract the ID from
 * @return string|null The 6-digit order number, or null if the URL does not contain one
 */
public function getIDFromURL(string $url): ?string
{
    //Cut off everything from the first "?" or "#" on, so only scheme, host and path remain
    $urlWithoutQuery = substr($url, 0, strcspn($url, '?#'));

    //Extract the 6-digit number at the end of a path segment
    $matches = [];
    if (preg_match('/-(\d{6})(?:\/|$)/', $urlWithoutQuery, $matches) === 1) {
        return $matches[1];
    }

    return null;
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,43 @@
|
||||||
|
<?php
|
||||||
|
/*
|
||||||
|
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
|
||||||
|
*
|
||||||
|
* Copyright (C) 2019 - 2026 Jan Böhmer (https://github.com/jbtronics)
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as published
|
||||||
|
* by the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
|
||||||
|
namespace App\Services\InfoProviderSystem\Providers;
|
||||||
|
|
||||||
|
/**
 * Implemented by info providers that can recognize URLs belonging to their own website and
 * extract a provider-specific part ID from such a URL.
 */
interface URLHandlerInfoProviderInterface
{
    /**
     * Returns a list of supported domains (e.g. ["digikey.com"])
     * @return string[] An array of supported domains
     */
    public function getHandledDomains(): array;

    /**
     * Extracts the unique ID of a part from a given URL. It is okay if this is not a canonical ID, as long as it can be used to uniquely identify the part within this provider.
     * @param string $url The URL to extract the ID from
     * @return string|null The extracted ID, or null if the URL is not valid for this provider
     */
    public function getIDFromURL(string $url): ?string;
}
|
||||||
|
|
@ -24,6 +24,7 @@ namespace App\Tests\Services\InfoProviderSystem;
|
||||||
|
|
||||||
use App\Services\InfoProviderSystem\ProviderRegistry;
|
use App\Services\InfoProviderSystem\ProviderRegistry;
|
||||||
use App\Services\InfoProviderSystem\Providers\InfoProviderInterface;
|
use App\Services\InfoProviderSystem\Providers\InfoProviderInterface;
|
||||||
|
use App\Services\InfoProviderSystem\Providers\URLHandlerInfoProviderInterface;
|
||||||
use PHPUnit\Framework\TestCase;
|
use PHPUnit\Framework\TestCase;
|
||||||
|
|
||||||
class ProviderRegistryTest extends TestCase
|
class ProviderRegistryTest extends TestCase
|
||||||
|
|
@ -44,9 +45,10 @@ class ProviderRegistryTest extends TestCase
|
||||||
|
|
||||||
public function getMockProvider(string $key, bool $active = true): InfoProviderInterface
|
public function getMockProvider(string $key, bool $active = true): InfoProviderInterface
|
||||||
{
|
{
|
||||||
$mock = $this->createMock(InfoProviderInterface::class);
|
$mock = $this->createMockForIntersectionOfInterfaces([InfoProviderInterface::class, URLHandlerInfoProviderInterface::class]);
|
||||||
$mock->method('getProviderKey')->willReturn($key);
|
$mock->method('getProviderKey')->willReturn($key);
|
||||||
$mock->method('isActive')->willReturn($active);
|
$mock->method('isActive')->willReturn($active);
|
||||||
|
$mock->method('getHandledDomains')->willReturn(["$key.com", "test.$key.de"]);
|
||||||
|
|
||||||
return $mock;
|
return $mock;
|
||||||
}
|
}
|
||||||
|
|
@ -109,4 +111,18 @@ class ProviderRegistryTest extends TestCase
|
||||||
|
|
||||||
$registry->getProviders();
|
$registry->getProviders();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Checks that the registry resolves exact domains and subdomains to the provider that
 * registered them, and returns null for a domain nobody registered.
 */
public function testGetProviderHandlingDomain(): void
{
    $registry = new ProviderRegistry($this->providers);

    //assertSame: the registry must hand back the very same provider instance, not just an equal-looking one
    $this->assertSame($this->providers[0], $registry->getProviderHandlingDomain('test1.com'));
    $this->assertSame($this->providers[0], $registry->getProviderHandlingDomain('www.test1.com')); //Subdomain should also work

    $this->assertSame(
        $this->providers[1],
        $registry->getProviderHandlingDomain('test.test2.de')
    );

    //The mock providers only register ".com" and ".de" domains, so this one must stay unresolved
    $this->assertNull($registry->getProviderHandlingDomain('not-registered.invalid'));
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue