mirror of
https://github.com/Part-DB/Part-DB-server.git
synced 2026-05-16 00:11:35 +00:00
Added URL delegation feature to AI provider and added option to skip that delegation
This commit is contained in:
parent
aac5b8e0be
commit
889aa08b4e
9 changed files with 121 additions and 52 deletions
|
|
@ -27,6 +27,7 @@ namespace App\Services\InfoProviderSystem\Providers;
|
|||
use App\Exceptions\ProviderIDNotSupportedException;
|
||||
use App\Helpers\RandomizeUseragentHttpClient;
|
||||
use App\Services\AI\AIPlatformRegistry;
|
||||
use App\Services\InfoProviderSystem\CreateFromUrlHelper;
|
||||
use App\Services\InfoProviderSystem\DTOJsonSchemaConverter;
|
||||
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
|
||||
use App\Settings\InfoProviderSystem\AIExtractorSettings;
|
||||
|
|
@ -57,7 +58,8 @@ final class AIWebProvider implements InfoProviderInterface
|
|||
private readonly AIExtractorSettings $settings,
|
||||
private readonly AIPlatformRegistry $AIPlatformRegistry,
|
||||
private readonly DTOJsonSchemaConverter $jsonSchemaConverter,
|
||||
private readonly CacheItemPoolInterface $partInfoCache
|
||||
private readonly CacheItemPoolInterface $partInfoCache,
|
||||
private readonly CreateFromUrlHelper $createFromUrlHelper,
|
||||
) {
|
||||
//Use NoPrivateNetworkHttpClient to prevent SSRF vulnerabilities, and RandomizeUseragentHttpClient to make it harder for servers to block us
|
||||
$this->httpClient = (new RandomizeUseragentHttpClient(new NoPrivateNetworkHttpClient($httpClient)))->withOptions(
|
||||
|
|
@ -90,9 +92,23 @@ final class AIWebProvider implements InfoProviderInterface
|
|||
|
||||
public function searchByKeyword(string $keyword, array $options = []): array
|
||||
{
|
||||
$url = $this->fixAndValidateURL($keyword);
|
||||
|
||||
if (!($options[self::OPTION_SKIP_DELEGATION] ?? false)) {
|
||||
//Before loading the page, try to delegate to another provider
|
||||
$delegatedPart = $this->createFromUrlHelper->delegateToOtherProvider($url, $this);
|
||||
if ($delegatedPart !== null) {
|
||||
return [$delegatedPart];
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
|
||||
$new_options = $options;
|
||||
$new_options[self::OPTION_SKIP_DELEGATION] = true; //Skip delegation for the getDetails call to prevent infinite loops
|
||||
|
||||
return [
|
||||
$this->getDetails($keyword, $options)
|
||||
$this->getDetails($keyword, $new_options)
|
||||
]; } catch (ProviderIDNotSupportedException $e) {
|
||||
return [];
|
||||
}
|
||||
|
|
@ -102,6 +118,14 @@ final class AIWebProvider implements InfoProviderInterface
|
|||
{
|
||||
$url = $this->fixAndValidateURL($id);
|
||||
|
||||
if (!($options[self::OPTION_SKIP_DELEGATION] ?? false)) {
|
||||
//Before loading the page, try to delegate to another provider
|
||||
$delegatedPart = $this->createFromUrlHelper->delegateToOtherProviderDetails($url, $this);
|
||||
if ($delegatedPart !== null) {
|
||||
return $delegatedPart;
|
||||
}
|
||||
}
|
||||
|
||||
//Check if we have a cached result for this URL, to avoid unnecessary LLM calls, which can be slow and costly.
|
||||
$cacheKey = 'ai_web_'.hash('xxh3', $url);
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ namespace App\Services\InfoProviderSystem\Providers;
|
|||
|
||||
use App\Exceptions\ProviderIDNotSupportedException;
|
||||
use App\Helpers\RandomizeUseragentHttpClient;
|
||||
use App\Services\InfoProviderSystem\CreateFromUrlHelper;
|
||||
use App\Services\InfoProviderSystem\DTOs\ParameterDTO;
|
||||
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
|
||||
use App\Services\InfoProviderSystem\DTOs\PriceDTO;
|
||||
|
|
@ -50,14 +51,12 @@ class GenericWebProvider implements InfoProviderInterface
|
|||
|
||||
use FixAndValidateUrlTrait;
|
||||
|
||||
public const OPTION_CHECK_FOR_DELEGATION = 'check_for_delegation';
|
||||
|
||||
public const DISTRIBUTOR_NAME = 'Website';
|
||||
|
||||
private readonly HttpClientInterface $httpClient;
|
||||
|
||||
public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings,
|
||||
private readonly ProviderRegistry $providerRegistry, private readonly PartInfoRetriever $infoRetriever,
|
||||
private readonly CreateFromUrlHelper $createFromUrlHelper,
|
||||
)
|
||||
{
|
||||
//Use NoPrivateNetworkHttpClient to prevent SSRF vulnerabilities, and RandomizeUseragentHttpClient to make it harder for servers to block us
|
||||
|
|
@ -93,15 +92,19 @@ class GenericWebProvider implements InfoProviderInterface
|
|||
{
|
||||
$url = $this->fixAndValidateURL($keyword);
|
||||
|
||||
//Before loading the page, try to delegate to another provider
|
||||
$delegatedPart = $this->delegateToOtherProvider($url);
|
||||
if ($delegatedPart !== null) {
|
||||
return [$delegatedPart];
|
||||
if (!($options[self::OPTION_SKIP_DELEGATION] ?? false)) {
|
||||
//Before loading the page, try to delegate to another provider
|
||||
$delegatedPart = $this->createFromUrlHelper->delegateToOtherProvider($url, $this);
|
||||
if ($delegatedPart !== null) {
|
||||
return [$delegatedPart];
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
$new_options = $options;
|
||||
$new_options[self::OPTION_SKIP_DELEGATION] = true; //Skip delegation for the getDetails call to prevent infinite loops
|
||||
return [
|
||||
$this->getDetails($keyword, [self::OPTION_CHECK_FOR_DELEGATION => false]) //We already tried delegation
|
||||
$this->getDetails($keyword, $new_options)
|
||||
]; } catch (ProviderIDNotSupportedException $e) {
|
||||
return [];
|
||||
}
|
||||
|
|
@ -278,53 +281,16 @@ class GenericWebProvider implements InfoProviderInterface
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delegates the URL to another provider if possible, otherwise return null
|
||||
* @param string $url
|
||||
* @return SearchResultDTO|null
|
||||
*/
|
||||
private function delegateToOtherProvider(string $url): ?SearchResultDTO
|
||||
{
|
||||
//Extract domain from url:
|
||||
$host = parse_url($url, PHP_URL_HOST);
|
||||
if ($host === false || $host === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$provider = $this->providerRegistry->getProviderHandlingDomain($host);
|
||||
|
||||
if ($provider !== null && $provider->isActive() && $provider->getProviderKey() !== $this->getProviderKey()) {
|
||||
try {
|
||||
$id = $provider->getIDFromURL($url);
|
||||
if ($id !== null) {
|
||||
$results = $this->infoRetriever->searchByKeyword($id, [$provider]);
|
||||
if (count($results) > 0) {
|
||||
return $results[0];
|
||||
}
|
||||
}
|
||||
return null;
|
||||
} catch (ProviderIDNotSupportedException $e) {
|
||||
//Ignore and continue
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public function getDetails(string $id, array $options = []): PartDetailDTO
|
||||
{
|
||||
//We check for delegation by default
|
||||
$check_for_delegation = $options[self::OPTION_CHECK_FOR_DELEGATION] ?? true;
|
||||
$url = $this->fixAndValidateURL($id);
|
||||
|
||||
if ($check_for_delegation) {
|
||||
if (!($options[self::OPTION_SKIP_DELEGATION] ?? false)) {
|
||||
//Before loading the page, try to delegate to another provider
|
||||
$delegatedPart = $this->delegateToOtherProvider($url);
|
||||
$delegatedPart = $this->createFromUrlHelper->delegateToOtherProviderDetails($url, $this);
|
||||
if ($delegatedPart !== null) {
|
||||
return $this->infoRetriever->getDetailsForSearchResult($delegatedPart);
|
||||
return $delegatedPart;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
|
|||
interface InfoProviderInterface
|
||||
{
|
||||
public const OPTION_NO_CACHE = 'no_cache'; // if set to true, the provider should not use any cache and retrieve fresh data from the source
|
||||
public const OPTION_SKIP_DELEGATION = 'skip_delegation'; // if set to true, the provider should not delegate the request to other providers, even if it supports delegation.
|
||||
|
||||
/**
|
||||
* Get information about this provider
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue