Refactored and cleaned up AIInfoExtractor

This commit is contained in:
Jan Böhmer 2026-04-25 22:21:06 +02:00
parent 9cf16248e6
commit c0017d29a7
4 changed files with 324 additions and 430 deletions

View file

@ -0,0 +1,239 @@
<?php
/*
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
*
* Copyright (C) 2019 - 2026 Jan Böhmer (https://github.com/jbtronics)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace App\Services\InfoProviderSystem;
use App\Entity\Parts\ManufacturingStatus;
use App\Services\InfoProviderSystem\DTOs\FileDTO;
use App\Services\InfoProviderSystem\DTOs\ParameterDTO;
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
use App\Services\InfoProviderSystem\DTOs\PriceDTO;
use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
/**
 * Converts the JSON data returned by an LLM into the DTOs used by the info provider system.
 *
 * getJSONSchema() describes the structure the LLM is asked to produce, jsonToDTO() maps a decoded response with
 * that structure onto a PartDetailDTO. Both methods have to be kept in sync when fields are added or removed.
 */
final class DTOJsonSchemaConverter
{
    /**
     * Manufacturing status values offered to the LLM. Every string listed here is understood by
     * mapManufacturingStatus(); null stands for "status unknown".
     */
    private const MANUFACTURING_STATUS_VALUES = ['active', 'obsolete', 'nrfnd', 'discontinued', 'eol', 'announced', null];

    /**
     * Returns the JSON schema that defines the expected structure of the JSON data returned by the LLM.
     *
     * NOTE(review): "strict" is passed on unchanged. Some providers only honour strict structured output if every
     * object sets "additionalProperties" to false and lists all of its properties as required - confirm this
     * against the provider/model that is actually configured.
     *
     * @return array{name: string, strict: bool, schema: array<string, mixed>}
     */
    public function getJSONSchema(): array
    {
        return [
            'name' => 'part_info',
            'strict' => true,
            'schema' => [
                'type' => 'object',
                'properties' => [
                    'name' => ['type' => 'string', 'description' => 'Product name'],
                    'description' => ['type' => 'string', 'description' => 'Product description'],
                    'manufacturer' => ['type' => ['string', 'null'], 'description' => 'Manufacturer name'],
                    'mpn' => ['type' => ['string', 'null'], 'description' => 'Manufacturer Part Number'],
                    'category' => ['type' => ['string', 'null'], 'description' => 'Product category'],
                    'manufacturing_status' => [
                        'type' => ['string', 'null'],
                        'enum' => self::MANUFACTURING_STATUS_VALUES,
                        'description' => 'Manufacturing status',
                    ],
                    'footprint' => ['type' => ['string', 'null'], 'description' => 'Package/footprint type'],
                    'mass' => ['type' => ['number', 'null'], 'description' => 'Mass in grams'],
                    'parameters' => [
                        'type' => 'array',
                        'items' => [
                            'type' => 'object',
                            'properties' => [
                                'name' => ['type' => 'string'],
                                'value' => ['type' => 'string'],
                                'unit' => ['type' => ['string', 'null']],
                            ],
                            'required' => ['name', 'value'],
                        ],
                    ],
                    'datasheets' => $this->fileListSchema(),
                    'images' => $this->fileListSchema(),
                    'vendor_infos' => [
                        'type' => 'array',
                        'items' => [
                            'type' => 'object',
                            'properties' => [
                                'distributor_name' => ['type' => 'string'],
                                'order_number' => ['type' => ['string', 'null']],
                                'product_url' => ['type' => 'string'],
                                'prices' => [
                                    'type' => 'array',
                                    'items' => [
                                        'type' => 'object',
                                        'properties' => [
                                            'minimum_quantity' => ['type' => 'integer'],
                                            'price' => ['type' => 'number'],
                                            'currency' => ['type' => 'string'],
                                        ],
                                        'required' => ['minimum_quantity', 'price', 'currency'],
                                    ],
                                ],
                            ],
                            'required' => ['distributor_name', 'product_url'],
                        ],
                    ],
                    'manufacturer_product_url' => ['type' => ['string', 'null'], 'description' => 'Manufacturer product page URL'],
                ],
                'required' => ['name', 'description'],
            ],
        ];
    }

    /**
     * Converts the decoded JSON data of the LLM (structured like getJSONSchema() describes) into a PartDetailDTO.
     *
     * Missing or empty optional fields end up as null in the DTO, a missing name becomes 'Unknown' and a missing
     * description becomes an empty string.
     *
     * @param array<string, mixed> $data The decoded JSON object returned by the LLM
     * @param string $providerKey The key of the info provider the DTO is created for
     * @param string $providerId The ID of the part inside that provider
     * @param string|null $productUrl Used as provider URL and as fallback product URL of vendor entries
     * @param string $distributorNameFallback Distributor name used for vendor entries that do not carry one
     */
    public function jsonToDTO(array $data, string $providerKey, string $providerId, ?string $productUrl = null, string $distributorNameFallback = '???'): PartDetailDTO
    {
        return new PartDetailDTO(
            provider_key: $providerKey,
            provider_id: $providerId,
            name: $data['name'] ?? 'Unknown',
            description: $data['description'] ?? '',
            category: $data['category'] ?? null,
            manufacturer: $data['manufacturer'] ?? null,
            mpn: $data['mpn'] ?? null,
            preview_image_url: $this->findPreviewImageUrl($data['images'] ?? null),
            manufacturing_status: $this->mapManufacturingStatus($data['manufacturing_status'] ?? null),
            provider_url: $productUrl,
            footprint: $data['footprint'] ?? null,
            notes: null,
            datasheets: $this->buildFileDTOs($data['datasheets'] ?? null, 'Datasheet'),
            images: $this->buildFileDTOs($data['images'] ?? null, 'Image'),
            parameters: $this->buildParameters($data['parameters'] ?? null),
            vendor_infos: $this->buildVendorInfos($data['vendor_infos'] ?? null, $productUrl, $distributorNameFallback),
            mass: isset($data['mass']) && is_numeric($data['mass']) ? (float) $data['mass'] : null,
            manufacturer_product_url: $data['manufacturer_product_url'] ?? null,
        );
    }

    /**
     * Schema of a list of files (used for both datasheets and images): objects with a mandatory URL and an
     * optional description.
     *
     * @return array<string, mixed>
     */
    private function fileListSchema(): array
    {
        return [
            'type' => 'array',
            'items' => [
                'type' => 'object',
                'properties' => [
                    'url' => ['type' => 'string'],
                    'description' => ['type' => 'string'],
                ],
                'required' => ['url'],
            ],
        ];
    }

    /**
     * Maps the status string of the LLM (case-insensitive) to the matching enum case, or null if it is
     * empty or unknown.
     */
    private function mapManufacturingStatus(mixed $rawStatus): ?ManufacturingStatus
    {
        if (empty($rawStatus)) {
            return null;
        }

        return match (strtolower((string) $rawStatus)) {
            'active' => ManufacturingStatus::ACTIVE,
            'obsolete', 'discontinued' => ManufacturingStatus::DISCONTINUED,
            'nrfnd', 'not recommended for new designs' => ManufacturingStatus::NRFND,
            'eol' => ManufacturingStatus::EOL,
            'announced' => ManufacturingStatus::ANNOUNCED,
            default => null,
        };
    }

    /**
     * Builds the parameter DTOs. Entries without a name are skipped.
     *
     * @return ParameterDTO[]|null null if the LLM returned no parameter list at all
     */
    private function buildParameters(mixed $entries): ?array
    {
        if (empty($entries) || !is_array($entries)) {
            return null;
        }

        $parameters = [];
        foreach ($entries as $entry) {
            if (empty($entry['name'])) {
                continue;
            }

            $value = $entry['value'] ?? '';
            $unit = $entry['unit'] ?? null;

            // Value and unit are handed over as one string, so that the parameter parser can split them itself
            $parameters[] = ParameterDTO::parseValueField(
                name: $entry['name'],
                value: $unit ? $value . ' ' . $unit : $value,
            );
        }

        return $parameters;
    }

    /**
     * Builds file DTOs from a list of url/description entries. Entries without a URL are skipped.
     *
     * @param string $defaultName Name used for files that come without a description
     * @return FileDTO[]|null null if the LLM returned no list at all
     */
    private function buildFileDTOs(mixed $entries, string $defaultName): ?array
    {
        if (empty($entries) || !is_array($entries)) {
            return null;
        }

        $files = [];
        foreach ($entries as $entry) {
            if (!empty($entry['url'])) {
                $files[] = new FileDTO(
                    url: $entry['url'],
                    name: $entry['description'] ?? $defaultName,
                );
            }
        }

        return $files;
    }

    /**
     * Returns the URL of the first image entry that actually has one. Entries without a URL are skipped in the
     * image list as well, so they must not hide a later usable image from the preview.
     */
    private function findPreviewImageUrl(mixed $images): ?string
    {
        if (!is_array($images)) {
            return null;
        }

        foreach ($images as $image) {
            if (!empty($image['url'])) {
                return $image['url'];
            }
        }

        return null;
    }

    /**
     * Builds the purchase info DTOs (one per vendor entry).
     *
     * @return PurchaseInfoDTO[]|null null if the LLM returned no vendor list at all
     */
    private function buildVendorInfos(mixed $vendors, ?string $productUrl, string $distributorNameFallback): ?array
    {
        if (empty($vendors) || !is_array($vendors)) {
            return null;
        }

        $vendorInfos = [];
        foreach ($vendors as $vendor) {
            $vendorInfos[] = new PurchaseInfoDTO(
                distributor_name: $vendor['distributor_name'] ?? $distributorNameFallback,
                order_number: $vendor['order_number'] ?? 'Unknown',
                prices: $this->buildPrices($vendor['prices'] ?? null),
                product_url: $vendor['product_url'] ?? $productUrl,
            );
        }

        return $vendorInfos;
    }

    /**
     * Builds the price DTOs of a single vendor entry.
     *
     * @return PriceDTO[] Empty if the vendor entry has no prices
     */
    private function buildPrices(mixed $priceEntries): array
    {
        if (empty($priceEntries) || !is_array($priceEntries)) {
            return [];
        }

        $prices = [];
        foreach ($priceEntries as $entry) {
            $minimumQuantity = (int) ($entry['minimum_quantity'] ?? 1);

            // NOTE(review): the price is registered as relating to $minimumQuantity pieces - confirm that the LLM
            // is instructed to return the price for the whole quantity and not the price per piece.
            $prices[] = new PriceDTO(
                minimum_discount_amount: $minimumQuantity,
                price: (string) ($entry['price'] ?? 0),
                currency_iso_code: $entry['currency'] ?? 'USD',
                price_related_quantity: $minimumQuantity,
            );
        }

        return $prices;
    }
}

View file

@ -24,32 +24,31 @@ declare(strict_types=1);
namespace App\Services\InfoProviderSystem\Providers;
use App\Entity\Parts\ManufacturingStatus;
use App\Services\InfoProviderSystem\DTOs\FileDTO;
use App\Services\InfoProviderSystem\DTOs\ParameterDTO;
use App\Exceptions\ProviderIDNotSupportedException;
use App\Services\InfoProviderSystem\DTOJsonSchemaConverter;
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
use App\Services\InfoProviderSystem\DTOs\PriceDTO;
use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
use App\Settings\InfoProviderSystem\AIExtractorSettings;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Platform;
use Symfony\AI\Platform\PlatformInterface;
use Symfony\Component\DependencyInjection\Attribute\Autowire;
use Symfony\Contracts\HttpClient\HttpClientInterface;
class AIInfoExtractor implements InfoProviderInterface
final class AIInfoExtractor implements InfoProviderInterface
{
use FixAndValidateUrlTrait;
private const DISTRIBUTOR_NAME = 'AI Extracted';
private readonly HttpClientInterface $httpClient;
public function __construct(HttpClientInterface $httpClient, private readonly AIExtractorSettings $settings,
public function __construct(
HttpClientInterface $httpClient,
private readonly AIExtractorSettings $settings,
#[Autowire(service: "ai.traceable_platform.openrouter")]
private readonly PlatformInterface $aiPlatform
)
{
private readonly PlatformInterface $aiPlatform,
private readonly DTOJsonSchemaConverter $jsonSchemaConverter,
) {
$this->httpClient = $httpClient->withOptions([
'timeout' => 30,
'headers' => [
@ -82,36 +81,17 @@ class AIInfoExtractor implements InfoProviderInterface
public function searchByKeyword(string $keyword): array
{
// Treat the keyword as a URL and return a single search result
$url = $this->normalizeURL($keyword);
//try {
$part = $this->getDetails($url);
return [
new SearchResultDTO(
provider_key: $this->getProviderKey(),
provider_id: $url,
name: $part->name,
description: $part->description,
category: $part->category,
manufacturer: $part->manufacturer,
mpn: $part->mpn,
preview_image_url: $part->preview_image_url,
manufacturing_status: $part->manufacturing_status,
provider_url: $part->provider_url,
footprint: $part->footprint,
gtin: $part->gtin,
),
];
//} catch (\Throwable $e) {
// // Return empty array on error
// return [];
//}
try {
return [
$this->getDetails($keyword)
]; } catch (ProviderIDNotSupportedException $e) {
return [];
}
}
public function getDetails(string $id): PartDetailDTO
{
$url = $this->normalizeURL($id);
$url = $this->fixAndValidateURL($id);
// Fetch HTML content
$response = $this->httpClient->request('GET', $url);
@ -123,14 +103,11 @@ class AIInfoExtractor implements InfoProviderInterface
// Truncate to max content length
$truncatedHtml = $this->truncateHTML($cleanedHtml, $this->settings->maxContentLength);
// Call OpenRouter API
$llmResponse = $this->callOpenRouterAPI($truncatedHtml, $url);
// Parse JSON response
$data = json_decode($llmResponse, true, 512, JSON_THROW_ON_ERROR);
// Call LLM
$llmResponse = $this->callLLM($truncatedHtml, $url);
// Build and return PartDetailDTO
return $this->buildPartDetailDTO($data, $url);
return $this->jsonSchemaConverter->jsonToDTO($llmResponse, $this->getProviderKey(), $url, $url, self::DISTRIBUTOR_NAME);
}
public function getCapabilities(): array
@ -144,21 +121,6 @@ class AIInfoExtractor implements InfoProviderInterface
];
}
private function normalizeURL(string $url): string
{
// Add https:// if no protocol
if (!preg_match('/^https?:\/\//', $url)) {
$url = 'https://' . ltrim($url, '/');
}
// Validate URL
if (filter_var($url, FILTER_VALIDATE_URL) === false) {
throw new \InvalidArgumentException("Invalid URL: $url");
}
return $url;
}
private function cleanHTML(string $html): string
{
// Remove script tags
@ -201,249 +163,24 @@ class AIInfoExtractor implements InfoProviderInterface
return $truncated;
}
private function callOpenRouterAPI(string $htmlContent, string $url): string
private function callLLM(string $htmlContent, string $url): array
{
$input = new MessageBag(
Message::forSystem($this->buildSystemPrompt()),
Message::ofUser("Extract part information from this webpage content:\n\nURL: $url\n\n$htmlContent")
);
$models = $this->aiPlatform->getModelCatalog()->getModels();
try {
//'openai/gpt-5-mini'
$result = $this->aiPlatform->invoke('openrouter/auto', $input, [
'response_format' => 'json_schema',
'json_schema' => [
'name' => 'clock',
'strict' => true,
'schema' => [
'type' => 'object',
'properties' => [
'name' => ['type' => 'string', 'description' => 'Product name'],
'description' => ['type' => 'string', 'description' => 'Product description'],
'manufacturer' => ['type' => ['string', 'null'], 'description' => 'Manufacturer name'],
'mpn' => ['type' => ['string', 'null'], 'description' => 'Manufacturer Part Number'],
'category' => ['type' => ['string', 'null'], 'description' => 'Product category'],
'manufacturing_status' => ['type' => ['string', 'null'], 'enum' => ['active', 'obsolete', 'nrfnd', 'discontinued', null], 'description' => 'Manufacturing status'],
'footprint' => ['type' => ['string', 'null'], 'description' => 'Package/footprint type'],
'mass' => ['type' => ['number', 'null'], 'description' => 'Mass in grams'],
'parameters' => [
'type' => 'array',
'items' => [
'type' => 'object',
'properties' => [
'name' => ['type' => 'string'],
'value' => ['type' => 'string'],
'unit' => ['type' => ['string', 'null']],
],
'required' => ['name', 'value'],
],
],
'datasheets' => [
'type' => 'array',
'items' => [
'type' => 'object',
'properties' => [
'url' => ['type' => 'string'],
'description' => ['type' => 'string'],
],
'required' => ['url'],
],
],
'images' => [
'type' => 'array',
'items' => [
'type' => 'object',
'properties' => [
'url' => ['type' => 'string'],
'description' => ['type' => 'string'],
],
'required' => ['url'],
],
],
'vendor_infos' => [
'type' => 'array',
'items' => [
'type' => 'object',
'properties' => [
'distributor_name' => ['type' => 'string'],
'order_number' => ['type' => ['string', 'null']],
'product_url' => ['type' => 'string'],
'prices' => [
'type' => 'array',
'items' => [
'type' => 'object',
'properties' => [
'minimum_quantity' => ['type' => 'integer'],
'price' => ['type' => 'number'],
'currency' => ['type' => 'string'],
],
'required' => ['minimum_quantity', 'price', 'currency'],
],
],
],
'required' => ['distributor_name', 'product_url'],
],
],
'manufacturer_product_url' => ['type' => ['string', 'null'], 'description' => 'Manufacturer product page URL'],
],
'required' => ['name', 'description'],
],
],
'json_schema' => $this->jsonSchemaConverter->getJSONSchema(),
]);
} catch (\Throwable $e) {
dump($e);
throw new \RuntimeException('LLM invocation failed: ' . $e->getMessage(), previous: $e);
throw new \RuntimeException('LLM invocation failed: '.$e->getMessage(), previous: $e);
}
dump($result->getResult()->getContent());
return json_encode($result->getResult()->getContent());
/*
$systemPrompt = $this->buildSystemPrompt();
// Define the tool/function for structured output
$toolDefinition = [
'type' => 'function',
'function' => [
'name' => 'extract_part_info',
'description' => 'Extract electronic component information from a webpage',
'parameters' => [
'type' => 'object',
'properties' => [
'name' => ['type' => 'string', 'description' => 'Product name'],
'description' => ['type' => 'string', 'description' => 'Product description'],
'manufacturer' => ['type' => ['string', 'null'], 'description' => 'Manufacturer name'],
'mpn' => ['type' => ['string', 'null'], 'description' => 'Manufacturer Part Number'],
'category' => ['type' => ['string', 'null'], 'description' => 'Product category'],
'manufacturing_status' => ['type' => ['string', 'null'], 'enum' => ['active', 'obsolete', 'nrfnd', 'discontinued', null], 'description' => 'Manufacturing status'],
'footprint' => ['type' => ['string', 'null'], 'description' => 'Package/footprint type'],
'mass' => ['type' => ['number', 'null'], 'description' => 'Mass in grams'],
'parameters' => [
'type' => 'array',
'items' => [
'type' => 'object',
'properties' => [
'name' => ['type' => 'string'],
'value' => ['type' => 'string'],
'unit' => ['type' => ['string', 'null']],
],
'required' => ['name', 'value'],
],
],
'datasheets' => [
'type' => 'array',
'items' => [
'type' => 'object',
'properties' => [
'url' => ['type' => 'string'],
'description' => ['type' => 'string'],
],
'required' => ['url'],
],
],
'images' => [
'type' => 'array',
'items' => [
'type' => 'object',
'properties' => [
'url' => ['type' => 'string'],
'description' => ['type' => 'string'],
],
'required' => ['url'],
],
],
'vendor_infos' => [
'type' => 'array',
'items' => [
'type' => 'object',
'properties' => [
'distributor_name' => ['type' => 'string'],
'order_number' => ['type' => ['string', 'null']],
'product_url' => ['type' => 'string'],
'prices' => [
'type' => 'array',
'items' => [
'type' => 'object',
'properties' => [
'minimum_quantity' => ['type' => 'integer'],
'price' => ['type' => 'number'],
'currency' => ['type' => 'string'],
],
'required' => ['minimum_quantity', 'price', 'currency'],
],
],
],
'required' => ['distributor_name', 'product_url'],
],
],
'manufacturer_product_url' => ['type' => ['string', 'null'], 'description' => 'Manufacturer product page URL'],
],
'required' => ['name', 'description'],
],
],
];
$payload = [
'model' => $this->settings->model,
'messages' => [
[
'role' => 'system',
'content' => $systemPrompt,
],
[
'role' => 'user',
'content' => "Extract part information from this webpage content:\n\nURL: $url\n\n$htmlContent",
],
],
'tools' => [$toolDefinition],
'tool_choice' => ['type' => 'function', 'function' => ['name' => 'extract_part_info']],
'max_tokens' => 4096,
'temperature' => 0.1,
];
$response = $this->httpClient->request('POST', 'https://openrouter.ai/api/v1/chat/completions', [
'headers' => [
'Authorization' => 'Bearer ' . $this->settings->apiKey,
'Content-Type' => 'application/json',
'HTTP-Referer' => 'https://github.com/Part-DB/Part-DB-server',
'X-Title' => 'Part-DB AI Info Extractor',
],
'json' => $payload,
]);
$data = $response->toArray();
$message = $data['choices'][0]['message'] ?? null;
if ($message === null) {
throw new \RuntimeException('No response message from LLM');
}
// Check if the model used the tool/function call
if (isset($message['tool_calls']) && !empty($message['tool_calls'])) {
foreach ($message['tool_calls'] as $toolCall) {
if ($toolCall['function']['name'] === 'extract_part_info') {
return $toolCall['function']['arguments'];
}
}
}
// Fallback to content if no tool call (some models might not support tool calling)
$content = $message['content'] ?? throw new \RuntimeException('No response content from LLM');
// Strip markdown code blocks if present (fallback for models without tool support)
$content = preg_replace('/^```(?:json)?\s*\n?/i', '', $content);
$content = preg_replace('/\n?```\s*$/i', '', $content);
$content = trim($content);
return $content;
*/
return $result->getResult()->getContent();
}
private function buildSystemPrompt(): string
@ -485,119 +222,4 @@ For parameters, combine name, value, and unit. The unit should be separate if po
PROMPT;
}
private function buildPartDetailDTO(array $data, string $url): PartDetailDTO
{
// Map manufacturing status
$manufacturingStatus = null;
if (!empty($data['manufacturing_status'])) {
$status = strtolower((string) $data['manufacturing_status']);
$manufacturingStatus = match ($status) {
'active' => ManufacturingStatus::ACTIVE,
'obsolete', 'discontinued' => ManufacturingStatus::DISCONTINUED,
'nrfnd', 'not recommended for new designs' => ManufacturingStatus::NRFND,
'eol' => ManufacturingStatus::EOL,
'announced' => ManufacturingStatus::ANNOUNCED,
default => null,
};
}
// Build parameters
$parameters = null;
if (!empty($data['parameters']) && is_array($data['parameters'])) {
$parameters = [];
foreach ($data['parameters'] as $p) {
if (!empty($p['name'])) {
$value = $p['value'] ?? '';
$unit = $p['unit'] ?? null;
// Combine value and unit for parsing
$valueWithUnit = $unit ? $value . ' ' . $unit : $value;
$parameters[] = ParameterDTO::parseValueField(
name: $p['name'],
value: $valueWithUnit
);
}
}
}
// Build datasheets
$datasheets = null;
if (!empty($data['datasheets']) && is_array($data['datasheets'])) {
$datasheets = [];
foreach ($data['datasheets'] as $d) {
if (!empty($d['url'])) {
$datasheets[] = new FileDTO(
url: $d['url'],
name: $d['description'] ?? 'Datasheet'
);
}
}
}
// Build images
$images = null;
if (!empty($data['images']) && is_array($data['images'])) {
$images = [];
foreach ($data['images'] as $i) {
if (!empty($i['url'])) {
$images[] = new FileDTO(
url: $i['url'],
name: $i['description'] ?? 'Image'
);
}
}
}
// Build vendor infos
$vendorInfos = null;
if (!empty($data['vendor_infos']) && is_array($data['vendor_infos'])) {
$vendorInfos = [];
foreach ($data['vendor_infos'] as $v) {
$prices = [];
if (!empty($v['prices']) && is_array($v['prices'])) {
foreach ($v['prices'] as $p) {
$prices[] = new PriceDTO(
minimum_discount_amount: (int) ($p['minimum_quantity'] ?? 1),
price: (string) ($p['price'] ?? 0),
currency_iso_code: $p['currency'] ?? 'USD',
price_related_quantity: (int) ($p['minimum_quantity'] ?? 1),
);
}
}
$vendorInfos[] = new PurchaseInfoDTO(
distributor_name: $v['distributor_name'] ?? self::DISTRIBUTOR_NAME,
order_number: $v['order_number'] ?? 'Unknown',
prices: $prices,
product_url: $v['product_url'] ?? $url,
);
}
}
// Get preview image URL
$previewImageUrl = null;
if (!empty($data['images']) && is_array($data['images']) && !empty($data['images'][0]['url'])) {
$previewImageUrl = $data['images'][0]['url'];
}
return new PartDetailDTO(
provider_key: $this->getProviderKey(),
provider_id: $url,
name: $data['name'] ?? 'Unknown',
description: $data['description'] ?? '',
category: $data['category'] ?? null,
manufacturer: $data['manufacturer'] ?? null,
mpn: $data['mpn'] ?? null,
preview_image_url: $previewImageUrl,
manufacturing_status: $manufacturingStatus,
provider_url: $url,
footprint: $data['footprint'] ?? null,
mass: isset($data['mass']) && is_numeric($data['mass']) ? (float) $data['mass'] : null,
notes: null,
datasheets: $datasheets,
images: $images,
parameters: $parameters,
vendor_infos: $vendorInfos,
manufacturer_product_url: $data['manufacturer_product_url'] ?? null,
);
}
}

View file

@ -0,0 +1,58 @@
<?php
/*
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
*
* Copyright (C) 2019 - 2026 Jan Böhmer (https://github.com/jbtronics)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace App\Services\InfoProviderSystem\Providers;
use App\Exceptions\ProviderIDNotSupportedException;
/**
 * Helper for info providers that use a web address as provider ID: repairs common defects of user supplied
 * URLs and rejects everything that is not a usable URL afterwards.
 */
trait FixAndValidateUrlTrait
{
    /**
     * Adds a missing scheme (or a missing slash of the scheme separator) to the given URL and validates the result.
     *
     * @param string $url The URL as entered by the user
     * @return string The repaired URL
     * @throws ProviderIDNotSupportedException If the result is not a valid URL with a host and a path
     */
    private function fixAndValidateURL(string $url): string
    {
        $originalUrl = $url;

        //Add scheme if missing. URL schemes are case-insensitive, so "HTTPS://..." already has one
        if (!preg_match('/^https?:\/\//i', $url)) {
            //Remove any leading slashes
            $url = ltrim($url, '/');

            //If the URL starts with https:/ or http:/, add the missing slash
            //Traefik removes the double slash as security measure, so we want to be forgiving and add it back if needed
            //See https://github.com/Part-DB/Part-DB-server/issues/1296
            if (preg_match('/^https?:\/[^\/]/i', $url)) {
                $url = preg_replace('/^(https?:)\/([^\/])/i', '$1//$2', $url);
            } else {
                $url = 'https://'.$url;
            }
        }

        //If this is not a valid URL with host, domain and path, throw an exception
        if (filter_var($url, FILTER_VALIDATE_URL) === false ||
            parse_url($url, PHP_URL_HOST) === null ||
            parse_url($url, PHP_URL_PATH) === null) {
            throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$originalUrl);
        }

        return $url;
    }
}

View file

@ -48,6 +48,8 @@ use Symfony\Contracts\HttpClient\HttpClientInterface;
class GenericWebProvider implements InfoProviderInterface
{
use FixAndValidateUrlTrait;
public const DISTRIBUTOR_NAME = 'Website';
private readonly HttpClientInterface $httpClient;
@ -308,34 +310,7 @@ class GenericWebProvider implements InfoProviderInterface
return null;
}
private function fixAndValidateURL(string $url): string
{
$originalUrl = $url;
//Add scheme if missing
if (!preg_match('/^https?:\/\//', $url)) {
//Remove any leading slashes
$url = ltrim($url, '/');
//If the URL starts with https:/ or http:/, add the missing slash
//Traefik removes the double slash as secruity measure, so we want to be forgiving and add it back if needed
//See https://github.com/Part-DB/Part-DB-server/issues/1296
if (preg_match('/^https?:\/[^\/]/', $url)) {
$url = preg_replace('/^(https?:)\/([^\/])/', '$1//$2', $url);
} else {
$url = 'https://'.$url;
}
}
//If this is not a valid URL with host, domain and path, throw an exception
if (filter_var($url, FILTER_VALIDATE_URL) === false ||
parse_url($url, PHP_URL_HOST) === null ||
parse_url($url, PHP_URL_PATH) === null) {
throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$originalUrl);
}
return $url;
}
public function getDetails(string $id, bool $check_for_delegation = true): PartDetailDTO
{