mirror of
https://github.com/Part-DB/Part-DB-server.git
synced 2026-05-16 00:11:35 +00:00
Refactored and cleaned up AIInfoExtractor
This commit is contained in:
parent
9cf16248e6
commit
c0017d29a7
4 changed files with 324 additions and 430 deletions
239
src/Services/InfoProviderSystem/DTOJsonSchemaConverter.php
Normal file
239
src/Services/InfoProviderSystem/DTOJsonSchemaConverter.php
Normal file
|
|
@ -0,0 +1,239 @@
|
||||||
|
<?php
|
||||||
|
/*
|
||||||
|
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
|
||||||
|
*
|
||||||
|
* Copyright (C) 2019 - 2026 Jan Böhmer (https://github.com/jbtronics)
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as published
|
||||||
|
* by the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
|
||||||
|
namespace App\Services\InfoProviderSystem;
|
||||||
|
|
||||||
|
use App\Entity\Parts\ManufacturingStatus;
|
||||||
|
use App\Services\InfoProviderSystem\DTOs\FileDTO;
|
||||||
|
use App\Services\InfoProviderSystem\DTOs\ParameterDTO;
|
||||||
|
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
|
||||||
|
use App\Services\InfoProviderSystem\DTOs\PriceDTO;
|
||||||
|
use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
|
||||||
|
|
||||||
|
/**
 * Describes the JSON structure an LLM is asked to return for a part and converts such (already decoded) JSON data
 * into the DTOs used by the info provider system later.
 */
final class DTOJsonSchemaConverter
{
    /**
     * Returns the JSON schema, that defines the expected structure of the JSON data returned by the LLM.
     *
     * The result is an array with the keys "name", "strict" and "schema". The allowed "manufacturing_status" values
     * are kept in sync with the values understood by jsonToDTO().
     *
     * NOTE(review): some platforms reportedly require `additionalProperties: false` and every property listed under
     * `required` when `strict` is true - confirm against the platform this schema is sent to.
     *
     * @return array<string, mixed>
     */
    public function getJSONSchema(): array
    {
        // Datasheets and images are both lists of "url" + optional "description" objects.
        $fileListSchema = [
            'type' => 'array',
            'items' => [
                'type' => 'object',
                'properties' => [
                    'url' => ['type' => 'string'],
                    'description' => ['type' => 'string'],
                ],
                'required' => ['url'],
            ],
        ];

        return [
            'name' => 'part_details',
            'strict' => true,
            'schema' => [
                'type' => 'object',
                'properties' => [
                    'name' => ['type' => 'string', 'description' => 'Product name'],
                    'description' => ['type' => 'string', 'description' => 'Product description'],
                    'manufacturer' => ['type' => ['string', 'null'], 'description' => 'Manufacturer name'],
                    'mpn' => ['type' => ['string', 'null'], 'description' => 'Manufacturer Part Number'],
                    'category' => ['type' => ['string', 'null'], 'description' => 'Product category'],
                    'manufacturing_status' => [
                        'type' => ['string', 'null'],
                        // Every value listed here is mapped to a ManufacturingStatus case in jsonToDTO()
                        'enum' => ['active', 'obsolete', 'nrfnd', 'discontinued', 'eol', 'announced', null],
                        'description' => 'Manufacturing status',
                    ],
                    'footprint' => ['type' => ['string', 'null'], 'description' => 'Package/footprint type'],
                    'mass' => ['type' => ['number', 'null'], 'description' => 'Mass in grams'],
                    'parameters' => [
                        'type' => 'array',
                        'items' => [
                            'type' => 'object',
                            'properties' => [
                                'name' => ['type' => 'string'],
                                'value' => ['type' => 'string'],
                                'unit' => ['type' => ['string', 'null']],
                            ],
                            'required' => ['name', 'value'],
                        ],
                    ],
                    'datasheets' => $fileListSchema,
                    'images' => $fileListSchema,
                    'vendor_infos' => [
                        'type' => 'array',
                        'items' => [
                            'type' => 'object',
                            'properties' => [
                                'distributor_name' => ['type' => 'string'],
                                'order_number' => ['type' => ['string', 'null']],
                                'product_url' => ['type' => 'string'],
                                'prices' => [
                                    'type' => 'array',
                                    'items' => [
                                        'type' => 'object',
                                        'properties' => [
                                            'minimum_quantity' => ['type' => 'integer'],
                                            'price' => ['type' => 'number'],
                                            'currency' => ['type' => 'string'],
                                        ],
                                        'required' => ['minimum_quantity', 'price', 'currency'],
                                    ],
                                ],
                            ],
                            'required' => ['distributor_name', 'product_url'],
                        ],
                    ],
                    'manufacturer_product_url' => ['type' => ['string', 'null'], 'description' => 'Manufacturer product page URL'],
                ],
                'required' => ['name', 'description'],
            ],
        ];
    }

    /**
     * Converts decoded JSON data (structured like the schema of getJSONSchema()) into a PartDetailDTO.
     *
     * Missing optional fields become null, a missing name becomes 'Unknown' and a missing description becomes ''.
     * List entries which are not arrays, or which lack a usable name/url, are skipped.
     *
     * @param array<string, mixed> $data The decoded JSON data returned by the LLM
     * @param string $providerKey Passed through as provider_key of the DTO
     * @param string $providerId Passed through as provider_id of the DTO
     * @param string|null $productUrl Used as provider_url and as product URL of vendor infos without own URL
     * @param string $distributorNameFallback Used for vendor infos without a distributor name
     */
    public function jsonToDTO(array $data, string $providerKey, string $providerId, ?string $productUrl = null, string $distributorNameFallback = '???'): PartDetailDTO
    {
        // The preview image is the first image entry which carries a usable URL
        $previewImageUrl = null;
        foreach (is_array($data['images'] ?? null) ? $data['images'] : [] as $image) {
            $previewImageUrl = $this->extractUrl($image);
            if ($previewImageUrl !== null) {
                break;
            }
        }

        return new PartDetailDTO(
            provider_key: $providerKey,
            provider_id: $providerId,
            name: $data['name'] ?? 'Unknown',
            description: $data['description'] ?? '',
            category: $data['category'] ?? null,
            manufacturer: $data['manufacturer'] ?? null,
            mpn: $data['mpn'] ?? null,
            preview_image_url: $previewImageUrl,
            manufacturing_status: $this->mapManufacturingStatus($data['manufacturing_status'] ?? null),
            provider_url: $productUrl,
            footprint: $data['footprint'] ?? null,
            notes: null,
            datasheets: $this->buildFiles($data['datasheets'] ?? null, 'Datasheet'),
            images: $this->buildFiles($data['images'] ?? null, 'Image'),
            parameters: $this->buildParameters($data['parameters'] ?? null),
            vendor_infos: $this->buildVendorInfos($data['vendor_infos'] ?? null, $productUrl, $distributorNameFallback),
            mass: isset($data['mass']) && is_numeric($data['mass']) ? (float) $data['mass'] : null,
            manufacturer_product_url: $data['manufacturer_product_url'] ?? null,
        );
    }

    /**
     * Maps the status string returned by the LLM to a ManufacturingStatus case (null if missing or unknown).
     */
    private function mapManufacturingStatus(mixed $rawStatus): ?ManufacturingStatus
    {
        if (!is_string($rawStatus)) {
            return null;
        }

        return match (strtolower(trim($rawStatus))) {
            'active' => ManufacturingStatus::ACTIVE,
            'obsolete', 'discontinued' => ManufacturingStatus::DISCONTINUED,
            'nrfnd', 'not recommended for new designs' => ManufacturingStatus::NRFND,
            'eol' => ManufacturingStatus::EOL,
            'announced' => ManufacturingStatus::ANNOUNCED,
            default => null,
        };
    }

    /**
     * Builds the parameter DTOs; returns null if no parameter list was given.
     *
     * @return ParameterDTO[]|null
     */
    private function buildParameters(mixed $rawParameters): ?array
    {
        if (!is_array($rawParameters) || $rawParameters === []) {
            return null;
        }

        $parameters = [];
        foreach ($rawParameters as $entry) {
            if (!is_array($entry)) {
                continue;
            }

            $name = $entry['name'] ?? null;
            $value = $entry['value'] ?? '';
            $unit = $entry['unit'] ?? '';
            if (!is_string($name) || $name === '' || !is_scalar($value) || !is_scalar($unit)) {
                continue;
            }

            // Combine value and unit for parsing
            $value = (string) $value;
            $unit = trim((string) $unit);
            $parameters[] = ParameterDTO::parseValueField(
                name: $name,
                value: $unit === '' ? $value : $value.' '.$unit,
            );
        }

        return $parameters;
    }

    /**
     * Builds file DTOs (datasheets or images); returns null if no list was given.
     *
     * @param string $defaultName The file name used for entries without a description
     * @return FileDTO[]|null
     */
    private function buildFiles(mixed $rawFiles, string $defaultName): ?array
    {
        if (!is_array($rawFiles) || $rawFiles === []) {
            return null;
        }

        $files = [];
        foreach ($rawFiles as $entry) {
            $url = $this->extractUrl($entry);
            if ($url === null) {
                continue;
            }

            $files[] = new FileDTO(
                url: $url,
                name: $entry['description'] ?? $defaultName,
            );
        }

        return $files;
    }

    /**
     * Returns the non-empty string stored under "url" of the given list entry, or null if there is none.
     */
    private function extractUrl(mixed $entry): ?string
    {
        $url = is_array($entry) ? ($entry['url'] ?? null) : null;

        return is_string($url) && $url !== '' ? $url : null;
    }

    /**
     * Builds the purchase info DTOs; returns null if no vendor list was given.
     *
     * @return PurchaseInfoDTO[]|null
     */
    private function buildVendorInfos(mixed $rawVendorInfos, ?string $productUrl, string $distributorNameFallback): ?array
    {
        if (!is_array($rawVendorInfos) || $rawVendorInfos === []) {
            return null;
        }

        $vendorInfos = [];
        foreach ($rawVendorInfos as $vendor) {
            if (!is_array($vendor)) {
                continue;
            }

            $vendorInfos[] = new PurchaseInfoDTO(
                distributor_name: $vendor['distributor_name'] ?? $distributorNameFallback,
                order_number: $vendor['order_number'] ?? 'Unknown',
                prices: $this->buildPrices($vendor['prices'] ?? null),
                product_url: $vendor['product_url'] ?? $productUrl,
            );
        }

        return $vendorInfos;
    }

    /**
     * Builds the price DTOs of a single vendor; returns an empty list if no prices were given.
     *
     * @return PriceDTO[]
     */
    private function buildPrices(mixed $rawPrices): array
    {
        if (!is_array($rawPrices)) {
            return [];
        }

        $prices = [];
        foreach ($rawPrices as $price) {
            if (!is_array($price)) {
                continue;
            }

            $quantity = (int) ($price['minimum_quantity'] ?? 1);
            $prices[] = new PriceDTO(
                minimum_discount_amount: $quantity,
                price: (string) ($price['price'] ?? 0),
                currency_iso_code: $price['currency'] ?? 'USD',
                // NOTE(review): the price is treated as valid for $quantity pieces. If the LLM returns
                // per-piece prices this should presumably be 1 - confirm against the prompt.
                price_related_quantity: $quantity,
            );
        }

        return $prices;
    }
}
|
||||||
|
|
@ -24,32 +24,31 @@ declare(strict_types=1);
|
||||||
|
|
||||||
namespace App\Services\InfoProviderSystem\Providers;
|
namespace App\Services\InfoProviderSystem\Providers;
|
||||||
|
|
||||||
use App\Entity\Parts\ManufacturingStatus;
|
use App\Exceptions\ProviderIDNotSupportedException;
|
||||||
use App\Services\InfoProviderSystem\DTOs\FileDTO;
|
use App\Services\InfoProviderSystem\DTOJsonSchemaConverter;
|
||||||
use App\Services\InfoProviderSystem\DTOs\ParameterDTO;
|
|
||||||
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
|
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
|
||||||
use App\Services\InfoProviderSystem\DTOs\PriceDTO;
|
|
||||||
use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
|
|
||||||
use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
|
|
||||||
use App\Settings\InfoProviderSystem\AIExtractorSettings;
|
use App\Settings\InfoProviderSystem\AIExtractorSettings;
|
||||||
use Symfony\AI\Platform\Message\Message;
|
use Symfony\AI\Platform\Message\Message;
|
||||||
use Symfony\AI\Platform\Message\MessageBag;
|
use Symfony\AI\Platform\Message\MessageBag;
|
||||||
use Symfony\AI\Platform\Platform;
|
|
||||||
use Symfony\AI\Platform\PlatformInterface;
|
use Symfony\AI\Platform\PlatformInterface;
|
||||||
use Symfony\Component\DependencyInjection\Attribute\Autowire;
|
use Symfony\Component\DependencyInjection\Attribute\Autowire;
|
||||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||||
|
|
||||||
class AIInfoExtractor implements InfoProviderInterface
|
final class AIInfoExtractor implements InfoProviderInterface
|
||||||
{
|
{
|
||||||
|
use FixAndValidateUrlTrait;
|
||||||
|
|
||||||
private const DISTRIBUTOR_NAME = 'AI Extracted';
|
private const DISTRIBUTOR_NAME = 'AI Extracted';
|
||||||
|
|
||||||
private readonly HttpClientInterface $httpClient;
|
private readonly HttpClientInterface $httpClient;
|
||||||
|
|
||||||
public function __construct(HttpClientInterface $httpClient, private readonly AIExtractorSettings $settings,
|
public function __construct(
|
||||||
|
HttpClientInterface $httpClient,
|
||||||
|
private readonly AIExtractorSettings $settings,
|
||||||
#[Autowire(service: "ai.traceable_platform.openrouter")]
|
#[Autowire(service: "ai.traceable_platform.openrouter")]
|
||||||
private readonly PlatformInterface $aiPlatform
|
private readonly PlatformInterface $aiPlatform,
|
||||||
)
|
private readonly DTOJsonSchemaConverter $jsonSchemaConverter,
|
||||||
{
|
) {
|
||||||
$this->httpClient = $httpClient->withOptions([
|
$this->httpClient = $httpClient->withOptions([
|
||||||
'timeout' => 30,
|
'timeout' => 30,
|
||||||
'headers' => [
|
'headers' => [
|
||||||
|
|
@ -82,36 +81,17 @@ class AIInfoExtractor implements InfoProviderInterface
|
||||||
|
|
||||||
public function searchByKeyword(string $keyword): array
|
public function searchByKeyword(string $keyword): array
|
||||||
{
|
{
|
||||||
// Treat the keyword as a URL and return a single search result
|
try {
|
||||||
$url = $this->normalizeURL($keyword);
|
return [
|
||||||
|
$this->getDetails($keyword)
|
||||||
//try {
|
]; } catch (ProviderIDNotSupportedException $e) {
|
||||||
$part = $this->getDetails($url);
|
return [];
|
||||||
return [
|
}
|
||||||
new SearchResultDTO(
|
|
||||||
provider_key: $this->getProviderKey(),
|
|
||||||
provider_id: $url,
|
|
||||||
name: $part->name,
|
|
||||||
description: $part->description,
|
|
||||||
category: $part->category,
|
|
||||||
manufacturer: $part->manufacturer,
|
|
||||||
mpn: $part->mpn,
|
|
||||||
preview_image_url: $part->preview_image_url,
|
|
||||||
manufacturing_status: $part->manufacturing_status,
|
|
||||||
provider_url: $part->provider_url,
|
|
||||||
footprint: $part->footprint,
|
|
||||||
gtin: $part->gtin,
|
|
||||||
),
|
|
||||||
];
|
|
||||||
//} catch (\Throwable $e) {
|
|
||||||
// // Return empty array on error
|
|
||||||
// return [];
|
|
||||||
//}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getDetails(string $id): PartDetailDTO
|
public function getDetails(string $id): PartDetailDTO
|
||||||
{
|
{
|
||||||
$url = $this->normalizeURL($id);
|
$url = $this->fixAndValidateURL($id);
|
||||||
|
|
||||||
// Fetch HTML content
|
// Fetch HTML content
|
||||||
$response = $this->httpClient->request('GET', $url);
|
$response = $this->httpClient->request('GET', $url);
|
||||||
|
|
@ -123,14 +103,11 @@ class AIInfoExtractor implements InfoProviderInterface
|
||||||
// Truncate to max content length
|
// Truncate to max content length
|
||||||
$truncatedHtml = $this->truncateHTML($cleanedHtml, $this->settings->maxContentLength);
|
$truncatedHtml = $this->truncateHTML($cleanedHtml, $this->settings->maxContentLength);
|
||||||
|
|
||||||
// Call OpenRouter API
|
// Call LLM
|
||||||
$llmResponse = $this->callOpenRouterAPI($truncatedHtml, $url);
|
$llmResponse = $this->callLLM($truncatedHtml, $url);
|
||||||
|
|
||||||
// Parse JSON response
|
|
||||||
$data = json_decode($llmResponse, true, 512, JSON_THROW_ON_ERROR);
|
|
||||||
|
|
||||||
// Build and return PartDetailDTO
|
// Build and return PartDetailDTO
|
||||||
return $this->buildPartDetailDTO($data, $url);
|
return $this->jsonSchemaConverter->jsonToDTO($llmResponse, $this->getProviderKey(), $url, $url, self::DISTRIBUTOR_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getCapabilities(): array
|
public function getCapabilities(): array
|
||||||
|
|
@ -144,21 +121,6 @@ class AIInfoExtractor implements InfoProviderInterface
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
private function normalizeURL(string $url): string
|
|
||||||
{
|
|
||||||
// Add https:// if no protocol
|
|
||||||
if (!preg_match('/^https?:\/\//', $url)) {
|
|
||||||
$url = 'https://' . ltrim($url, '/');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate URL
|
|
||||||
if (filter_var($url, FILTER_VALIDATE_URL) === false) {
|
|
||||||
throw new \InvalidArgumentException("Invalid URL: $url");
|
|
||||||
}
|
|
||||||
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function cleanHTML(string $html): string
|
private function cleanHTML(string $html): string
|
||||||
{
|
{
|
||||||
// Remove script tags
|
// Remove script tags
|
||||||
|
|
@ -201,249 +163,24 @@ class AIInfoExtractor implements InfoProviderInterface
|
||||||
return $truncated;
|
return $truncated;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function callOpenRouterAPI(string $htmlContent, string $url): string
|
private function callLLM(string $htmlContent, string $url): array
|
||||||
{
|
{
|
||||||
$input = new MessageBag(
|
$input = new MessageBag(
|
||||||
Message::forSystem($this->buildSystemPrompt()),
|
Message::forSystem($this->buildSystemPrompt()),
|
||||||
Message::ofUser("Extract part information from this webpage content:\n\nURL: $url\n\n$htmlContent")
|
Message::ofUser("Extract part information from this webpage content:\n\nURL: $url\n\n$htmlContent")
|
||||||
);
|
);
|
||||||
|
|
||||||
$models = $this->aiPlatform->getModelCatalog()->getModels();
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
//'openai/gpt-5-mini'
|
//'openai/gpt-5-mini'
|
||||||
$result = $this->aiPlatform->invoke('openrouter/auto', $input, [
|
$result = $this->aiPlatform->invoke('openrouter/auto', $input, [
|
||||||
'response_format' => 'json_schema',
|
'response_format' => 'json_schema',
|
||||||
'json_schema' => [
|
'json_schema' => $this->jsonSchemaConverter->getJSONSchema(),
|
||||||
'name' => 'clock',
|
|
||||||
'strict' => true,
|
|
||||||
'schema' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'name' => ['type' => 'string', 'description' => 'Product name'],
|
|
||||||
'description' => ['type' => 'string', 'description' => 'Product description'],
|
|
||||||
'manufacturer' => ['type' => ['string', 'null'], 'description' => 'Manufacturer name'],
|
|
||||||
'mpn' => ['type' => ['string', 'null'], 'description' => 'Manufacturer Part Number'],
|
|
||||||
'category' => ['type' => ['string', 'null'], 'description' => 'Product category'],
|
|
||||||
'manufacturing_status' => ['type' => ['string', 'null'], 'enum' => ['active', 'obsolete', 'nrfnd', 'discontinued', null], 'description' => 'Manufacturing status'],
|
|
||||||
'footprint' => ['type' => ['string', 'null'], 'description' => 'Package/footprint type'],
|
|
||||||
'mass' => ['type' => ['number', 'null'], 'description' => 'Mass in grams'],
|
|
||||||
'parameters' => [
|
|
||||||
'type' => 'array',
|
|
||||||
'items' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'name' => ['type' => 'string'],
|
|
||||||
'value' => ['type' => 'string'],
|
|
||||||
'unit' => ['type' => ['string', 'null']],
|
|
||||||
],
|
|
||||||
'required' => ['name', 'value'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'datasheets' => [
|
|
||||||
'type' => 'array',
|
|
||||||
'items' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'url' => ['type' => 'string'],
|
|
||||||
'description' => ['type' => 'string'],
|
|
||||||
],
|
|
||||||
'required' => ['url'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'images' => [
|
|
||||||
'type' => 'array',
|
|
||||||
'items' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'url' => ['type' => 'string'],
|
|
||||||
'description' => ['type' => 'string'],
|
|
||||||
],
|
|
||||||
'required' => ['url'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'vendor_infos' => [
|
|
||||||
'type' => 'array',
|
|
||||||
'items' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'distributor_name' => ['type' => 'string'],
|
|
||||||
'order_number' => ['type' => ['string', 'null']],
|
|
||||||
'product_url' => ['type' => 'string'],
|
|
||||||
'prices' => [
|
|
||||||
'type' => 'array',
|
|
||||||
'items' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'minimum_quantity' => ['type' => 'integer'],
|
|
||||||
'price' => ['type' => 'number'],
|
|
||||||
'currency' => ['type' => 'string'],
|
|
||||||
],
|
|
||||||
'required' => ['minimum_quantity', 'price', 'currency'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'required' => ['distributor_name', 'product_url'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'manufacturer_product_url' => ['type' => ['string', 'null'], 'description' => 'Manufacturer product page URL'],
|
|
||||||
],
|
|
||||||
'required' => ['name', 'description'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
]);
|
]);
|
||||||
} catch (\Throwable $e) {
|
} catch (\Throwable $e) {
|
||||||
dump($e);
|
throw new \RuntimeException('LLM invocation failed: '.$e->getMessage(), previous: $e);
|
||||||
throw new \RuntimeException('LLM invocation failed: ' . $e->getMessage(), previous: $e);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return $result->getResult()->getContent();
|
||||||
|
|
||||||
dump($result->getResult()->getContent());
|
|
||||||
|
|
||||||
return json_encode($result->getResult()->getContent());
|
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
$systemPrompt = $this->buildSystemPrompt();
|
|
||||||
|
|
||||||
// Define the tool/function for structured output
|
|
||||||
$toolDefinition = [
|
|
||||||
'type' => 'function',
|
|
||||||
'function' => [
|
|
||||||
'name' => 'extract_part_info',
|
|
||||||
'description' => 'Extract electronic component information from a webpage',
|
|
||||||
'parameters' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'name' => ['type' => 'string', 'description' => 'Product name'],
|
|
||||||
'description' => ['type' => 'string', 'description' => 'Product description'],
|
|
||||||
'manufacturer' => ['type' => ['string', 'null'], 'description' => 'Manufacturer name'],
|
|
||||||
'mpn' => ['type' => ['string', 'null'], 'description' => 'Manufacturer Part Number'],
|
|
||||||
'category' => ['type' => ['string', 'null'], 'description' => 'Product category'],
|
|
||||||
'manufacturing_status' => ['type' => ['string', 'null'], 'enum' => ['active', 'obsolete', 'nrfnd', 'discontinued', null], 'description' => 'Manufacturing status'],
|
|
||||||
'footprint' => ['type' => ['string', 'null'], 'description' => 'Package/footprint type'],
|
|
||||||
'mass' => ['type' => ['number', 'null'], 'description' => 'Mass in grams'],
|
|
||||||
'parameters' => [
|
|
||||||
'type' => 'array',
|
|
||||||
'items' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'name' => ['type' => 'string'],
|
|
||||||
'value' => ['type' => 'string'],
|
|
||||||
'unit' => ['type' => ['string', 'null']],
|
|
||||||
],
|
|
||||||
'required' => ['name', 'value'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'datasheets' => [
|
|
||||||
'type' => 'array',
|
|
||||||
'items' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'url' => ['type' => 'string'],
|
|
||||||
'description' => ['type' => 'string'],
|
|
||||||
],
|
|
||||||
'required' => ['url'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'images' => [
|
|
||||||
'type' => 'array',
|
|
||||||
'items' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'url' => ['type' => 'string'],
|
|
||||||
'description' => ['type' => 'string'],
|
|
||||||
],
|
|
||||||
'required' => ['url'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'vendor_infos' => [
|
|
||||||
'type' => 'array',
|
|
||||||
'items' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'distributor_name' => ['type' => 'string'],
|
|
||||||
'order_number' => ['type' => ['string', 'null']],
|
|
||||||
'product_url' => ['type' => 'string'],
|
|
||||||
'prices' => [
|
|
||||||
'type' => 'array',
|
|
||||||
'items' => [
|
|
||||||
'type' => 'object',
|
|
||||||
'properties' => [
|
|
||||||
'minimum_quantity' => ['type' => 'integer'],
|
|
||||||
'price' => ['type' => 'number'],
|
|
||||||
'currency' => ['type' => 'string'],
|
|
||||||
],
|
|
||||||
'required' => ['minimum_quantity', 'price', 'currency'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'required' => ['distributor_name', 'product_url'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'manufacturer_product_url' => ['type' => ['string', 'null'], 'description' => 'Manufacturer product page URL'],
|
|
||||||
],
|
|
||||||
'required' => ['name', 'description'],
|
|
||||||
],
|
|
||||||
],
|
|
||||||
];
|
|
||||||
|
|
||||||
$payload = [
|
|
||||||
'model' => $this->settings->model,
|
|
||||||
'messages' => [
|
|
||||||
[
|
|
||||||
'role' => 'system',
|
|
||||||
'content' => $systemPrompt,
|
|
||||||
],
|
|
||||||
[
|
|
||||||
'role' => 'user',
|
|
||||||
'content' => "Extract part information from this webpage content:\n\nURL: $url\n\n$htmlContent",
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'tools' => [$toolDefinition],
|
|
||||||
'tool_choice' => ['type' => 'function', 'function' => ['name' => 'extract_part_info']],
|
|
||||||
'max_tokens' => 4096,
|
|
||||||
'temperature' => 0.1,
|
|
||||||
];
|
|
||||||
|
|
||||||
$response = $this->httpClient->request('POST', 'https://openrouter.ai/api/v1/chat/completions', [
|
|
||||||
'headers' => [
|
|
||||||
'Authorization' => 'Bearer ' . $this->settings->apiKey,
|
|
||||||
'Content-Type' => 'application/json',
|
|
||||||
'HTTP-Referer' => 'https://github.com/Part-DB/Part-DB-server',
|
|
||||||
'X-Title' => 'Part-DB AI Info Extractor',
|
|
||||||
],
|
|
||||||
'json' => $payload,
|
|
||||||
]);
|
|
||||||
|
|
||||||
$data = $response->toArray();
|
|
||||||
|
|
||||||
$message = $data['choices'][0]['message'] ?? null;
|
|
||||||
if ($message === null) {
|
|
||||||
throw new \RuntimeException('No response message from LLM');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if the model used the tool/function call
|
|
||||||
if (isset($message['tool_calls']) && !empty($message['tool_calls'])) {
|
|
||||||
foreach ($message['tool_calls'] as $toolCall) {
|
|
||||||
if ($toolCall['function']['name'] === 'extract_part_info') {
|
|
||||||
return $toolCall['function']['arguments'];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback to content if no tool call (some models might not support tool calling)
|
|
||||||
$content = $message['content'] ?? throw new \RuntimeException('No response content from LLM');
|
|
||||||
|
|
||||||
// Strip markdown code blocks if present (fallback for models without tool support)
|
|
||||||
$content = preg_replace('/^```(?:json)?\s*\n?/i', '', $content);
|
|
||||||
$content = preg_replace('/\n?```\s*$/i', '', $content);
|
|
||||||
$content = trim($content);
|
|
||||||
|
|
||||||
return $content;
|
|
||||||
*/
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function buildSystemPrompt(): string
|
private function buildSystemPrompt(): string
|
||||||
|
|
@ -485,119 +222,4 @@ For parameters, combine name, value, and unit. The unit should be separate if po
|
||||||
PROMPT;
|
PROMPT;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function buildPartDetailDTO(array $data, string $url): PartDetailDTO
|
|
||||||
{
|
|
||||||
// Map manufacturing status
|
|
||||||
$manufacturingStatus = null;
|
|
||||||
if (!empty($data['manufacturing_status'])) {
|
|
||||||
$status = strtolower((string) $data['manufacturing_status']);
|
|
||||||
$manufacturingStatus = match ($status) {
|
|
||||||
'active' => ManufacturingStatus::ACTIVE,
|
|
||||||
'obsolete', 'discontinued' => ManufacturingStatus::DISCONTINUED,
|
|
||||||
'nrfnd', 'not recommended for new designs' => ManufacturingStatus::NRFND,
|
|
||||||
'eol' => ManufacturingStatus::EOL,
|
|
||||||
'announced' => ManufacturingStatus::ANNOUNCED,
|
|
||||||
default => null,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build parameters
|
|
||||||
$parameters = null;
|
|
||||||
if (!empty($data['parameters']) && is_array($data['parameters'])) {
|
|
||||||
$parameters = [];
|
|
||||||
foreach ($data['parameters'] as $p) {
|
|
||||||
if (!empty($p['name'])) {
|
|
||||||
$value = $p['value'] ?? '';
|
|
||||||
$unit = $p['unit'] ?? null;
|
|
||||||
// Combine value and unit for parsing
|
|
||||||
$valueWithUnit = $unit ? $value . ' ' . $unit : $value;
|
|
||||||
$parameters[] = ParameterDTO::parseValueField(
|
|
||||||
name: $p['name'],
|
|
||||||
value: $valueWithUnit
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build datasheets
|
|
||||||
$datasheets = null;
|
|
||||||
if (!empty($data['datasheets']) && is_array($data['datasheets'])) {
|
|
||||||
$datasheets = [];
|
|
||||||
foreach ($data['datasheets'] as $d) {
|
|
||||||
if (!empty($d['url'])) {
|
|
||||||
$datasheets[] = new FileDTO(
|
|
||||||
url: $d['url'],
|
|
||||||
name: $d['description'] ?? 'Datasheet'
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build images
|
|
||||||
$images = null;
|
|
||||||
if (!empty($data['images']) && is_array($data['images'])) {
|
|
||||||
$images = [];
|
|
||||||
foreach ($data['images'] as $i) {
|
|
||||||
if (!empty($i['url'])) {
|
|
||||||
$images[] = new FileDTO(
|
|
||||||
url: $i['url'],
|
|
||||||
name: $i['description'] ?? 'Image'
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build vendor infos
|
|
||||||
$vendorInfos = null;
|
|
||||||
if (!empty($data['vendor_infos']) && is_array($data['vendor_infos'])) {
|
|
||||||
$vendorInfos = [];
|
|
||||||
foreach ($data['vendor_infos'] as $v) {
|
|
||||||
$prices = [];
|
|
||||||
if (!empty($v['prices']) && is_array($v['prices'])) {
|
|
||||||
foreach ($v['prices'] as $p) {
|
|
||||||
$prices[] = new PriceDTO(
|
|
||||||
minimum_discount_amount: (int) ($p['minimum_quantity'] ?? 1),
|
|
||||||
price: (string) ($p['price'] ?? 0),
|
|
||||||
currency_iso_code: $p['currency'] ?? 'USD',
|
|
||||||
price_related_quantity: (int) ($p['minimum_quantity'] ?? 1),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$vendorInfos[] = new PurchaseInfoDTO(
|
|
||||||
distributor_name: $v['distributor_name'] ?? self::DISTRIBUTOR_NAME,
|
|
||||||
order_number: $v['order_number'] ?? 'Unknown',
|
|
||||||
prices: $prices,
|
|
||||||
product_url: $v['product_url'] ?? $url,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get preview image URL
|
|
||||||
$previewImageUrl = null;
|
|
||||||
if (!empty($data['images']) && is_array($data['images']) && !empty($data['images'][0]['url'])) {
|
|
||||||
$previewImageUrl = $data['images'][0]['url'];
|
|
||||||
}
|
|
||||||
|
|
||||||
return new PartDetailDTO(
|
|
||||||
provider_key: $this->getProviderKey(),
|
|
||||||
provider_id: $url,
|
|
||||||
name: $data['name'] ?? 'Unknown',
|
|
||||||
description: $data['description'] ?? '',
|
|
||||||
category: $data['category'] ?? null,
|
|
||||||
manufacturer: $data['manufacturer'] ?? null,
|
|
||||||
mpn: $data['mpn'] ?? null,
|
|
||||||
preview_image_url: $previewImageUrl,
|
|
||||||
manufacturing_status: $manufacturingStatus,
|
|
||||||
provider_url: $url,
|
|
||||||
footprint: $data['footprint'] ?? null,
|
|
||||||
mass: isset($data['mass']) && is_numeric($data['mass']) ? (float) $data['mass'] : null,
|
|
||||||
notes: null,
|
|
||||||
datasheets: $datasheets,
|
|
||||||
images: $images,
|
|
||||||
parameters: $parameters,
|
|
||||||
vendor_infos: $vendorInfos,
|
|
||||||
manufacturer_product_url: $data['manufacturer_product_url'] ?? null,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
<?php
|
||||||
|
/*
|
||||||
|
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
|
||||||
|
*
|
||||||
|
* Copyright (C) 2019 - 2026 Jan Böhmer (https://github.com/jbtronics)
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as published
|
||||||
|
* by the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
|
||||||
|
namespace App\Services\InfoProviderSystem\Providers;
|
||||||
|
|
||||||
|
use App\Exceptions\ProviderIDNotSupportedException;
|
||||||
|
|
||||||
|
trait FixAndValidateUrlTrait
{
    /**
     * Turns a user supplied URL-like string into a full URL and validates it.
     *
     * Surrounding whitespace is removed, a missing scheme is replaced by "https://" and a scheme with only a
     * single slash ("https:/host") gets its second slash back. The result must be a valid URL with a host and
     * a path.
     *
     * @param string $url The URL (or URL-like ID) as entered by the user
     * @return string The normalized URL
     * @throws ProviderIDNotSupportedException If the given string can not be turned into a valid URL
     */
    private function fixAndValidateURL(string $url): string
    {
        $originalUrl = $url;

        //Be forgiving about whitespace around a pasted URL
        $url = trim($url);

        //Add scheme if missing (the scheme is matched case-insensitively)
        if (preg_match('/^https?:\/\//i', $url) !== 1) {
            //Remove any leading slashes
            $url = ltrim($url, '/');

            //If the URL starts with https:/ or http:/, add the missing slash
            //Traefik removes the double slash as security measure, so we want to be forgiving and add it back if needed
            //See https://github.com/Part-DB/Part-DB-server/issues/1296
            if (preg_match('/^https?:\/[^\/]/i', $url) === 1) {
                //preg_replace returns null on error, keep the URL untouched in that case
                $url = preg_replace('/^(https?:)\/([^\/])/i', '$1//$2', $url) ?? $url;
            } else {
                $url = 'https://'.$url;
            }
        }

        //If this is not a valid URL with host, domain and path, throw an exception
        //parse_url returns null for a missing component and false for a malformed URL, so require a string
        if (filter_var($url, FILTER_VALIDATE_URL) === false
            || !is_string(parse_url($url, PHP_URL_HOST))
            || !is_string(parse_url($url, PHP_URL_PATH))) {
            throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$originalUrl);
        }

        return $url;
    }
}
|
||||||
|
|
@ -48,6 +48,8 @@ use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||||
class GenericWebProvider implements InfoProviderInterface
|
class GenericWebProvider implements InfoProviderInterface
|
||||||
{
|
{
|
||||||
|
|
||||||
|
use FixAndValidateUrlTrait;
|
||||||
|
|
||||||
public const DISTRIBUTOR_NAME = 'Website';
|
public const DISTRIBUTOR_NAME = 'Website';
|
||||||
|
|
||||||
private readonly HttpClientInterface $httpClient;
|
private readonly HttpClientInterface $httpClient;
|
||||||
|
|
@ -308,34 +310,7 @@ class GenericWebProvider implements InfoProviderInterface
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function fixAndValidateURL(string $url): string
|
|
||||||
{
|
|
||||||
$originalUrl = $url;
|
|
||||||
|
|
||||||
//Add scheme if missing
|
|
||||||
if (!preg_match('/^https?:\/\//', $url)) {
|
|
||||||
//Remove any leading slashes
|
|
||||||
$url = ltrim($url, '/');
|
|
||||||
|
|
||||||
//If the URL starts with https:/ or http:/, add the missing slash
|
|
||||||
//Traefik removes the double slash as secruity measure, so we want to be forgiving and add it back if needed
|
|
||||||
//See https://github.com/Part-DB/Part-DB-server/issues/1296
|
|
||||||
if (preg_match('/^https?:\/[^\/]/', $url)) {
|
|
||||||
$url = preg_replace('/^(https?:)\/([^\/])/', '$1//$2', $url);
|
|
||||||
} else {
|
|
||||||
$url = 'https://'.$url;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//If this is not a valid URL with host, domain and path, throw an exception
|
|
||||||
if (filter_var($url, FILTER_VALIDATE_URL) === false ||
|
|
||||||
parse_url($url, PHP_URL_HOST) === null ||
|
|
||||||
parse_url($url, PHP_URL_PATH) === null) {
|
|
||||||
throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$originalUrl);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getDetails(string $id, bool $check_for_delegation = true): PartDetailDTO
|
public function getDetails(string $id, bool $check_for_delegation = true): PartDetailDTO
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue