Merge branch 'generic_webshop'

This commit is contained in:
Jan Böhmer 2026-02-01 18:26:30 +01:00
commit 79f88c66d6
11 changed files with 571 additions and 5 deletions

View file

@ -96,6 +96,21 @@ The following providers are currently available and shipped with Part-DB:
(All trademarks are property of their respective owners. Part-DB is not affiliated with any of the companies.)
### Generic Web URL Provider
The Generic Web URL Provider can extract part information from any webpage that contains structured data in the form of
[Schema.org](https://schema.org/) format. Many e-commerce websites use this format to provide detailed product information
for search engines and other services. Therefore it allows Part-DB to retrieve rudimentary part information (like name, image and price)
from a wide range of websites without the need for a dedicated API integration.
To use the Generic Web URL Provider, simply enable it in the information provider settings. No additional configuration
is required. Afterwards you can enter any product URL in the search field, and Part-DB will attempt to extract the relevant part information
from the webpage.
Please note that if this provider is enabled, Part-DB will make HTTP requests to external websites to fetch product data, which
may have privacy and security implications.
Following env configuration options are available:
* `PROVIDER_GENERIC_WEB_ENABLED`: Set this to `1` to enable the Generic Web URL Provider (optional, default: `0`)
### Octopart
The Octopart provider uses the [Octopart / Nexar API](https://nexar.com/api) to search for parts and get information.

View file

@ -30,6 +30,7 @@ use App\Form\InfoProviderSystem\PartSearchType;
use App\Services\InfoProviderSystem\ExistingPartFinder;
use App\Services\InfoProviderSystem\PartInfoRetriever;
use App\Services\InfoProviderSystem\ProviderRegistry;
use App\Services\InfoProviderSystem\Providers\GenericWebProvider;
use App\Settings\AppSettings;
use App\Settings\InfoProviderSystem\InfoProviderGeneralSettings;
use Doctrine\ORM\EntityManagerInterface;
@ -39,6 +40,7 @@ use Psr\Log\LoggerInterface;
use Symfony\Bridge\Doctrine\Attribute\MapEntity;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\Form\Extension\Core\Type\SubmitType;
use Symfony\Component\Form\Extension\Core\Type\UrlType;
use Symfony\Component\HttpClient\Exception\ClientException;
use Symfony\Component\HttpClient\Exception\TransportException;
use Symfony\Component\HttpFoundation\Request;
@ -208,4 +210,58 @@ class InfoProviderController extends AbstractController
'update_target' => $update_target
]);
}
#[Route('/from_url', name: 'info_providers_from_url')]
public function fromURL(Request $request, GenericWebProvider $provider): Response
{
$this->denyAccessUnlessGranted('@info_providers.create_parts');
if (!$provider->isActive()) {
$this->addFlash('error', "Generic Web Provider is not active. Please enable it in the provider settings.");
return $this->redirectToRoute('info_providers_list');
}
$formBuilder = $this->createFormBuilder();
$formBuilder->add('url', UrlType::class, [
'label' => 'info_providers.from_url.url.label',
'required' => true,
]);
$formBuilder->add('submit', SubmitType::class, [
'label' => 'info_providers.search.submit',
]);
$form = $formBuilder->getForm();
$form->handleRequest($request);
$partDetail = null;
if ($form->isSubmitted() && $form->isValid()) {
//Try to retrieve the part detail from the given URL
$url = $form->get('url')->getData();
try {
$searchResult = $this->infoRetriever->searchByKeyword(
keyword: $url,
providers: [$provider]
);
if (count($searchResult) === 0) {
$this->addFlash('warning', t('info_providers.from_url.no_part_found'));
} else {
$searchResult = $searchResult[0];
//Redirect to the part creation page with the found part detail
return $this->redirectToRoute('info_providers_create_part', [
'providerKey' => $searchResult->provider_key,
'providerId' => $searchResult->provider_id,
]);
}
} catch (ExceptionInterface $e) {
$this->addFlash('error', t('info_providers.search.error.general_exception', ['%type%' => (new \ReflectionClass($e))->getShortName()]));
}
}
return $this->render('info_providers/from_url/from_url.html.twig', [
'form' => $form,
'partDetail' => $partDetail,
]);
}
}

View file

@ -0,0 +1,32 @@
<?php
/*
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
*
* Copyright (C) 2019 - 2026 Jan Böhmer (https://github.com/jbtronics)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace App\Exceptions;
class ProviderIDNotSupportedException extends \RuntimeException
{
public function fromProvider(string $providerKey, string $id): self
{
return new self(sprintf('The given ID %s is not supported by the provider %s.', $id, $providerKey,));
}
}

View file

@ -0,0 +1,336 @@
<?php
/*
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
*
* Copyright (C) 2019 - 2026 Jan Böhmer (https://github.com/jbtronics)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace App\Services\InfoProviderSystem\Providers;
use App\Exceptions\ProviderIDNotSupportedException;
use App\Services\InfoProviderSystem\DTOs\ParameterDTO;
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
use App\Services\InfoProviderSystem\DTOs\PriceDTO;
use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
use App\Settings\InfoProviderSystem\GenericWebProviderSettings;
use PhpOffice\PhpSpreadsheet\Calculation\Financial\Securities\Price;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Contracts\HttpClient\HttpClientInterface;
class GenericWebProvider implements InfoProviderInterface
{
public const DISTRIBUTOR_NAME = 'Website';
private readonly HttpClientInterface $httpClient;
public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings)
{
$this->httpClient = $httpClient->withOptions(
[
'headers' => [
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36',
],
'timeout' => 15,
]
);
}
public function getProviderInfo(): array
{
return [
'name' => 'Generic Web URL',
'description' => 'Tries to extract a part from a given product webpage URL using common metadata standards like JSON-LD and OpenGraph.',
//'url' => 'https://example.com',
'disabled_help' => 'Enable in settings to use this provider',
'settings_class' => GenericWebProviderSettings::class,
];
}
public function getProviderKey(): string
{
return 'generic_web';
}
public function isActive(): bool
{
return $this->settings->enabled;
}
public function searchByKeyword(string $keyword): array
{
try {
return [
$this->getDetails($keyword)
]; } catch (ProviderIDNotSupportedException $e) {
return [];
}
}
private function extractShopName(string $url): string
{
$host = parse_url($url, PHP_URL_HOST);
if ($host === false || $host === null) {
return self::DISTRIBUTOR_NAME;
}
return $host;
}
private function productJsonLdToPart(array $jsonLd, string $url, Crawler $dom): PartDetailDTO
{
$notes = $jsonLd['description'] ?? "";
if (isset($jsonLd['disambiguatingDescription'])) {
if (!empty($notes)) {
$notes .= "\n\n";
}
$notes .= $jsonLd['disambiguatingDescription'];
}
$vendor_infos = null;
if (isset($jsonLd['offers'])) {
if (array_is_list($jsonLd['offers'])) {
$offer = $jsonLd['offers'][0];
} else {
$offer = $jsonLd['offers'];
}
//Make $jsonLd['url'] absolute if it's relative
if (isset($jsonLd['url']) && parse_url($jsonLd['url'], PHP_URL_SCHEME) === null) {
$parsedUrl = parse_url($url);
$scheme = $parsedUrl['scheme'] ?? 'https';
$host = $parsedUrl['host'] ?? '';
$jsonLd['url'] = $scheme.'://'.$host.$jsonLd['url'];
}
$prices = [];
if (isset($offer['price'])) {
$prices[] = new PriceDTO(
minimum_discount_amount: 1,
price: (string) $offer['price'],
currency_iso_code: $offer['priceCurrency'] ?? null
);
} else if (isset($offer['offers']) && array_is_list($offer['offers'])) {
//Some sites nest offers
foreach ($offer['offers'] as $subOffer) {
if (isset($subOffer['price'])) {
$prices[] = new PriceDTO(
minimum_discount_amount: 1,
price: (string) $subOffer['price'],
currency_iso_code: $subOffer['priceCurrency'] ?? null
);
}
}
}
$vendor_infos = [new PurchaseInfoDTO(
distributor_name: $this->extractShopName($url),
order_number: (string) ($jsonLd['sku'] ?? $jsonLd['@id'] ?? $jsonLd['gtin'] ?? 'Unknown'),
prices: $prices,
product_url: $jsonLd['url'] ?? $url,
)];
}
$image = null;
if (isset($jsonLd['image'])) {
if (is_array($jsonLd['image'])) {
if (array_is_list($jsonLd['image'])) {
$image = $jsonLd['image'][0] ?? null;
}
} elseif (is_string($jsonLd['image'])) {
$image = $jsonLd['image'];
}
}
//If image is an object with @type ImageObject, extract the url
if (is_array($image) && isset($image['@type']) && $image['@type'] === 'ImageObject') {
$image = $image['contentUrl'] ?? $image['url'] ?? null;
}
//Try to extract parameters from additionalProperty
$parameters = [];
if (isset($jsonLd['additionalProperty']) && array_is_list($jsonLd['additionalProperty'])) {
foreach ($jsonLd['additionalProperty'] as $property) { //TODO: Handle minValue and maxValue
if (isset ($property['unitText'])) {
$parameters[] = ParameterDTO::parseValueField(
name: $property['name'] ?? 'Unknown',
value: $property['value'] ?? '',
unit: $property['unitText']
);
} else {
$parameters[] = ParameterDTO::parseValueIncludingUnit(
name: $property['name'] ?? 'Unknown',
value: $property['value'] ?? ''
);
}
}
}
return new PartDetailDTO(
provider_key: $this->getProviderKey(),
provider_id: $url,
name: $jsonLd ['name'] ?? 'Unknown Name',
description: $this->getMetaContent($dom, 'og:description') ?? $this->getMetaContent($dom, 'description') ?? '',
category: isset($jsonLd['category']) && is_string($jsonLd['category']) ? $jsonLd['category'] : null,
manufacturer: $jsonLd['manufacturer']['name'] ?? $jsonLd['brand']['name'] ?? null,
mpn: $jsonLd['mpn'] ?? null,
preview_image_url: $image,
provider_url: $url,
notes: $notes,
parameters: $parameters,
vendor_infos: $vendor_infos,
mass: isset($jsonLd['weight']['value']) ? (float)$jsonLd['weight']['value'] : null,
);
}
/**
* Decodes JSON in a forgiving way, trying to fix common issues.
* @param string $json
* @return array
* @throws \JsonException
*/
private function json_decode_forgiving(string $json): array
{
//Sanitize common issues
$json = preg_replace("/[\r\n]+/", " ", $json);
return json_decode($json, true, 512, JSON_THROW_ON_ERROR);
}
private function getMetaContent(Crawler $dom, string $name): ?string
{
$meta = $dom->filter('meta[property="'.$name.'"]');
if ($meta->count() > 0) {
return $meta->attr('content');
}
//Try name attribute
$meta = $dom->filter('meta[name="'.$name.'"]');
if ($meta->count() > 0) {
return $meta->attr('content');
}
return null;
}
public function getDetails(string $id): PartDetailDTO
{
//Add scheme if missing
if (!preg_match('/^https?:\/\//', $id)) {
//Remove any leading slashes
$id = ltrim($id, '/');
$id = 'https://'.$id;
}
$url = $id;
//If this is not a valid URL with host, domain and path, throw an exception
if (filter_var($url, FILTER_VALIDATE_URL) === false ||
parse_url($url, PHP_URL_HOST) === null ||
parse_url($url, PHP_URL_PATH) === null) {
throw new ProviderIDNotSupportedException("The given ID is not a valid URL: ".$id);
}
//Try to get the webpage content
$response = $this->httpClient->request('GET', $url);
$content = $response->getContent();
$dom = new Crawler($content);
//Try to determine a canonical URL
$canonicalURL = $url;
if ($dom->filter('link[rel="canonical"]')->count() > 0) {
$canonicalURL = $dom->filter('link[rel="canonical"]')->attr('href');
} else if ($dom->filter('meta[property="og:url"]')->count() > 0) {
$canonicalURL = $dom->filter('meta[property="og:url"]')->attr('content');
}
//If the canonical URL is relative, make it absolute
if (parse_url($canonicalURL, PHP_URL_SCHEME) === null) {
$parsedUrl = parse_url($url);
$scheme = $parsedUrl['scheme'] ?? 'https';
$host = $parsedUrl['host'] ?? '';
$canonicalURL = $scheme.'://'.$host.$canonicalURL;
}
//Try to find json-ld data in the head
$jsonLdNodes = $dom->filter('script[type="application/ld+json"]');
foreach ($jsonLdNodes as $node) {
$jsonLd = $this->json_decode_forgiving($node->textContent);
//If the content of json-ld is an array, try to find a product inside
if (!array_is_list($jsonLd)) {
$jsonLd = [$jsonLd];
}
foreach ($jsonLd as $item) {
if (isset($item['@type']) && $item['@type'] === 'Product') {
return $this->productJsonLdToPart($item, $canonicalURL, $dom);
}
}
}
//If no JSON-LD data is found, try to extract basic data from meta tags
$pageTitle = $dom->filter('title')->count() > 0 ? $dom->filter('title')->text() : 'Unknown';
$prices = [];
if ($price = $this->getMetaContent($dom, 'product:price:amount')) {
$prices[] = new PriceDTO(
minimum_discount_amount: 1,
price: $price,
currency_iso_code: $this->getMetaContent($dom, 'product:price:currency'),
);
} else {
//Amazon fallback
$amazonAmount = $dom->filter('input[type="hidden"][name*="amount"]');
if ($amazonAmount->count() > 0) {
$prices[] = new PriceDTO(
minimum_discount_amount: 1,
price: $amazonAmount->first()->attr('value'),
currency_iso_code: $dom->filter('input[type="hidden"][name*="currencyCode"]')->first()->attr('value'),
);
}
}
$vendor_infos = [new PurchaseInfoDTO(
distributor_name: $this->extractShopName($canonicalURL),
order_number: 'Unknown',
prices: $prices,
product_url: $canonicalURL,
)];
return new PartDetailDTO(
provider_key: $this->getProviderKey(),
provider_id: $canonicalURL,
name: $this->getMetaContent($dom, 'og:title') ?? $pageTitle,
description: $this->getMetaContent($dom, 'og:description') ?? $this->getMetaContent($dom, 'description') ?? '',
manufacturer: $this->getMetaContent($dom, 'product:brand'),
preview_image_url: $this->getMetaContent($dom, 'og:image'),
provider_url: $canonicalURL,
vendor_infos: $vendor_infos,
);
}
public function getCapabilities(): array
{
return [
ProviderCapabilities::BASIC,
ProviderCapabilities::PICTURE,
ProviderCapabilities::PRICE
];
}
}

View file

@ -39,6 +39,8 @@ use App\Entity\UserSystem\User;
use App\Helpers\Trees\TreeViewNode;
use App\Services\Cache\UserCacheKeyGenerator;
use App\Services\ElementTypeNameGenerator;
use App\Services\InfoProviderSystem\Providers\GenericWebProvider;
use App\Settings\InfoProviderSystem\GenericWebProviderSettings;
use Symfony\Bundle\SecurityBundle\Security;
use Symfony\Component\Routing\Generator\UrlGeneratorInterface;
use Symfony\Contracts\Cache\ItemInterface;
@ -58,6 +60,7 @@ class ToolsTreeBuilder
protected UserCacheKeyGenerator $keyGenerator,
protected Security $security,
private readonly ElementTypeNameGenerator $elementTypeNameGenerator,
private readonly GenericWebProviderSettings $genericWebProviderSettings
) {
}
@ -147,6 +150,13 @@ class ToolsTreeBuilder
$this->urlGenerator->generate('info_providers_search')
))->setIcon('fa-treeview fa-fw fa-solid fa-cloud-arrow-down');
if ($this->genericWebProviderSettings->enabled) {
$nodes[] = (new TreeViewNode(
$this->translator->trans('info_providers.from_url.title'),
$this->urlGenerator->generate('info_providers_from_url')
))->setIcon('fa-treeview fa-fw fa-solid fa-book-atlas');
}
$nodes[] = (new TreeViewNode(
$this->translator->trans('info_providers.bulk_import.manage_jobs'),
$this->urlGenerator->generate('bulk_info_provider_manage')

View file

@ -0,0 +1,43 @@
<?php
/*
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
*
* Copyright (C) 2019 - 2026 Jan Böhmer (https://github.com/jbtronics)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace App\Settings\InfoProviderSystem;
use App\Settings\SettingsIcon;
use Jbtronics\SettingsBundle\Metadata\EnvVarMode;
use Jbtronics\SettingsBundle\Settings\Settings;
use Jbtronics\SettingsBundle\Settings\SettingsParameter;
use Jbtronics\SettingsBundle\Settings\SettingsTrait;
use Symfony\Component\Translation\TranslatableMessage as TM;
#[Settings(name: "generic_web_provider", label: new TM("settings.ips.generic_web_provider"), description: new TM("settings.ips.generic_web_provider.description"))]
#[SettingsIcon("fa-plug")]
class GenericWebProviderSettings
{
use SettingsTrait;
#[SettingsParameter(label: new TM("settings.ips.lcsc.enabled"), description: new TM("settings.ips.generic_web_provider.enabled.help"),
envVar: "bool:PROVIDER_GENERIC_WEB_ENABLED", envVarMode: EnvVarMode::OVERWRITE
)]
public bool $enabled = false;
}

View file

@ -37,6 +37,9 @@ class InfoProviderSettings
#[EmbeddedSettings]
public ?InfoProviderGeneralSettings $general = null;
#[EmbeddedSettings]
public ?GenericWebProviderSettings $genericWebProvider = null;
#[EmbeddedSettings]
public ?DigikeySettings $digikey = null;

View file

@ -10,9 +10,9 @@
<!-- <span class="navbar-toggler-icon"></span> -->
<i class="fas fa-folder-open fa-lg fa-fw"></i>
</button>
{% if is_granted("@tools.label_scanner") %}
{% if is_granted("@tools.label_scanner") %}
<a href="{{ path('scan_dialog') }}" class="navbar-toggler nav-link ms-3">
<i class="fas fa-camera-retro fa-fw"></i>
<i class="fas fa-camera-retro fa-fw"></i>
</a>
{% endif %}
</div>
@ -52,6 +52,14 @@
{% trans %}info_providers.search.title{% endtrans %}
</a>
</li>
{% if settings_instance('generic_web_provider').enabled %}
<li>
<a class="dropdown-item" href="{{ path('info_providers_from_url') }}">
<i class="fa-fw fa-solid fa-book-atlas"></i>
{% trans %}info_providers.from_url.title{% endtrans %}
</a>
</li>
{% endif %}
{% endif %}
{% if is_granted('@parts.import') %}
@ -69,7 +77,7 @@
{% if is_granted('@parts.read') %}
{{ search.search_form("navbar") }}
{# {% include "_navbar_search.html.twig" %} #}
{# {% include "_navbar_search.html.twig" %} #}
{% endif %}
@ -145,4 +153,4 @@
</ul>
</div>
</div>
</nav>
</nav>

View file

@ -0,0 +1,21 @@
{% extends "main_card.html.twig" %}
{% import "info_providers/providers.macro.html.twig" as providers_macro %}
{% import "helper.twig" as helper %}
{% block title %}
{% trans %}info_providers.from_url.title{% endtrans %}
{% endblock %}
{% block card_title %}
<i class="fas fa-book-atlas"></i> {% trans %}info_providers.from_url.title{% endtrans %}
{% endblock %}
{% block card_content %}
<p class="text-muted offset-3">{% trans %}info_providers.from_url.help{% endtrans %}</p>
{{ form_start(form) }}
{{ form_row(form.url) }}
{{ form_row(form.submit) }}
{{ form_end(form) }}
{% endblock %}

View file

@ -10,7 +10,7 @@
{% block card_content %}
<div class="offset-sm-3">
<h3>
{% if info_provider_info.url %}
{% if info_provider_info.url is defined %}
<a href="{{ info_provider_info.url }}" class="link-external" target="_blank" rel="nofollow">{{ info_provider_info.name }}</a>
{% else %}
{{ info_provider_info.name }}

View file

@ -14316,5 +14316,47 @@ Buerklin-API Authentication server:
<target>Only includes attachments in the selected languages in the results.</target>
</segment>
</unit>
<unit id="PNmvQ.U" name="settings.ips.generic_web_provider">
<segment>
<source>settings.ips.generic_web_provider</source>
<target>Generic Web URL Provider</target>
</segment>
</unit>
<unit id="Vk3BoE_" name="settings.ips.generic_web_provider.description">
<segment>
<source>settings.ips.generic_web_provider.description</source>
<target>This info provider allows to retrieve basic part information from many shop page URLs.</target>
</segment>
</unit>
<unit id="Pu8juaH" name="settings.ips.generic_web_provider.enabled.help">
<segment>
<source>settings.ips.generic_web_provider.enabled.help</source>
<target>When the provider is enabled, users can make requests to arbitary websites on behalf of the Part-DB server. Only enable this, if you are aware of the potential consequences.</target>
</segment>
</unit>
<unit id="IvIOYcn" name="info_providers.from_url.title">
<segment>
<source>info_providers.from_url.title</source>
<target>Create [part] from URL</target>
</segment>
</unit>
<unit id="QLL7vDC" name="info_providers.from_url.url.label">
<segment>
<source>info_providers.from_url.url.label</source>
<target>URL</target>
</segment>
</unit>
<unit id="JTbTQLl" name="info_providers.from_url.no_part_found">
<segment>
<source>info_providers.from_url.no_part_found</source>
<target>No part found from the given URL. Are you sure this is a valid shop URL?</target>
</segment>
</unit>
<unit id="xoSvJk0" name="info_providers.from_url.help">
<segment>
<source>info_providers.from_url.help</source>
<target>Creates a part based on the given URL. It tries to delegate it to an existing info provider if possible, otherwise it will be tried to extract rudimentary data from the webpage's metadata.</target>
</segment>
</unit>
</file>
</xliff>