mirror of
https://github.com/Part-DB/Part-DB-server.git
synced 2026-02-11 03:59:35 +00:00
Merge branch 'generic_webshop'
Some checks failed
Build assets artifact / Build assets artifact (push) Has been cancelled
Docker Image Build / docker (push) Has been cancelled
Docker Image Build (FrankenPHP) / docker (push) Has been cancelled
Static analysis / Static analysis (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, sqlite) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, sqlite) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, sqlite) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, sqlite) (push) Has been cancelled
Some checks failed
Build assets artifact / Build assets artifact (push) Has been cancelled
Docker Image Build / docker (push) Has been cancelled
Docker Image Build (FrankenPHP) / docker (push) Has been cancelled
Static analysis / Static analysis (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, mysql) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, postgres) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.2, sqlite) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.3, sqlite) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.4, sqlite) (push) Has been cancelled
PHPUnit Tests / PHPUnit and coverage Test (PHP 8.5, sqlite) (push) Has been cancelled
This commit is contained in:
commit
851055bdb4
3 changed files with 290 additions and 83 deletions
|
|
@ -18,6 +18,7 @@
|
|||
"api-platform/symfony": "^4.0.0",
|
||||
"beberlei/doctrineextensions": "^1.2",
|
||||
"brick/math": "^0.13.1",
|
||||
"brick/schema": "^0.2.0",
|
||||
"composer/ca-bundle": "^1.5",
|
||||
"composer/package-versions-deprecated": "^1.11.99.5",
|
||||
"doctrine/data-fixtures": "^2.0.0",
|
||||
|
|
|
|||
173
composer.lock
generated
173
composer.lock
generated
|
|
@ -4,7 +4,7 @@
|
|||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "8e387d6d016f33eb7302c47ecb7a12b9",
|
||||
"content-hash": "7ca9c95fb85f6bf3d9b8a3aa98ca33f6",
|
||||
"packages": [
|
||||
{
|
||||
"name": "amphp/amp",
|
||||
|
|
@ -2387,6 +2387,117 @@
|
|||
],
|
||||
"time": "2025-03-29T13:50:30+00:00"
|
||||
},
|
||||
{
|
||||
"name": "brick/schema",
|
||||
"version": "0.2.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/brick/schema.git",
|
||||
"reference": "b5114bf5e8092430041a37efe1cfd5279ca764c0"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/brick/schema/zipball/b5114bf5e8092430041a37efe1cfd5279ca764c0",
|
||||
"reference": "b5114bf5e8092430041a37efe1cfd5279ca764c0",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"brick/structured-data": "~0.1.0 || ~0.2.0",
|
||||
"ext-dom": "*",
|
||||
"php": "^8.1"
|
||||
},
|
||||
"require-dev": {
|
||||
"brick/varexporter": "^0.6",
|
||||
"vimeo/psalm": "6.12.0"
|
||||
},
|
||||
"type": "library",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Brick\\Schema\\": "src/"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"description": "Schema.org library for PHP",
|
||||
"keywords": [
|
||||
"JSON-LD",
|
||||
"brick",
|
||||
"microdata",
|
||||
"rdfa lite",
|
||||
"schema",
|
||||
"schema.org",
|
||||
"structured data"
|
||||
],
|
||||
"support": {
|
||||
"issues": "https://github.com/brick/schema/issues",
|
||||
"source": "https://github.com/brick/schema/tree/0.2.0"
|
||||
},
|
||||
"funding": [
|
||||
{
|
||||
"url": "https://github.com/BenMorel",
|
||||
"type": "github"
|
||||
}
|
||||
],
|
||||
"time": "2025-06-12T07:03:20+00:00"
|
||||
},
|
||||
{
|
||||
"name": "brick/structured-data",
|
||||
"version": "0.2.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/brick/structured-data.git",
|
||||
"reference": "be9b28720e2aba87f19c90500700970be85affde"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/brick/structured-data/zipball/be9b28720e2aba87f19c90500700970be85affde",
|
||||
"reference": "be9b28720e2aba87f19c90500700970be85affde",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"ext-dom": "*",
|
||||
"ext-json": "*",
|
||||
"ext-libxml": "*",
|
||||
"php": "^8.1",
|
||||
"sabre/uri": "^2.1 || ^3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"php-coveralls/php-coveralls": "^2.0",
|
||||
"phpunit/phpunit": "^8.0 || ^9.0",
|
||||
"vimeo/psalm": "6.12.0"
|
||||
},
|
||||
"type": "library",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Brick\\StructuredData\\": "src/"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"description": "Microdata, RDFa Lite & JSON-LD structured data reader",
|
||||
"keywords": [
|
||||
"JSON-LD",
|
||||
"brick",
|
||||
"microdata",
|
||||
"rdfa",
|
||||
"structured data"
|
||||
],
|
||||
"support": {
|
||||
"issues": "https://github.com/brick/structured-data/issues",
|
||||
"source": "https://github.com/brick/structured-data/tree/0.2.0"
|
||||
},
|
||||
"funding": [
|
||||
{
|
||||
"url": "https://github.com/BenMorel",
|
||||
"type": "github"
|
||||
}
|
||||
],
|
||||
"time": "2025-06-10T23:48:46+00:00"
|
||||
},
|
||||
{
|
||||
"name": "composer/ca-bundle",
|
||||
"version": "1.5.10",
|
||||
|
|
@ -9595,6 +9706,66 @@
|
|||
},
|
||||
"time": "2025-09-14T07:37:21+00:00"
|
||||
},
|
||||
{
|
||||
"name": "sabre/uri",
|
||||
"version": "3.0.2",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sabre-io/uri.git",
|
||||
"reference": "38eeab6ed9eec435a2188db489d4649c56272c51"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sabre-io/uri/zipball/38eeab6ed9eec435a2188db489d4649c56272c51",
|
||||
"reference": "38eeab6ed9eec435a2188db489d4649c56272c51",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": "^7.4 || ^8.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"friendsofphp/php-cs-fixer": "^3.64",
|
||||
"phpstan/extension-installer": "^1.4",
|
||||
"phpstan/phpstan": "^1.12",
|
||||
"phpstan/phpstan-phpunit": "^1.4",
|
||||
"phpstan/phpstan-strict-rules": "^1.6",
|
||||
"phpunit/phpunit": "^9.6"
|
||||
},
|
||||
"type": "library",
|
||||
"autoload": {
|
||||
"files": [
|
||||
"lib/functions.php"
|
||||
],
|
||||
"psr-4": {
|
||||
"Sabre\\Uri\\": "lib/"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"BSD-3-Clause"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Evert Pot",
|
||||
"email": "me@evertpot.com",
|
||||
"homepage": "http://evertpot.com/",
|
||||
"role": "Developer"
|
||||
}
|
||||
],
|
||||
"description": "Functions for making sense out of URIs.",
|
||||
"homepage": "http://sabre.io/uri/",
|
||||
"keywords": [
|
||||
"rfc3986",
|
||||
"uri",
|
||||
"url"
|
||||
],
|
||||
"support": {
|
||||
"forum": "https://groups.google.com/group/sabredav-discuss",
|
||||
"issues": "https://github.com/sabre-io/uri/issues",
|
||||
"source": "https://github.com/fruux/sabre-uri"
|
||||
},
|
||||
"time": "2024-09-04T15:30:08+00:00"
|
||||
},
|
||||
{
|
||||
"name": "scheb/2fa-backup-code",
|
||||
"version": "v7.13.1",
|
||||
|
|
|
|||
|
|
@ -32,6 +32,14 @@ use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
|
|||
use App\Services\InfoProviderSystem\PartInfoRetriever;
|
||||
use App\Services\InfoProviderSystem\ProviderRegistry;
|
||||
use App\Settings\InfoProviderSystem\GenericWebProviderSettings;
|
||||
use Brick\Schema\Interfaces\BreadcrumbList;
|
||||
use Brick\Schema\Interfaces\ImageObject;
|
||||
use Brick\Schema\Interfaces\Product;
|
||||
use Brick\Schema\Interfaces\PropertyValue;
|
||||
use Brick\Schema\Interfaces\QuantitativeValue;
|
||||
use Brick\Schema\Interfaces\Thing;
|
||||
use Brick\Schema\SchemaReader;
|
||||
use Brick\Schema\SchemaTypeList;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
|
||||
|
|
@ -104,126 +112,148 @@ class GenericWebProvider implements InfoProviderInterface
|
|||
return $host;
|
||||
}
|
||||
|
||||
private function productJsonLdToPart(array $jsonLd, string $url, Crawler $dom): PartDetailDTO
|
||||
private function breadcrumbToCategory(?BreadcrumbList $breadcrumbList): ?string
|
||||
{
|
||||
$notes = $jsonLd['description'] ?? "";
|
||||
if (isset($jsonLd['disambiguatingDescription'])) {
|
||||
if ($breadcrumbList === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$items = $breadcrumbList->itemListElement->getValues();
|
||||
if (count($items) < 1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
//Build our category from the breadcrumb items
|
||||
$categories = [];
|
||||
foreach ($items as $item) {
|
||||
if (isset($item->name)) {
|
||||
$categories[] = trim($item->name->toString());
|
||||
}
|
||||
}
|
||||
} catch (\Throwable) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return implode(' -> ', $categories);
|
||||
}
|
||||
|
||||
private function productToPart(Product $product, string $url, Crawler $dom, ?BreadcrumbList $categoryBreadcrumb): PartDetailDTO
|
||||
{
|
||||
$notes = $product->description->toString() ?? "";
|
||||
if ($product->disambiguatingDescription !== null) {
|
||||
if (!empty($notes)) {
|
||||
$notes .= "\n\n";
|
||||
}
|
||||
$notes .= $jsonLd['disambiguatingDescription'];
|
||||
$notes .= $product->disambiguatingDescription->toString();
|
||||
}
|
||||
|
||||
|
||||
//Extract vendor infos
|
||||
$vendor_infos = null;
|
||||
if (isset($jsonLd['offers'])) {
|
||||
|
||||
if (array_is_list($jsonLd['offers'])) {
|
||||
$offer = $jsonLd['offers'][0];
|
||||
} else {
|
||||
$offer = $jsonLd['offers'];
|
||||
}
|
||||
|
||||
//Make $jsonLd['url'] absolute if it's relative
|
||||
if (isset($jsonLd['url']) && parse_url($jsonLd['url'], PHP_URL_SCHEME) === null) {
|
||||
$parsedUrl = parse_url($url);
|
||||
$scheme = $parsedUrl['scheme'] ?? 'https';
|
||||
$host = $parsedUrl['host'] ?? '';
|
||||
$jsonLd['url'] = $scheme.'://'.$host.$jsonLd['url'];
|
||||
}
|
||||
|
||||
$offer = $product->offers->getFirstValue();
|
||||
if ($offer !== null) {
|
||||
$prices = [];
|
||||
if (isset($offer['price'])) {
|
||||
$prices[] = new PriceDTO(
|
||||
if ($offer->price->toString() !== null) {
|
||||
$prices = [new PriceDTO(
|
||||
minimum_discount_amount: 1,
|
||||
price: (string) $offer['price'],
|
||||
currency_iso_code: $offer['priceCurrency'] ?? null
|
||||
);
|
||||
} else if (isset($offer['offers']) && array_is_list($offer['offers'])) {
|
||||
//Some sites nest offers
|
||||
foreach ($offer['offers'] as $subOffer) {
|
||||
if (isset($subOffer['price'])) {
|
||||
$prices[] = new PriceDTO(
|
||||
price: $offer->price->toString(),
|
||||
currency_iso_code: $offer->priceCurrency?->toString()
|
||||
)];
|
||||
} else { //Check for nested offers (like IKEA does it)
|
||||
$offer2 = $offer->offers->getFirstValue();
|
||||
if ($offer2 !== null && $offer2->price->toString() !== null) {
|
||||
$prices = [
|
||||
new PriceDTO(
|
||||
minimum_discount_amount: 1,
|
||||
price: (string) $subOffer['price'],
|
||||
currency_iso_code: $subOffer['priceCurrency'] ?? null
|
||||
);
|
||||
}
|
||||
price: $offer2->price->toString(),
|
||||
currency_iso_code: $offer2->priceCurrency?->toString()
|
||||
)
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
$vendor_infos = [new PurchaseInfoDTO(
|
||||
distributor_name: $this->extractShopName($url),
|
||||
order_number: (string) ($jsonLd['sku'] ?? $jsonLd['@id'] ?? $jsonLd['gtin'] ?? 'Unknown'),
|
||||
order_number: $product->sku?->toString() ?? $product->identifier?->toString() ?? 'Unknown',
|
||||
prices: $prices,
|
||||
product_url: $jsonLd['url'] ?? $url,
|
||||
product_url: $offer->url?->toString() ?? $url,
|
||||
)];
|
||||
}
|
||||
|
||||
//Extract image:
|
||||
$image = null;
|
||||
if (isset($jsonLd['image'])) {
|
||||
if (is_array($jsonLd['image'])) {
|
||||
if (array_is_list($jsonLd['image'])) {
|
||||
$image = $jsonLd['image'][0] ?? null;
|
||||
}
|
||||
} elseif (is_string($jsonLd['image'])) {
|
||||
$image = $jsonLd['image'];
|
||||
if ($product->image !== null) {
|
||||
$imageObj = $product->image->getFirstValue();
|
||||
if (is_string($imageObj)) {
|
||||
$image = $imageObj;
|
||||
} else if ($imageObj instanceof ImageObject) {
|
||||
$image = $imageObj->contentUrl?->toString() ?? $imageObj->url?->toString();
|
||||
}
|
||||
}
|
||||
//If image is an object with @type ImageObject, extract the url
|
||||
if (is_array($image) && isset($image['@type']) && $image['@type'] === 'ImageObject') {
|
||||
$image = $image['contentUrl'] ?? $image['url'] ?? null;
|
||||
}
|
||||
|
||||
//Try to extract parameters from additionalProperty
|
||||
//Extract parameters from additionalProperty
|
||||
$parameters = [];
|
||||
if (isset($jsonLd['additionalProperty']) && array_is_list($jsonLd['additionalProperty'])) {
|
||||
foreach ($jsonLd['additionalProperty'] as $property) { //TODO: Handle minValue and maxValue
|
||||
if (isset ($property['unitText'])) {
|
||||
foreach ($product->additionalProperty->getValues() as $property) {
|
||||
if ($property instanceof PropertyValue) { //TODO: Handle minValue and maxValue
|
||||
if ($property->unitText->toString() !== null) {
|
||||
$parameters[] = ParameterDTO::parseValueField(
|
||||
name: $property['name'] ?? 'Unknown',
|
||||
value: $property['value'] ?? '',
|
||||
unit: $property['unitText']
|
||||
name: $property->name->toString() ?? 'Unknown',
|
||||
value: $property->value->toString() ?? '',
|
||||
unit: $property->unitText->toString()
|
||||
);
|
||||
} else {
|
||||
$parameters[] = ParameterDTO::parseValueIncludingUnit(
|
||||
name: $property['name'] ?? 'Unknown',
|
||||
value: $property['value'] ?? ''
|
||||
name: $property->name->toString() ?? 'Unknown',
|
||||
value: $property->value->toString() ?? ''
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Try to extract weight
|
||||
$mass = null;
|
||||
if (($weight = $product?->weight->getFirstValue()) instanceof QuantitativeValue) {
|
||||
$mass = $weight->value->toString();
|
||||
}
|
||||
|
||||
return new PartDetailDTO(
|
||||
provider_key: $this->getProviderKey(),
|
||||
provider_id: $url,
|
||||
name: $jsonLd ['name'] ?? 'Unknown Name',
|
||||
name: $product->name?->toString() ?? $product->alternateName?->toString() ?? $product?->mpn->toString() ?? 'Unknown Name',
|
||||
description: $this->getMetaContent($dom, 'og:description') ?? $this->getMetaContent($dom, 'description') ?? '',
|
||||
category: isset($jsonLd['category']) && is_string($jsonLd['category']) ? $jsonLd['category'] : null,
|
||||
manufacturer: $jsonLd['manufacturer']['name'] ?? $jsonLd['brand']['name'] ?? null,
|
||||
mpn: $jsonLd['mpn'] ?? null,
|
||||
category: $this->breadcrumbToCategory($categoryBreadcrumb) ?? $product->category?->toString(),
|
||||
manufacturer: self::propertyOrString($product->manufacturer) ?? self::propertyOrString($product->brand),
|
||||
mpn: $product->mpn?->toString(),
|
||||
preview_image_url: $image,
|
||||
provider_url: $url,
|
||||
notes: $notes,
|
||||
parameters: $parameters,
|
||||
vendor_infos: $vendor_infos,
|
||||
mass: isset($jsonLd['weight']['value']) ? (float)$jsonLd['weight']['value'] : null,
|
||||
mass: $mass
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes JSON in a forgiving way, trying to fix common issues.
|
||||
* @param string $json
|
||||
* @return array
|
||||
* @throws \JsonException
|
||||
*/
|
||||
private function json_decode_forgiving(string $json): array
|
||||
private static function propertyOrString(SchemaTypeList|Thing|string|null $value, string $property = "name"): ?string
|
||||
{
|
||||
//Sanitize common issues
|
||||
$json = preg_replace("/[\r\n]+/", " ", $json);
|
||||
return json_decode($json, true, 512, JSON_THROW_ON_ERROR);
|
||||
if ($value instanceof SchemaTypeList) {
|
||||
$value = $value->getFirstValue();
|
||||
}
|
||||
if ($value === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (is_string($value)) {
|
||||
return $value;
|
||||
}
|
||||
|
||||
if ($value instanceof Thing) {
|
||||
return $value->$property?->toString();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the content of a meta tag by its name or property attribute, or null if not found
|
||||
* @param Crawler $dom
|
||||
|
|
@ -336,18 +366,23 @@ class GenericWebProvider implements InfoProviderInterface
|
|||
$canonicalURL = $scheme.'://'.$host.$canonicalURL;
|
||||
}
|
||||
|
||||
//Try to find json-ld data in the head
|
||||
$jsonLdNodes = $dom->filter('script[type="application/ld+json"]');
|
||||
foreach ($jsonLdNodes as $node) {
|
||||
$jsonLd = $this->json_decode_forgiving($node->textContent);
|
||||
//If the content of json-ld is an array, try to find a product inside
|
||||
if (!array_is_list($jsonLd)) {
|
||||
$jsonLd = [$jsonLd];
|
||||
|
||||
$schemaReader = SchemaReader::forAllFormats();
|
||||
$things = $schemaReader->readHtml($content, $canonicalURL);
|
||||
|
||||
//Try to find a breadcrumb schema to extract the category
|
||||
$categoryBreadCrumbs = null;
|
||||
foreach ($things as $thing) {
|
||||
if ($thing instanceof BreadcrumbList) {
|
||||
$categoryBreadCrumbs = $thing;
|
||||
break;
|
||||
}
|
||||
foreach ($jsonLd as $item) {
|
||||
if (isset($item['@type']) && $item['@type'] === 'Product') {
|
||||
return $this->productJsonLdToPart($item, $canonicalURL, $dom);
|
||||
}
|
||||
}
|
||||
|
||||
//Try to find a Product schema
|
||||
foreach ($things as $thing) {
|
||||
if ($thing instanceof Product) {
|
||||
return $this->productToPart($thing, $canonicalURL, $dom, $categoryBreadCrumbs);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue