mirror of
https://github.com/Part-DB/Part-DB-server.git
synced 2026-02-11 12:09:36 +00:00
Try to get a category from a webshop based on the breadcrumbs
This commit is contained in:
parent
b48de83a32
commit
7d19ed3ca8
1 changed file with 39 additions and 8 deletions
|
|
@@ -32,6 +32,7 @@ use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
|
||||||
use App\Services\InfoProviderSystem\PartInfoRetriever;
|
use App\Services\InfoProviderSystem\PartInfoRetriever;
|
||||||
use App\Services\InfoProviderSystem\ProviderRegistry;
|
use App\Services\InfoProviderSystem\ProviderRegistry;
|
||||||
use App\Settings\InfoProviderSystem\GenericWebProviderSettings;
|
use App\Settings\InfoProviderSystem\GenericWebProviderSettings;
|
||||||
|
use Brick\Schema\Interfaces\BreadcrumbList;
|
||||||
use Brick\Schema\Interfaces\ImageObject;
|
use Brick\Schema\Interfaces\ImageObject;
|
||||||
use Brick\Schema\Interfaces\Product;
|
use Brick\Schema\Interfaces\Product;
|
||||||
use Brick\Schema\Interfaces\PropertyValue;
|
use Brick\Schema\Interfaces\PropertyValue;
|
||||||
|
|
@@ -39,11 +40,6 @@ use Brick\Schema\Interfaces\QuantitativeValue;
|
||||||
use Brick\Schema\Interfaces\Thing;
|
use Brick\Schema\Interfaces\Thing;
|
||||||
use Brick\Schema\SchemaReader;
|
use Brick\Schema\SchemaReader;
|
||||||
use Brick\Schema\SchemaTypeList;
|
use Brick\Schema\SchemaTypeList;
|
||||||
use Brick\StructuredData\HTMLReader;
|
|
||||||
use Brick\StructuredData\Reader\JsonLdReader;
|
|
||||||
use Brick\StructuredData\Reader\MicrodataReader;
|
|
||||||
use Brick\StructuredData\Reader\RdfaLiteReader;
|
|
||||||
use Brick\StructuredData\Reader\ReaderChain;
|
|
||||||
use Symfony\Component\DomCrawler\Crawler;
|
use Symfony\Component\DomCrawler\Crawler;
|
||||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||||
|
|
||||||
|
|
@@ -116,7 +112,33 @@ class GenericWebProvider implements InfoProviderInterface
|
||||||
return $host;
|
return $host;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function productToPart(Product $product, string $url, Crawler $dom): PartDetailDTO
|
/**
 * Builds a category path such as "A -> B -> C" from the item names of a breadcrumb list.
 *
 * This is a best-effort extraction: any problem while reading the structured data
 * results in null, so the caller can fall back to another category source.
 *
 * @param BreadcrumbList|null $breadcrumbList The breadcrumb list found on the page, or null if none was found
 * @return string|null The non-empty breadcrumb names joined by " -> ", or null if no usable name exists
 */
private function breadcrumbToCategory(?BreadcrumbList $breadcrumbList): ?string
{
    if ($breadcrumbList === null) {
        return null;
    }

    $items = $breadcrumbList->itemListElement->getValues();
    if (count($items) < 1) {
        return null;
    }

    $categories = [];

    try {
        //Build our category from the breadcrumb items
        foreach ($items as $item) {
            if (!isset($item->name)) {
                continue;
            }

            //toString() may yield null for an empty value list; cast so that trim() never receives null
            $name = trim((string) $item->name->toString());

            //Skip blank names, so we do not produce dangling " -> " separators
            if ($name !== '') {
                $categories[] = $name;
            }
        }
    } catch (\Throwable) {
        //Deliberately swallowed: malformed breadcrumb data must not break the whole part retrieval
        return null;
    }

    //Return null instead of an empty string, so that a "??" fallback at the call site can take over
    if ($categories === []) {
        return null;
    }

    return implode(' -> ', $categories);
}
|
||||||
|
|
||||||
|
private function productToPart(Product $product, string $url, Crawler $dom, ?BreadcrumbList $categoryBreadcrumb): PartDetailDTO
|
||||||
{
|
{
|
||||||
$notes = $product->description->toString() ?? "";
|
$notes = $product->description->toString() ?? "";
|
||||||
if ($product->disambiguatingDescription !== null) {
|
if ($product->disambiguatingDescription !== null) {
|
||||||
|
|
@@ -200,7 +222,7 @@ class GenericWebProvider implements InfoProviderInterface
|
||||||
provider_id: $url,
|
provider_id: $url,
|
||||||
name: $product->name?->toString() ?? $product->alternateName?->toString() ?? $product?->mpn->toString() ?? 'Unknown Name',
|
name: $product->name?->toString() ?? $product->alternateName?->toString() ?? $product?->mpn->toString() ?? 'Unknown Name',
|
||||||
description: $this->getMetaContent($dom, 'og:description') ?? $this->getMetaContent($dom, 'description') ?? '',
|
description: $this->getMetaContent($dom, 'og:description') ?? $this->getMetaContent($dom, 'description') ?? '',
|
||||||
category: $product->category?->toString(),
|
category: $this->breadcrumbToCategory($categoryBreadcrumb) ?? $product->category?->toString(),
|
||||||
manufacturer: self::propertyOrString($product->manufacturer) ?? self::propertyOrString($product->brand),
|
manufacturer: self::propertyOrString($product->manufacturer) ?? self::propertyOrString($product->brand),
|
||||||
mpn: $product->mpn?->toString(),
|
mpn: $product->mpn?->toString(),
|
||||||
preview_image_url: $image,
|
preview_image_url: $image,
|
||||||
|
|
@@ -348,10 +370,19 @@ class GenericWebProvider implements InfoProviderInterface
|
||||||
$schemaReader = SchemaReader::forAllFormats();
|
$schemaReader = SchemaReader::forAllFormats();
|
||||||
$things = $schemaReader->readHtml($content, $canonicalURL);
|
$things = $schemaReader->readHtml($content, $canonicalURL);
|
||||||
|
|
||||||
|
//Try to find a breadcrumb schema to extract the category
|
||||||
|
$categoryBreadCrumbs = null;
|
||||||
|
foreach ($things as $thing) {
|
||||||
|
if ($thing instanceof BreadcrumbList) {
|
||||||
|
$categoryBreadCrumbs = $thing;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//Try to find a Product schema
|
//Try to find a Product schema
|
||||||
foreach ($things as $thing) {
|
foreach ($things as $thing) {
|
||||||
if ($thing instanceof Product) {
|
if ($thing instanceof Product) {
|
||||||
return $this->productToPart($thing, $canonicalURL, $dom);
|
return $this->productToPart($thing, $canonicalURL, $dom, $categoryBreadCrumbs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue