Use markdown as input for the LLM and add extracted microdata separately

This commit is contained in:
Jan Böhmer 2026-04-26 19:36:03 +02:00
parent af98fc1079
commit 4dbd92ac4d
5 changed files with 481 additions and 97 deletions

View file

@ -33,6 +33,7 @@
"jbtronics/dompdf-font-loader-bundle": "^1.0.0", "jbtronics/dompdf-font-loader-bundle": "^1.0.0",
"jbtronics/settings-bundle": "^3.0.0", "jbtronics/settings-bundle": "^3.0.0",
"jfcherng/php-diff": "^6.14", "jfcherng/php-diff": "^6.14",
"jkphl/micrometa": "dev-master",
"knpuniversity/oauth2-client-bundle": "^2.15", "knpuniversity/oauth2-client-bundle": "^2.15",
"league/commonmark": "^2.7", "league/commonmark": "^2.7",
"league/csv": "^9.8.0", "league/csv": "^9.8.0",
@ -159,6 +160,12 @@
"App\\Tests\\": "tests/" "App\\Tests\\": "tests/"
} }
}, },
"repositories": [
{
"type": "vcs",
"url": "https://github.com/jbtronics/micrometa"
}
],
"scripts": { "scripts": {
"auto-scripts": { "auto-scripts": {
"cache:clear": "symfony-cmd", "cache:clear": "symfony-cmd",

466
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically" "This file is @generated automatically"
], ],
"content-hash": "699f421ad81f8a1acacf8e2c4af66491", "content-hash": "7c76e3af5fd042105a3208fdcb300a11",
"packages": [ "packages": [
{ {
"name": "amphp/amp", "name": "amphp/amp",
@ -3883,16 +3883,16 @@
}, },
{ {
"name": "doctrine/migrations", "name": "doctrine/migrations",
"version": "3.9.6", "version": "3.9.7",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/doctrine/migrations.git", "url": "https://github.com/doctrine/migrations.git",
"reference": "ffd8355cdd8505fc650d9604f058bf62aedd80a1" "reference": "96cb2a89b56c9efb0bac38e606dc0b0f13e650ec"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/doctrine/migrations/zipball/ffd8355cdd8505fc650d9604f058bf62aedd80a1", "url": "https://api.github.com/repos/doctrine/migrations/zipball/96cb2a89b56c9efb0bac38e606dc0b0f13e650ec",
"reference": "ffd8355cdd8505fc650d9604f058bf62aedd80a1", "reference": "96cb2a89b56c9efb0bac38e606dc0b0f13e650ec",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
@ -3966,7 +3966,7 @@
], ],
"support": { "support": {
"issues": "https://github.com/doctrine/migrations/issues", "issues": "https://github.com/doctrine/migrations/issues",
"source": "https://github.com/doctrine/migrations/tree/3.9.6" "source": "https://github.com/doctrine/migrations/tree/3.9.7"
}, },
"funding": [ "funding": [
{ {
@ -3982,7 +3982,7 @@
"type": "tidelift" "type": "tidelift"
} }
], ],
"time": "2026-02-11T06:46:11+00:00" "time": "2026-04-23T19:33:20+00:00"
}, },
{ {
"name": "doctrine/orm", "name": "doctrine/orm",
@ -4074,19 +4074,20 @@
}, },
{ {
"name": "doctrine/persistence", "name": "doctrine/persistence",
"version": "4.1.1", "version": "4.2.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/doctrine/persistence.git", "url": "https://github.com/doctrine/persistence.git",
"reference": "b9c49ad3558bb77ef973f4e173f2e9c2eca9be09" "reference": "49ab73e0d3e2ac8d1f5ecda3dd8acd5503781e8b"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/doctrine/persistence/zipball/b9c49ad3558bb77ef973f4e173f2e9c2eca9be09", "url": "https://api.github.com/repos/doctrine/persistence/zipball/49ab73e0d3e2ac8d1f5ecda3dd8acd5503781e8b",
"reference": "b9c49ad3558bb77ef973f4e173f2e9c2eca9be09", "reference": "49ab73e0d3e2ac8d1f5ecda3dd8acd5503781e8b",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
"doctrine/deprecations": "^1",
"doctrine/event-manager": "^1 || ^2", "doctrine/event-manager": "^1 || ^2",
"php": "^8.1", "php": "^8.1",
"psr/cache": "^1.0 || ^2.0 || ^3.0" "psr/cache": "^1.0 || ^2.0 || ^3.0"
@ -4097,13 +4098,13 @@
"phpstan/phpstan-phpunit": "^2", "phpstan/phpstan-phpunit": "^2",
"phpstan/phpstan-strict-rules": "^2", "phpstan/phpstan-strict-rules": "^2",
"phpunit/phpunit": "^10.5.58 || ^12", "phpunit/phpunit": "^10.5.58 || ^12",
"symfony/cache": "^4.4 || ^5.4 || ^6.0 || ^7.0", "symfony/cache": "^4.4 || ^5.4 || ^6.0 || ^7.0 || ^8.0",
"symfony/finder": "^4.4 || ^5.4 || ^6.0 || ^7.0" "symfony/finder": "^4.4 || ^5.4 || ^6.0 || ^7.0 || ^8.0"
}, },
"type": "library", "type": "library",
"autoload": { "autoload": {
"psr-4": { "psr-4": {
"Doctrine\\Persistence\\": "src/Persistence" "Doctrine\\Persistence\\": "src"
} }
}, },
"notification-url": "https://packagist.org/downloads/", "notification-url": "https://packagist.org/downloads/",
@ -4147,7 +4148,7 @@
], ],
"support": { "support": {
"issues": "https://github.com/doctrine/persistence/issues", "issues": "https://github.com/doctrine/persistence/issues",
"source": "https://github.com/doctrine/persistence/tree/4.1.1" "source": "https://github.com/doctrine/persistence/tree/4.2.0"
}, },
"funding": [ "funding": [
{ {
@ -4163,7 +4164,7 @@
"type": "tidelift" "type": "tidelift"
} }
], ],
"time": "2025-10-16T20:13:18+00:00" "time": "2026-04-26T12:12:52+00:00"
}, },
{ {
"name": "doctrine/sql-formatter", "name": "doctrine/sql-formatter",
@ -5534,6 +5535,191 @@
], ],
"time": "2023-05-21T07:57:08+00:00" "time": "2023-05-21T07:57:08+00:00"
}, },
{
"name": "jkphl/dom-factory",
"version": "v1.0.1",
"source": {
"type": "git",
"url": "https://github.com/jkphl/dom-factory.git",
"reference": "dd32b8b2cc800f065c0eff8bb621d9f80147d45e"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/jkphl/dom-factory/zipball/dd32b8b2cc800f065c0eff8bb621d9f80147d45e",
"reference": "dd32b8b2cc800f065c0eff8bb621d9f80147d45e",
"shasum": ""
},
"require": {
"ext-dom": "*",
"ext-libxml": "*",
"ext-mbstring": "*",
"guzzlehttp/guzzle": "^6.0||^7.0",
"masterminds/html5": "^2.7",
"php": ">=7.2"
},
"require-dev": {
"clue/graph-composer": "^1.1",
"php-coveralls/php-coveralls": "^2.2",
"phpunit/phpunit": "^8.0||^9.0",
"squizlabs/php_codesniffer": "^3.5"
},
"type": "library",
"autoload": {
"psr-4": {
"Jkphl\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Joschi Kuphal",
"email": "joschi@kuphal.net",
"homepage": "https://jkphl.is",
"role": "Developer"
}
],
"description": "Simple HTML5/XML DOM factory",
"homepage": "https://github.com/jkphl/dom-factory",
"support": {
"email": "joschi@kuphal.net",
"issues": "https://github.com/jkphl/dom-factory/issues",
"source": "https://github.com/jkphl/dom-factory"
},
"time": "2021-06-28T11:49:36+00:00"
},
{
"name": "jkphl/micrometa",
"version": "dev-master",
"source": {
"type": "git",
"url": "https://github.com/jbtronics/micrometa.git",
"reference": "720f409151c2cc20add9478b7a0a635fa1707021"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/jbtronics/micrometa/zipball/720f409151c2cc20add9478b7a0a635fa1707021",
"reference": "720f409151c2cc20add9478b7a0a635fa1707021",
"shasum": ""
},
"require": {
"ext-dom": "*",
"jkphl/dom-factory": "^1",
"jkphl/rdfa-lite-microdata": "^0.4.4",
"league/uri": "^5.0|^6.5|^7.0",
"mf2/mf2": "^0.4",
"ml/json-ld": "^1.2",
"monolog/monolog": "^1.24 || ^2 || ^3",
"php": ">=7.1.3",
"psr/cache": "^1.0|^2|^3",
"psr/log": "^1.1|^2|^3",
"symfony/cache": "^4.0|^5.0|^6.0|^7.0|^8.0"
},
"require-dev": {
"clue/graph-composer": "^1.1",
"mf2/tests": "@dev",
"php-coveralls/php-coveralls": "^2.1",
"phpunit/phpunit": "^7.0 || ^8.5",
"squizlabs/php_codesniffer": "^3.3"
},
"default-branch": true,
"type": "library",
"autoload": {
"psr-4": {
"Jkphl\\": "src/"
}
},
"scripts": {
"phpunit": [
"vendor/bin/phpunit --configuration phpunit.xml.dist"
],
"depgraph": [
"vendor/bin/graph-composer --no-dev export . doc/dependencies.svg"
],
"check-style": [
"vendor/bin/phpcs -p --standard=PSR2 --runtime-set ignore_errors_on_exit 1 --runtime-set ignore_warnings_on_exit 1 src"
],
"fix-style": [
"vendor/bin/phpcbf -p --standard=PSR2 --runtime-set ignore_errors_on_exit 1 --runtime-set ignore_warnings_on_exit 1 src"
],
"test": [
"@phpunit"
]
},
"license": [
"MIT"
],
"authors": [
{
"name": "Joschi Kuphal",
"email": "joschi@tollwerk.de",
"homepage": "https://jkphl.is",
"role": "Developer"
}
],
"description": "A meta parser for extracting micro information out of web documents, currently supporting Microformats 1+2, HTML Microdata, RDFa Lite 1.1 and JSON-LD",
"homepage": "https://jkphl.is/projects/micrometa/",
"support": {
"email": "joschi@tollwerk.de",
"source": "https://github.com/jkphl/micrometa",
"issues": "https://github.com/jkphl/micrometa/issues"
},
"time": "2026-04-26T17:25:19+00:00"
},
{
"name": "jkphl/rdfa-lite-microdata",
"version": "v0.4.7",
"source": {
"type": "git",
"url": "https://github.com/jkphl/rdfa-lite-microdata.git",
"reference": "ffc4940e8be55798257a03da7ed7d4506a13c3e5"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/jkphl/rdfa-lite-microdata/zipball/ffc4940e8be55798257a03da7ed7d4506a13c3e5",
"reference": "ffc4940e8be55798257a03da7ed7d4506a13c3e5",
"shasum": ""
},
"require": {
"jkphl/dom-factory": "^1",
"php": ">=5.5"
},
"require-dev": {
"clue/graph-composer": "dev-master",
"codeclimate/php-test-reporter": "^0.4.4",
"phpunit/phpunit": "^4.8",
"satooshi/php-coveralls": "^1.0",
"squizlabs/php_codesniffer": "^2.8"
},
"type": "library",
"autoload": {
"psr-4": {
"Jkphl\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Joschi Kuphal",
"email": "joschi@tollwerk.de",
"homepage": "https://jkphl.is",
"role": "Developer"
}
],
"description": "RDFa Lite 1.1 and HTML Microdata parser for web documents (HTML, SVG, XML)",
"homepage": "https://github.com/jkphl/rdfa-lite-microdata",
"support": {
"email": "joschi@tollwerk.de",
"issues": "https://github.com/jkphl/rdfa-lite-microdata/issues",
"source": "https://github.com/jkphl/rdfa-lite-microdata"
},
"time": "2023-01-27T13:29:45+00:00"
},
{ {
"name": "kelunik/certificate", "name": "kelunik/certificate",
"version": "v1.1.3", "version": "v1.1.3",
@ -6899,6 +7085,170 @@
}, },
"time": "2025-07-25T09:04:22+00:00" "time": "2025-07-25T09:04:22+00:00"
}, },
{
"name": "mf2/mf2",
"version": "0.4.6",
"source": {
"type": "git",
"url": "https://github.com/microformats/php-mf2.git",
"reference": "00b70ee7eb7f5b0585b1bd467f6c9cbd75055d23"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/microformats/php-mf2/zipball/00b70ee7eb7f5b0585b1bd467f6c9cbd75055d23",
"reference": "00b70ee7eb7f5b0585b1bd467f6c9cbd75055d23",
"shasum": ""
},
"require": {
"php": ">=5.4.0"
},
"require-dev": {
"mf2/tests": "@dev",
"phpdocumentor/phpdocumentor": "v2.8.4",
"phpunit/phpunit": "4.8.*"
},
"suggest": {
"barnabywalters/mf-cleaner": "To more easily handle the canonical data php-mf2 gives you",
"masterminds/html5": "Alternative HTML parser for PHP, for better HTML5 support."
},
"bin": [
"bin/fetch-mf2",
"bin/parse-mf2"
],
"type": "library",
"autoload": {
"files": [
"Mf2/Parser.php"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"CC0-1.0"
],
"authors": [
{
"name": "Barnaby Walters",
"homepage": "http://waterpigs.co.uk"
}
],
"description": "A pure, generic microformats2 parser — makes HTML as easy to consume as a JSON API",
"keywords": [
"html",
"microformats",
"microformats 2",
"parser",
"semantic"
],
"support": {
"issues": "https://github.com/microformats/php-mf2/issues",
"source": "https://github.com/microformats/php-mf2/tree/master"
},
"time": "2018-08-24T14:47:04+00:00"
},
{
"name": "ml/iri",
"version": "1.1.4",
"target-dir": "ML/IRI",
"source": {
"type": "git",
"url": "https://github.com/lanthaler/IRI.git",
"reference": "cbd44fa913e00ea624241b38cefaa99da8d71341"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/lanthaler/IRI/zipball/cbd44fa913e00ea624241b38cefaa99da8d71341",
"reference": "cbd44fa913e00ea624241b38cefaa99da8d71341",
"shasum": ""
},
"require": {
"lib-pcre": ">=4.0",
"php": ">=5.3.0"
},
"type": "library",
"autoload": {
"psr-0": {
"ML\\IRI": ""
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Markus Lanthaler",
"email": "mail@markus-lanthaler.com",
"homepage": "http://www.markus-lanthaler.com",
"role": "Developer"
}
],
"description": "IRI handling for PHP",
"homepage": "http://www.markus-lanthaler.com",
"keywords": [
"URN",
"iri",
"uri",
"url"
],
"support": {
"issues": "https://github.com/lanthaler/IRI/issues",
"source": "https://github.com/lanthaler/IRI/tree/master"
},
"time": "2014-01-21T13:43:39+00:00"
},
{
"name": "ml/json-ld",
"version": "1.2.1",
"source": {
"type": "git",
"url": "https://github.com/lanthaler/JsonLD.git",
"reference": "537e68e87a6bce23e57c575cd5dcac1f67ce25d8"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/lanthaler/JsonLD/zipball/537e68e87a6bce23e57c575cd5dcac1f67ce25d8",
"reference": "537e68e87a6bce23e57c575cd5dcac1f67ce25d8",
"shasum": ""
},
"require": {
"ext-json": "*",
"ml/iri": "^1.1.1",
"php": ">=5.3.0"
},
"require-dev": {
"json-ld/tests": "1.0",
"phpunit/phpunit": "^4"
},
"type": "library",
"autoload": {
"psr-4": {
"ML\\JsonLD\\": ""
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Markus Lanthaler",
"email": "mail@markus-lanthaler.com",
"homepage": "http://www.markus-lanthaler.com",
"role": "Developer"
}
],
"description": "JSON-LD Processor for PHP",
"homepage": "http://www.markus-lanthaler.com",
"keywords": [
"JSON-LD",
"jsonld"
],
"support": {
"issues": "https://github.com/lanthaler/JsonLD/issues",
"source": "https://github.com/lanthaler/JsonLD/tree/1.2.1"
},
"time": "2022-09-29T08:45:17+00:00"
},
{ {
"name": "monolog/monolog", "name": "monolog/monolog",
"version": "3.10.0", "version": "3.10.0",
@ -9409,16 +9759,16 @@
}, },
{ {
"name": "rhukster/dom-sanitizer", "name": "rhukster/dom-sanitizer",
"version": "1.0.10", "version": "1.0.11",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/rhukster/dom-sanitizer.git", "url": "https://github.com/rhukster/dom-sanitizer.git",
"reference": "49a98046b708a4c92f754f5b0ef1720bb85142e2" "reference": "02d08ec8b36b93b04517d74fe82b715ef06273bd"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/rhukster/dom-sanitizer/zipball/49a98046b708a4c92f754f5b0ef1720bb85142e2", "url": "https://api.github.com/repos/rhukster/dom-sanitizer/zipball/02d08ec8b36b93b04517d74fe82b715ef06273bd",
"reference": "49a98046b708a4c92f754f5b0ef1720bb85142e2", "reference": "02d08ec8b36b93b04517d74fe82b715ef06273bd",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
@ -9448,9 +9798,9 @@
"description": "A simple but effective DOM/SVG/MathML Sanitizer for PHP 7.4+", "description": "A simple but effective DOM/SVG/MathML Sanitizer for PHP 7.4+",
"support": { "support": {
"issues": "https://github.com/rhukster/dom-sanitizer/issues", "issues": "https://github.com/rhukster/dom-sanitizer/issues",
"source": "https://github.com/rhukster/dom-sanitizer/tree/1.0.10" "source": "https://github.com/rhukster/dom-sanitizer/tree/1.0.11"
}, },
"time": "2026-04-10T17:00:11+00:00" "time": "2026-04-23T22:56:32+00:00"
}, },
{ {
"name": "robrichards/xmlseclibs", "name": "robrichards/xmlseclibs",
@ -13693,7 +14043,7 @@
}, },
{ {
"name": "symfony/polyfill-ctype", "name": "symfony/polyfill-ctype",
"version": "v1.36.0", "version": "v1.37.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/symfony/polyfill-ctype.git", "url": "https://github.com/symfony/polyfill-ctype.git",
@ -13752,7 +14102,7 @@
"portable" "portable"
], ],
"support": { "support": {
"source": "https://github.com/symfony/polyfill-ctype/tree/v1.36.0" "source": "https://github.com/symfony/polyfill-ctype/tree/v1.37.0"
}, },
"funding": [ "funding": [
{ {
@ -13776,16 +14126,16 @@
}, },
{ {
"name": "symfony/polyfill-intl-grapheme", "name": "symfony/polyfill-intl-grapheme",
"version": "v1.36.0", "version": "v1.37.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/symfony/polyfill-intl-grapheme.git", "url": "https://github.com/symfony/polyfill-intl-grapheme.git",
"reference": "ad1b7b9092976d6c948b8a187cec9faaea9ec1df" "reference": "4864388bfbd3001ce88e234fab652acd91fdc57e"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/symfony/polyfill-intl-grapheme/zipball/ad1b7b9092976d6c948b8a187cec9faaea9ec1df", "url": "https://api.github.com/repos/symfony/polyfill-intl-grapheme/zipball/4864388bfbd3001ce88e234fab652acd91fdc57e",
"reference": "ad1b7b9092976d6c948b8a187cec9faaea9ec1df", "reference": "4864388bfbd3001ce88e234fab652acd91fdc57e",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
@ -13834,7 +14184,7 @@
"shim" "shim"
], ],
"support": { "support": {
"source": "https://github.com/symfony/polyfill-intl-grapheme/tree/v1.36.0" "source": "https://github.com/symfony/polyfill-intl-grapheme/tree/v1.37.0"
}, },
"funding": [ "funding": [
{ {
@ -13854,11 +14204,11 @@
"type": "tidelift" "type": "tidelift"
} }
], ],
"time": "2026-04-10T16:19:22+00:00" "time": "2026-04-26T13:13:48+00:00"
}, },
{ {
"name": "symfony/polyfill-intl-icu", "name": "symfony/polyfill-intl-icu",
"version": "v1.36.0", "version": "v1.37.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/symfony/polyfill-intl-icu.git", "url": "https://github.com/symfony/polyfill-intl-icu.git",
@ -13922,7 +14272,7 @@
"shim" "shim"
], ],
"support": { "support": {
"source": "https://github.com/symfony/polyfill-intl-icu/tree/v1.36.0" "source": "https://github.com/symfony/polyfill-intl-icu/tree/v1.37.0"
}, },
"funding": [ "funding": [
{ {
@ -13946,7 +14296,7 @@
}, },
{ {
"name": "symfony/polyfill-intl-idn", "name": "symfony/polyfill-intl-idn",
"version": "v1.36.0", "version": "v1.37.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/symfony/polyfill-intl-idn.git", "url": "https://github.com/symfony/polyfill-intl-idn.git",
@ -14009,7 +14359,7 @@
"shim" "shim"
], ],
"support": { "support": {
"source": "https://github.com/symfony/polyfill-intl-idn/tree/v1.36.0" "source": "https://github.com/symfony/polyfill-intl-idn/tree/v1.37.0"
}, },
"funding": [ "funding": [
{ {
@ -14033,7 +14383,7 @@
}, },
{ {
"name": "symfony/polyfill-intl-normalizer", "name": "symfony/polyfill-intl-normalizer",
"version": "v1.36.0", "version": "v1.37.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/symfony/polyfill-intl-normalizer.git", "url": "https://github.com/symfony/polyfill-intl-normalizer.git",
@ -14094,7 +14444,7 @@
"shim" "shim"
], ],
"support": { "support": {
"source": "https://github.com/symfony/polyfill-intl-normalizer/tree/v1.36.0" "source": "https://github.com/symfony/polyfill-intl-normalizer/tree/v1.37.0"
}, },
"funding": [ "funding": [
{ {
@ -14118,7 +14468,7 @@
}, },
{ {
"name": "symfony/polyfill-php83", "name": "symfony/polyfill-php83",
"version": "v1.36.0", "version": "v1.37.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/symfony/polyfill-php83.git", "url": "https://github.com/symfony/polyfill-php83.git",
@ -14174,7 +14524,7 @@
"shim" "shim"
], ],
"support": { "support": {
"source": "https://github.com/symfony/polyfill-php83/tree/v1.36.0" "source": "https://github.com/symfony/polyfill-php83/tree/v1.37.0"
}, },
"funding": [ "funding": [
{ {
@ -14198,7 +14548,7 @@
}, },
{ {
"name": "symfony/polyfill-php84", "name": "symfony/polyfill-php84",
"version": "v1.36.0", "version": "v1.37.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/symfony/polyfill-php84.git", "url": "https://github.com/symfony/polyfill-php84.git",
@ -14254,7 +14604,7 @@
"shim" "shim"
], ],
"support": { "support": {
"source": "https://github.com/symfony/polyfill-php84/tree/v1.36.0" "source": "https://github.com/symfony/polyfill-php84/tree/v1.37.0"
}, },
"funding": [ "funding": [
{ {
@ -14278,16 +14628,16 @@
}, },
{ {
"name": "symfony/polyfill-php85", "name": "symfony/polyfill-php85",
"version": "v1.36.0", "version": "v1.37.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/symfony/polyfill-php85.git", "url": "https://github.com/symfony/polyfill-php85.git",
"reference": "2c408a6bb0313e6001a83628dc5506100474254e" "reference": "fcfa4973a9917cef23f2e38774da74a2b7d115ee"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/symfony/polyfill-php85/zipball/2c408a6bb0313e6001a83628dc5506100474254e", "url": "https://api.github.com/repos/symfony/polyfill-php85/zipball/fcfa4973a9917cef23f2e38774da74a2b7d115ee",
"reference": "2c408a6bb0313e6001a83628dc5506100474254e", "reference": "fcfa4973a9917cef23f2e38774da74a2b7d115ee",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
@ -14334,7 +14684,7 @@
"shim" "shim"
], ],
"support": { "support": {
"source": "https://github.com/symfony/polyfill-php85/tree/v1.36.0" "source": "https://github.com/symfony/polyfill-php85/tree/v1.37.0"
}, },
"funding": [ "funding": [
{ {
@ -14354,11 +14704,11 @@
"type": "tidelift" "type": "tidelift"
} }
], ],
"time": "2026-04-10T16:50:15+00:00" "time": "2026-04-26T13:10:57+00:00"
}, },
{ {
"name": "symfony/polyfill-uuid", "name": "symfony/polyfill-uuid",
"version": "v1.36.0", "version": "v1.37.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/symfony/polyfill-uuid.git", "url": "https://github.com/symfony/polyfill-uuid.git",
@ -14417,7 +14767,7 @@
"uuid" "uuid"
], ],
"support": { "support": {
"source": "https://github.com/symfony/polyfill-uuid/tree/v1.36.0" "source": "https://github.com/symfony/polyfill-uuid/tree/v1.37.0"
}, },
"funding": [ "funding": [
{ {
@ -19854,12 +20204,12 @@
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/Roave/SecurityAdvisories.git", "url": "https://github.com/Roave/SecurityAdvisories.git",
"reference": "10b8a93511210c9bae3be31f4fe13c3ff974cad4" "reference": "08cd07f04fb07fb4d316e956801d57b700cf7096"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/Roave/SecurityAdvisories/zipball/10b8a93511210c9bae3be31f4fe13c3ff974cad4", "url": "https://api.github.com/repos/Roave/SecurityAdvisories/zipball/08cd07f04fb07fb4d316e956801d57b700cf7096",
"reference": "10b8a93511210c9bae3be31f4fe13c3ff974cad4", "reference": "08cd07f04fb07fb4d316e956801d57b700cf7096",
"shasum": "" "shasum": ""
}, },
"conflict": { "conflict": {
@ -19882,6 +20232,7 @@
"alextselegidis/easyappointments": "<=1.5.2", "alextselegidis/easyappointments": "<=1.5.2",
"alexusmai/laravel-file-manager": "<=3.3.1", "alexusmai/laravel-file-manager": "<=3.3.1",
"algolia/algoliasearch-magento-2": "<=3.16.1|>=3.17.0.0-beta1,<=3.17.1", "algolia/algoliasearch-magento-2": "<=3.16.1|>=3.17.0.0-beta1,<=3.17.1",
"almirhodzic/nova-toggle-5": "<1.3",
"alt-design/alt-redirect": "<1.6.4", "alt-design/alt-redirect": "<1.6.4",
"altcha-org/altcha": "<1.3.1", "altcha-org/altcha": "<1.3.1",
"alterphp/easyadmin-extension-bundle": ">=1.2,<1.2.11|>=1.3,<1.3.1", "alterphp/easyadmin-extension-bundle": ">=1.2,<1.2.11|>=1.3,<1.3.1",
@ -19978,7 +20329,7 @@
"ckeditor/ckeditor": "<4.25", "ckeditor/ckeditor": "<4.25",
"clickstorm/cs-seo": ">=6,<6.8|>=7,<7.5|>=8,<8.4|>=9,<9.3", "clickstorm/cs-seo": ">=6,<6.8|>=7,<7.5|>=8,<8.4|>=9,<9.3",
"co-stack/fal_sftp": "<0.2.6", "co-stack/fal_sftp": "<0.2.6",
"cockpit-hq/cockpit": "<2.13.5", "cockpit-hq/cockpit": "<2.14",
"code16/sharp": "<9.20", "code16/sharp": "<9.20",
"codeception/codeception": "<3.1.3|>=4,<4.1.22", "codeception/codeception": "<3.1.3|>=4,<4.1.22",
"codeigniter/framework": "<3.1.10", "codeigniter/framework": "<3.1.10",
@ -20141,7 +20492,7 @@
"fisharebest/webtrees": "<=2.1.18", "fisharebest/webtrees": "<=2.1.18",
"fixpunkt/fp-masterquiz": "<2.2.1|>=3,<3.5.2", "fixpunkt/fp-masterquiz": "<2.2.1|>=3,<3.5.2",
"fixpunkt/fp-newsletter": "<1.1.1|>=1.2,<2.1.2|>=2.2,<3.2.6", "fixpunkt/fp-newsletter": "<1.1.1|>=1.2,<2.1.2|>=2.2,<3.2.6",
"flarum/core": "<1.8.10", "flarum/core": "<=1.8.15|>=2.0.0.0-beta1,<=2.0.0.0-beta8",
"flarum/flarum": "<0.1.0.0-beta8", "flarum/flarum": "<0.1.0.0-beta8",
"flarum/framework": "<1.8.10", "flarum/framework": "<1.8.10",
"flarum/mentions": "<1.6.3", "flarum/mentions": "<1.6.3",
@ -20178,7 +20529,7 @@
"geshi/geshi": "<=1.0.9.1", "geshi/geshi": "<=1.0.9.1",
"getformwork/formwork": "<=2.3.3", "getformwork/formwork": "<=2.3.3",
"getgrav/grav": "<1.11.0.0-beta1", "getgrav/grav": "<1.11.0.0-beta1",
"getkirby/cms": "<=5.2.1", "getkirby/cms": "<5.4",
"getkirby/kirby": "<3.9.8.3-dev|>=3.10,<3.10.1.2-dev|>=4,<4.7.1", "getkirby/kirby": "<3.9.8.3-dev|>=3.10,<3.10.1.2-dev|>=4,<4.7.1",
"getkirby/panel": "<2.5.14", "getkirby/panel": "<2.5.14",
"getkirby/starterkit": "<=3.7.0.2", "getkirby/starterkit": "<=3.7.0.2",
@ -20276,7 +20627,7 @@
"kelvinmo/simplexrd": "<3.1.1", "kelvinmo/simplexrd": "<3.1.1",
"kevinpapst/kimai2": "<1.16.7", "kevinpapst/kimai2": "<1.16.7",
"khodakhah/nodcms": "<=3.4.1", "khodakhah/nodcms": "<=3.4.1",
"kimai/kimai": "<=2.53", "kimai/kimai": "<2.54",
"kitodo/presentation": "<3.2.3|>=3.3,<3.3.4", "kitodo/presentation": "<3.2.3|>=3.3,<3.3.4",
"klaviyo/magento2-extension": ">=1,<3", "klaviyo/magento2-extension": ">=1,<3",
"knplabs/knp-snappy": "<=1.4.2", "knplabs/knp-snappy": "<=1.4.2",
@ -20720,7 +21071,7 @@
"twig/twig": "<3.11.2|>=3.12,<3.14.1|>=3.16,<3.19", "twig/twig": "<3.11.2|>=3.12,<3.14.1|>=3.16,<3.19",
"typicms/core": "<16.1.7", "typicms/core": "<16.1.7",
"typo3/cms": "<9.5.29|>=10,<10.4.35|>=11,<11.5.23|>=12,<12.2", "typo3/cms": "<9.5.29|>=10,<10.4.35|>=11,<11.5.23|>=12,<12.2",
"typo3/cms-backend": "<4.1.14|>=4.2,<4.2.15|>=4.3,<4.3.7|>=4.4,<4.4.4|>=7,<=7.6.50|>=8,<=8.7.39|>=9,<9.5.55|>=10,<=10.4.54|>=11,<=11.5.48|>=12,<=12.4.40|>=13,<=13.4.22|>=14,<=14.0.1", "typo3/cms-backend": "<4.1.14|>=4.2,<4.2.15|>=4.3,<4.3.7|>=4.4,<4.4.4|>=7,<=7.6.50|>=8,<=8.7.39|>=9,<9.5.55|>=10,<=10.4.54|>=11,<=11.5.48|>=12,<=12.4.40|>=13,<=13.4.22|>=14,<=14.0.1|==14.2",
"typo3/cms-belog": ">=10,<=10.4.47|>=11,<=11.5.41|>=12,<=12.4.24|>=13,<=13.4.2", "typo3/cms-belog": ">=10,<=10.4.47|>=11,<=11.5.41|>=12,<=12.4.24|>=13,<=13.4.2",
"typo3/cms-beuser": ">=9,<9.5.55|>=10,<10.4.54|>=11,<11.5.48|>=12,<12.4.37|>=13,<13.4.18", "typo3/cms-beuser": ">=9,<9.5.55|>=10,<10.4.54|>=11,<11.5.48|>=12,<12.4.37|>=13,<13.4.18",
"typo3/cms-core": "<=8.7.56|>=9,<9.5.55|>=10,<=10.4.54|>=11,<=11.5.48|>=12,<=12.4.40|>=13,<=13.4.22|>=14,<=14.0.1", "typo3/cms-core": "<=8.7.56|>=9,<9.5.55|>=10,<=10.4.54|>=11,<=11.5.48|>=12,<=12.4.40|>=13,<=13.4.22|>=14,<=14.0.1",
@ -20902,7 +21253,7 @@
"type": "tidelift" "type": "tidelift"
} }
], ],
"time": "2026-04-22T18:27:19+00:00" "time": "2026-04-24T17:22:29+00:00"
}, },
{ {
"name": "sebastian/cli-parser", "name": "sebastian/cli-parser",
@ -22418,6 +22769,7 @@
"aliases": [], "aliases": [],
"minimum-stability": "stable", "minimum-stability": "stable",
"stability-flags": { "stability-flags": {
"jkphl/micrometa": 20,
"roave/security-advisories": 20 "roave/security-advisories": 20
}, },
"prefer-stable": false, "prefer-stable": false,

View file

@ -54,7 +54,8 @@ final class DTOJsonSchemaConverter
'category' => ['type' => ['string', 'null'], 'description' => 'Product category'], 'category' => ['type' => ['string', 'null'], 'description' => 'Product category'],
'manufacturing_status' => ['type' => ['string', 'null'], 'enum' => ['active', 'obsolete', 'nrfnd', 'discontinued', null], 'description' => 'Manufacturing status'], 'manufacturing_status' => ['type' => ['string', 'null'], 'enum' => ['active', 'obsolete', 'nrfnd', 'discontinued', null], 'description' => 'Manufacturing status'],
'footprint' => ['type' => ['string', 'null'], 'description' => 'Package/footprint type'], 'footprint' => ['type' => ['string', 'null'], 'description' => 'Package/footprint type'],
'mass' => ['type' => ['number', 'null'], 'description' => 'Mass in grams'], 'mass' => ['type' => ['number', 'null'], 'description' => 'Mass of the product in grams'],
'gtin' => ['type' => ['string', 'null'], 'description' => 'Global Trade Item Number (GTIN) / EAN / UPC code'],
'parameters' => [ 'parameters' => [
'type' => 'array', 'type' => 'array',
'items' => [ 'items' => [
@ -94,17 +95,17 @@ final class DTOJsonSchemaConverter
'items' => [ 'items' => [
'type' => 'object', 'type' => 'object',
'properties' => [ 'properties' => [
'distributor_name' => ['type' => 'string'], 'distributor_name' => ['type' => 'string', 'description' => 'Name of the distributor or vendor. Typically the shop name'],
'order_number' => ['type' => ['string', 'null']], 'order_number' => ['type' => ['string', 'null'], 'description' => 'The order number or SKU used by the distributor. Optional, but can help to find the product on the distributor website.'],
'product_url' => ['type' => 'string'], 'product_url' => ['type' => 'string'],
'prices' => [ 'prices' => [
'type' => 'array', 'type' => 'array',
'items' => [ 'items' => [
'type' => 'object', 'type' => 'object',
'properties' => [ 'properties' => [
'minimum_quantity' => ['type' => 'integer'], 'minimum_quantity' => ['type' => 'integer', 'description' => 'Minimum quantity for this price tier. 1 when no tiered pricing is available.'],
'price' => ['type' => 'number'], 'price' => ['type' => 'number', 'description' => 'Price for the given minimum quantity.'],
'currency' => ['type' => 'string'], 'currency' => ['type' => 'string', 'description' => 'Currency ISO code, e.g. USD'],
], ],
'required' => ['minimum_quantity', 'price', 'currency'], 'required' => ['minimum_quantity', 'price', 'currency'],
], ],
@ -226,6 +227,7 @@ final class DTOJsonSchemaConverter
manufacturing_status: $manufacturingStatus, manufacturing_status: $manufacturingStatus,
provider_url: $productUrl, provider_url: $productUrl,
footprint: $data['footprint'] ?? null, footprint: $data['footprint'] ?? null,
gtin: $data['gtin'] ?? null,
notes: null, notes: null,
datasheets: $datasheets, datasheets: $datasheets,
images: $images, images: $images,

View file

@ -29,10 +29,15 @@ use App\Services\AI\AIPlatformRegistry;
use App\Services\InfoProviderSystem\DTOJsonSchemaConverter; use App\Services\InfoProviderSystem\DTOJsonSchemaConverter;
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
use App\Settings\InfoProviderSystem\AIExtractorSettings; use App\Settings\InfoProviderSystem\AIExtractorSettings;
use Brick\Schema\SchemaReader;
use Jkphl\Micrometa;
use League\HTMLToMarkdown\HtmlConverter;
use Symfony\AI\Platform\Message\Message; use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag; use Symfony\AI\Platform\Message\MessageBag;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
final class AIInfoExtractor implements InfoProviderInterface final class AIInfoExtractor implements InfoProviderInterface
{ {
use FixAndValidateUrlTrait; use FixAndValidateUrlTrait;
@ -95,16 +100,56 @@ final class AIInfoExtractor implements InfoProviderInterface
$html = $response->getContent(); $html = $response->getContent();
// Clean HTML // Clean HTML
$cleanedHtml = $this->cleanHTML($html); /*$cleanedHtml = $this->cleanHTML($html);
// Truncate to max content length // Truncate to max content length
$truncatedHtml = $this->truncateHTML($cleanedHtml, $this->settings->maxContentLength); $truncatedHtml = $this->truncateHTML($cleanedHtml, $this->settings->maxContentLength);*/
$markdown = $this->htmlToMarkdown($html);
//Extract structured data using traditional methods, to provide additional context to the LLM. This can help improve accuracy, especially for technical specifications that might be in tables or specific formats.
$structuredData = $this->extractStructuredData($html, $url);
// Call LLM // Call LLM
$llmResponse = $this->callLLM($truncatedHtml, $url); $llmResponse = $this->callLLM($markdown, $url, $structuredData);
// Build and return PartDetailDTO // Build and return PartDetailDTO
return $this->jsonSchemaConverter->jsonToDTO($llmResponse, $this->getProviderKey(), $url, $url, self::DISTRIBUTOR_NAME); $result = $this->jsonSchemaConverter->jsonToDTO($llmResponse, $this->getProviderKey(), $url, $url, self::DISTRIBUTOR_NAME);
return $result;
}
/**
 * Runs the Micrometa parser over the fetched page and serializes whatever
 * items it reports, so the result can be handed to the LLM as extra context.
 *
 * @param string $html Raw HTML source of the page
 * @param string $url  URL the HTML was fetched from (passed to the parser as the document URI)
 * @return string JSON representation of the parsed items
 * @throws \JsonException If the parsed items cannot be encoded as JSON
 */
private function extractStructuredData(string $html, string $url): string
{
    $parser = new Micrometa\Ports\Parser();

    // The parser object is invokable: URI first, document source second.
    $parsedItems = $parser($url, $html)->toObject();

    return json_encode($parsedItems, JSON_THROW_ON_ERROR);
}
/**
 * Converts a page's HTML into Markdown, preferring the page's main content
 * area so the LLM is not fed navigation and other boilerplate.
 *
 * @param string $html Raw HTML source of the page
 * @return string Markdown rendering of the selected HTML
 */
private function htmlToMarkdown(string $html): string
{
    // Look for a dedicated content container and take the first match.
    $contentNode = (new Crawler($html))
        ->filter('main, article, #content')
        ->first();

    // Fall back to the complete document when no content container matched.
    $source = $html;
    if ($contentNode->count()) {
        $source = $contentNode->html();
    }

    // Convert to markdown
    $options = [
        // Drop tags without a Markdown equivalent (like <div>) while keeping their content
        'strip_tags' => true,
        // Emit <br> as a plain newline (GFM style) instead of two trailing spaces plus newline
        'hard_break' => true,
        // Space-separated list of elements removed entirely, as an extra safety layer
        'remove_nodes' => 'nav footer script style',
    ];

    return (new HtmlConverter($options))->convert($source);
}
public function getCapabilities(): array public function getCapabilities(): array
@ -160,13 +205,18 @@ final class AIInfoExtractor implements InfoProviderInterface
return $truncated; return $truncated;
} }
private function callLLM(string $htmlContent, string $url): array private function callLLM(string $htmlContent, string $url, ?string $structuredData = null): array
{ {
$input = new MessageBag( $input = new MessageBag(
Message::forSystem($this->buildSystemPrompt()), Message::forSystem($this->buildSystemPrompt()),
Message::ofUser("Extract part information from this webpage content:\n\nURL: $url\n\n$htmlContent") Message::ofUser("Extract part information from this webpage content:\n\nURL: $url\n\n$htmlContent")
); );
if ($structuredData) {
$input->add(Message::ofUser("Following data was extracted using traditional methods, but might be incomplete or inaccurate.
Enrich it with the actual website data:\n\n".$structuredData));
}
try { try {
$aiPlatform = $this->AIPlatformRegistry->getPlatform($this->settings->platform ?? throw new \RuntimeException('No AI platform selected') ); $aiPlatform = $this->AIPlatformRegistry->getPlatform($this->settings->platform ?? throw new \RuntimeException('No AI platform selected') );
@ -187,29 +237,8 @@ final class AIInfoExtractor implements InfoProviderInterface
private function buildSystemPrompt(): string private function buildSystemPrompt(): string
{ {
return <<<'PROMPT' return <<<'PROMPT'
You are an expert at extracting electronic component information from web pages. Extract structured data in JSON format. You are an expert at extracting electronic component information from web pages. Extract structured data in JSON format, from markdown extracted from a product page.
Focus on the main content of the page, such as product descriptions, specifications, and tables. Ignore navigation menus, footers, and sidebars.
Return ONLY a valid JSON object with this exact structure:
{
"name": "string",
"description": "string",
"manufacturer": "string | null",
"mpn": "string | null",
"category": "string | null",
"manufacturing_status": "active|obsolete|nrfnd|discontinued|null",
"footprint": "string | null",
"mass": "number | null (in grams)",
"parameters": [{"name": "string", "value": "string", "unit": "string | null"}],
"datasheets": [{"url": "string", "description": "string"}],
"images": [{"url": "string", "description": "string"}],
"vendor_infos": [{
"distributor_name": "string",
"order_number": "string | null",
"product_url": "string",
"prices": [{"minimum_quantity": int, "price": number, "currency": "string"}]
}],
"manufacturer_product_url": "string | null"
}
Rules: Rules:
- manufacturing_status: Use "active", "obsolete", "nrfnd" (not recommended for new designs), "discontinued", or null - manufacturing_status: Use "active", "obsolete", "nrfnd" (not recommended for new designs), "discontinued", or null

View file

@ -441,12 +441,6 @@
"symfony/browser-kit": { "symfony/browser-kit": {
"version": "v4.2.3" "version": "v4.2.3"
}, },
"symfony/cache": {
"version": "v4.2.3"
},
"symfony/cache-contracts": {
"version": "v1.1.5"
},
"symfony/config": { "symfony/config": {
"version": "v4.2.3" "version": "v4.2.3"
}, },