Compare commits

..

No commits in common. "91bf8371adb992aa72a478639eb8245d071d5e5a" and "fcd598286a527e19f9310e19ca02ad078f1805db" have entirely different histories.

3 changed files with 3 additions and 13 deletions

View file

@ -117,7 +117,7 @@ This provider can be particularly useful for extracting information from website
It also potentially extracts more detailed information than the Generic Web URL Provider, as it is not limited to the fields defined in the Schema.org format.
To use the AI Web Extractor, you need to set up an AI platform in the AI settings tab and choose a model that supports structured output.
For many use cases a small and cheap model like `google/gemini-2.5-flash-lite` will be sufficient, coming down to costs like 0.001$ per request.
For many use cases a small and cheap model like `google/gemini-2.5-flash-lite` will be sufficient, coming down to costs like 0.003$ per request.
For more complex websites, or if you want to use the LLM for translation purposes too, you should consider a more powerful model.
You can add some additional instructions for the model, which are added to the system prompt, to tweak the output of the model.

View file

@ -32,7 +32,6 @@ use App\Services\InfoProviderSystem\DTOJsonSchemaConverter;
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
use App\Settings\InfoProviderSystem\AIExtractorSettings;
use Brick\Schema\SchemaReader;
use Imagine\Image\Format;
use Jkphl\Micrometa;
use League\HTMLToMarkdown\HtmlConverter;
use Psr\Cache\CacheItemPoolInterface;
@ -175,8 +174,7 @@ final class AIWebProvider implements InfoProviderInterface
*/
private function extractStructuredData(string $html, string $url): string
{
//Only parse microdata, json-ld and rdfa, as they are the most common formats for structured data on product pages. Links and microformat only create clutter for the LLM
$micrometa = new Micrometa\Ports\Parser(Micrometa\Ports\Format::JSON_LD | Micrometa\Ports\Format::MICRODATA | Micrometa\Ports\Format::RDFA_LITE);
$micrometa = new Micrometa\Ports\Parser();
$items = $micrometa($url, $html);
return json_encode($items->toObject(), JSON_THROW_ON_ERROR);
@ -266,9 +264,6 @@ Rules:
- If information is not found, use null
- Try to avoid duplicating parameters, if the same parameter is mentioned multiple times, or if it is already used in another field.
- Include only the 1 to 3 most relevant images, such as the main product image or important diagrams. Ignore decorative images, logos, or icons.
- Extract GTIN / EAN if available, as it can be useful for matching parts across different sources, even if the part number is different.
- Include detailed product description into notes field, as it can contain important information that doesn't fit into other fields, such as features, applications, or unique selling points.
PROMPT;
if ($this->settings->outputLanguage === null) {

View file

@ -34,7 +34,6 @@ use Jbtronics\SettingsBundle\Settings\SettingsTrait;
use Symfony\AI\Platform\Capability;
use Symfony\Component\Form\Extension\Core\Type\LanguageType;
use Symfony\Component\Form\Extension\Core\Type\TextareaType;
use Symfony\Component\Translation\StaticMessage;
use Symfony\Component\Translation\TranslatableMessage as TM;
use Symfony\Component\Validator\Constraints\Language;
@ -52,11 +51,7 @@ class AIExtractorSettings
public ?AIPlatforms $platform = null;
#[SettingsParameter(label: new TM("settings.ips.ai_extractor.model"), description: new TM("settings.ips.ai_extractor.model.help"),
formType: AiModelsType::class, formOptions: [
'platform_selector' => self::MODEL_SELECTOR_LABEL, 'filter_capability' => Capability::OUTPUT_STRUCTURED,
'attr' => ['placeholder' => new StaticMessage('google/gemini-2.5-flash-lite')]
],
formType: AiModelsType::class, formOptions: ['platform_selector' => self::MODEL_SELECTOR_LABEL, 'filter_capability' => Capability::OUTPUT_STRUCTURED],
)]
public ?string $model = null;