Added option for translating AI extracted output

This commit is contained in:
Jan Böhmer 2026-04-26 22:11:27 +02:00
parent 7117926584
commit 0ca5a41298
3 changed files with 42 additions and 1 deletions

View file

@ -35,6 +35,7 @@ use League\HTMLToMarkdown\HtmlConverter;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Intl\Languages;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use function Symfony\Component\String\u;
@ -210,7 +211,7 @@ final class AIInfoExtractor implements InfoProviderInterface
private function buildSystemPrompt(): string
{
return <<<'PROMPT'
$tmp = <<<'PROMPT'
You are an expert at extracting electronic component information from web pages. Extract structured data in JSON format, from markdown extracted from a product page.
Focus on the main content of the page, such as product descriptions, specifications, and tables. Ignore navigation menus, footers, and sidebars.
@ -224,6 +225,14 @@ Rules:
For parameters, combine name, value, and unit. The unit should be separate if possible.
PROMPT;
if ($this->settings->outputLanguage === null) {
$tmp .= "\n\nProvide the response in the same language of the webpage.";
} else {
$tmp .= "\n\nThe response must be in ". Languages::getName($this->settings->outputLanguage, 'en') ." language. Translate texts if needed.";
}
return $tmp;
}
}

View file

@ -32,7 +32,9 @@ use Jbtronics\SettingsBundle\Settings\Settings;
use Jbtronics\SettingsBundle\Settings\SettingsParameter;
use Jbtronics\SettingsBundle\Settings\SettingsTrait;
use Symfony\AI\Platform\Capability;
use Symfony\Component\Form\Extension\Core\Type\LanguageType;
use Symfony\Component\Translation\TranslatableMessage as TM;
use Symfony\Component\Validator\Constraints\Language;
#[Settings(name: "ai_extractor", label: new TM("settings.ips.ai_extractor"), description: new TM("settings.ips.ai_extractor.description"))]
#[SettingsIcon("fa-plug")]
@ -56,4 +58,10 @@ class AIExtractorSettings
description: new TM("settings.ips.ai_extractor.max_content_length.description"),
)]
public int $maxContentLength = 50000;
/**
 * Language the AI-extracted output should be written in, or null to keep the language of the source web page.
 *
 * When set, AIInfoExtractor::buildSystemPrompt() resolves the value to an English language name via
 * Languages::getName() and appends a translation instruction to the system prompt.
 * NOTE(review): presumably a language code as produced by LanguageType and checked by the Language constraint — confirm the exact format.
 */
#[Language]
#[SettingsParameter(label: new TM("settings.ips.ai_extractor.output_language"), description: new TM("settings.ips.ai_extractor.output_language.description"),
formType: LanguageType::class,
)]
public ?string $outputLanguage = null;
}

View file

@ -13103,5 +13103,29 @@ Buerklin-API Authentication server:
<target>The AI model that should be used for extraction. Must support structured output.</target>
</segment>
</unit>
<unit id="H1SYgGs" name="settings.ips.ai_extractor.max_content_length">
<segment>
<source>settings.ips.ai_extractor.max_content_length</source>
<target>Max. Website Content length</target>
</segment>
</unit>
<unit id="SZWiZE3" name="settings.ips.ai_extractor.max_content_length.description">
<segment>
<source>settings.ips.ai_extractor.max_content_length.description</source>
<target>The maximum number of characters of the website that are sent to the AI service.</target>
</segment>
</unit>
<unit id="pCsAHOv" name="settings.ips.ai_extractor.output_language">
<segment>
<source>settings.ips.ai_extractor.output_language</source>
<target>Output language</target>
</segment>
</unit>
<unit id="NVHHgpD" name="settings.ips.ai_extractor.output_language.description">
<segment>
<source>settings.ips.ai_extractor.output_language.description</source>
<target>By default, the provider returns information in the same language as the website. With this option you can ask the AI to translate it for you. Might only work with certain models.</target>
</segment>
</unit>
</file>
</xliff>