Added option for translating AI extracted output

This commit is contained in:
Jan Böhmer 2026-04-26 22:11:27 +02:00
parent 7117926584
commit 0ca5a41298
3 changed files with 42 additions and 1 deletions

View file

@ -35,6 +35,7 @@ use League\HTMLToMarkdown\HtmlConverter;
use Symfony\AI\Platform\Message\Message; use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag; use Symfony\AI\Platform\Message\MessageBag;
use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Intl\Languages;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
use function Symfony\Component\String\u; use function Symfony\Component\String\u;
@ -210,7 +211,7 @@ final class AIInfoExtractor implements InfoProviderInterface
private function buildSystemPrompt(): string private function buildSystemPrompt(): string
{ {
return <<<'PROMPT' $tmp = <<<'PROMPT'
You are an expert at extracting electronic component information from web pages. Extract structured data in JSON format, from markdown extracted from a product page. You are an expert at extracting electronic component information from web pages. Extract structured data in JSON format, from markdown extracted from a product page.
Focus on the main content of the page, such as product descriptions, specifications, and tables. Ignore navigation menus, footers, and sidebars. Focus on the main content of the page, such as product descriptions, specifications, and tables. Ignore navigation menus, footers, and sidebars.
@ -224,6 +225,14 @@ Rules:
For parameters, combine name, value, and unit. The unit should be separate if possible. For parameters, combine name, value, and unit. The unit should be separate if possible.
PROMPT; PROMPT;
if ($this->settings->outputLanguage === null) {
$tmp .= "\n\nProvide the response in the same language of the webpage.";
} else {
$tmp .= "\n\nThe response must be in ". Languages::getName($this->settings->outputLanguage, 'en') ." language. Translate texts if needed.";
}
return $tmp;
} }
} }

View file

@ -32,7 +32,9 @@ use Jbtronics\SettingsBundle\Settings\Settings;
use Jbtronics\SettingsBundle\Settings\SettingsParameter; use Jbtronics\SettingsBundle\Settings\SettingsParameter;
use Jbtronics\SettingsBundle\Settings\SettingsTrait; use Jbtronics\SettingsBundle\Settings\SettingsTrait;
use Symfony\AI\Platform\Capability; use Symfony\AI\Platform\Capability;
use Symfony\Component\Form\Extension\Core\Type\LanguageType;
use Symfony\Component\Translation\TranslatableMessage as TM; use Symfony\Component\Translation\TranslatableMessage as TM;
use Symfony\Component\Validator\Constraints\Language;
#[Settings(name: "ai_extractor", label: new TM("settings.ips.ai_extractor"), description: new TM("settings.ips.ai_extractor.description"))] #[Settings(name: "ai_extractor", label: new TM("settings.ips.ai_extractor"), description: new TM("settings.ips.ai_extractor.description"))]
#[SettingsIcon("fa-plug")] #[SettingsIcon("fa-plug")]
@ -56,4 +58,10 @@ class AIExtractorSettings
description: new TM("settings.ips.ai_extractor.max_content_length.description"), description: new TM("settings.ips.ai_extractor.max_content_length.description"),
)] )]
public int $maxContentLength = 50000; public int $maxContentLength = 50000;
#[Language]
#[SettingsParameter(label: new TM("settings.ips.ai_extractor.output_language"), description: new TM("settings.ips.ai_extractor.output_language.description"),
formType: LanguageType::class,
)]
public ?string $outputLanguage = null;
} }

View file

@ -13103,5 +13103,29 @@ Buerklin-API Authentication server:
<target>The AI model that should be used for extraction. Must support structured output.</target> <target>The AI model that should be used for extraction. Must support structured output.</target>
</segment> </segment>
</unit> </unit>
<unit id="H1SYgGs" name="settings.ips.ai_extractor.max_content_length">
<segment>
<source>settings.ips.ai_extractor.max_content_length</source>
<target>Max. website content length</target>
</segment>
</unit>
<unit id="SZWiZE3" name="settings.ips.ai_extractor.max_content_length.description">
<segment>
<source>settings.ips.ai_extractor.max_content_length.description</source>
<target>The maximum number of characters of the website that are sent to the AI service.</target>
</segment>
</unit>
<unit id="pCsAHOv" name="settings.ips.ai_extractor.output_language">
<segment>
<source>settings.ips.ai_extractor.output_language</source>
<target>Output language</target>
</segment>
</unit>
<unit id="NVHHgpD" name="settings.ips.ai_extractor.output_language.description">
<segment>
<source>settings.ips.ai_extractor.output_language.description</source>
<target>By default, the provider returns information in the same language as the website. With this option you can ask the AI to translate it for you. Might only work with certain models.</target>
</segment>
</unit>
</file> </file>
</xliff> </xliff>