diff --git a/src/Controller/BrowserPluginController.php b/src/Controller/BrowserPluginController.php new file mode 100644 index 00000000..1bb95787 --- /dev/null +++ b/src/Controller/BrowserPluginController.php @@ -0,0 +1,139 @@ +. + */ + +declare(strict_types=1); + +namespace App\Controller; + +use App\Entity\UserSystem\User; +use App\Services\InfoProviderSystem\ProviderRegistry; +use App\Services\InfoProviderSystem\SubmittedPageStorage; +use App\Services\InfoProviderSystem\DTOs\BrowserSubmittedPage; +use App\Settings\InfoProviderSystem\BrowserPluginSettings; +use App\Settings\SystemSettings\CustomizationSettings; +use Symfony\Bundle\FrameworkBundle\Controller\AbstractController; +use Symfony\Component\HttpFoundation\JsonResponse; +use Symfony\Component\HttpFoundation\Request; +use Symfony\Component\HttpFoundation\Response; +use Symfony\Component\HttpKernel\Attribute\MapRequestPayload; +use Symfony\Component\HttpKernel\Exception\HttpException; +use Symfony\Component\HttpKernel\Exception\ServiceUnavailableHttpException; +use Symfony\Component\Routing\Attribute\Route; +use Symfony\Component\Routing\Generator\UrlGeneratorInterface; + +/** + * Provides the endpoint used by browser extensions to submit the current page's HTML to Part-DB, + * so that info providers can use it instead of fetching the URL themselves. + */ +#[Route('/tools/info_providers')] +class BrowserPluginController extends AbstractController +{ + public function __construct( + private readonly SubmittedPageStorage $browserHtmlStorage, + private readonly ProviderRegistry $providerRegistry, + private readonly CustomizationSettings $customizationSettings, + private readonly BrowserPluginSettings $browserPluginSettings, + ) { + } + + private const URL_PROVIDER_KEYS = ['generic_web', 'ai_web']; + + /** + * Returns instance info for the browser extension: logged-in username, instance name, and active URL providers. 
+ * + * Response: { "username": "admin", "instance_name": "Part-DB", "url_providers": [{"id": "generic_web", "label": "Generic Web URL"}] } + */ + #[Route('/browser_info', name: 'browser_plugin_info', methods: ['GET'])] + public function getInfo(): JsonResponse + { + $this->denyAccessUnlessGranted('@info_providers.create_parts'); + $this->throwIfDisabled(); + + $activeProviders = $this->providerRegistry->getActiveProviders(); + + $urlProviders = []; + foreach (self::URL_PROVIDER_KEYS as $key) { + if (isset($activeProviders[$key])) { + $urlProviders[] = [ + 'id' => $key, + 'label' => $activeProviders[$key]->getProviderInfo()['name'], + ]; + } + } + + $user = $this->getUser(); + if ($user instanceof User) { + $username = $user->getFullName(true); + } else { + $username = $user ? $user->getUserIdentifier() : "unknown"; + } + + return new JsonResponse([ + 'username' => $username, + 'instance_name' => $this->customizationSettings->instanceName, + 'url_providers' => $urlProviders, + ]); + } + + /** + * Accepts a JSON POST body with the HTML of the current page from a browser extension. + * Stores the page in the cache via SubmittedPageStorage and, if a provider was given, returns a redirect URL + * pointing to the standard part-creation flow with the submitted_page_token parameter set. + * + * Expected JSON body: { "html": "...page HTML...", "url": "https://example.com/product", "title": "Product", "provider": "generic_web" } + * The "provider" field is optional and must be "generic_web" or "ai_web" (the AI extractor), any other value is rejected with HTTP 400. Without it the page is only stored and "redirect_url" is null.
+ * Response: { "redirect_url": "https://partdb.example.com/en/part/from_info_provider/generic_web/https%3A%2F%2F.../create?submitted_page_token=..." } + */ + #[Route('/browser_html', name: 'browser_plugin_submit_html', methods: ['POST'])] + public function submitHtml(Request $request, + #[MapRequestPayload] + BrowserSubmittedPage $page + ): JsonResponse + { + $this->denyAccessUnlessGranted('@info_providers.create_parts'); + $this->throwIfDisabled(); + + $provider = $request->getPayload()->get('provider'); + //Only the URL providers can make use of a submitted page, so reject anything else before storing the page + if ($provider !== null && !in_array($provider, self::URL_PROVIDER_KEYS, true)) { + throw new HttpException(Response::HTTP_BAD_REQUEST, 'Unsupported provider, expected one of: ' . implode(', ', self::URL_PROVIDER_KEYS)); + } + + // The maprequestpayload already validates the URL and HTML content: + $token = $this->browserHtmlStorage->store($page); + + $redirectUrl = $provider === null ? null : $this->generateUrl('info_providers_create_part', [ + 'providerKey' => $provider, + 'providerId' => $page->url, + 'submitted_page_token' => $token, + ], UrlGeneratorInterface::ABSOLUTE_URL); + + return new JsonResponse([ + 'redirect_url' => $redirectUrl, + ]); + } + + public function throwIfDisabled(): void + { + if (!$this->browserPluginSettings->enabled) { + throw HttpException::fromStatusCode(451, "Browser plugin feature is disabled by the administrator, ask him to enable it in system settings."); + } + } +} diff --git a/src/Controller/InfoProviderController.php b/src/Controller/InfoProviderController.php index 817a6651..28c281d0 100644 --- a/src/Controller/InfoProviderController.php +++ b/src/Controller/InfoProviderController.php @@ -28,6 +28,7 @@ use App\Entity\Parts\Part; use App\Exceptions\OAuthReconnectRequiredException; use App\Form\InfoProviderSystem\FromURLFormType; use App\Form\InfoProviderSystem\PartSearchType; +use App\Services\InfoProviderSystem\SubmittedPageStorage; use App\Services\InfoProviderSystem\ExistingPartFinder; use App\Services\InfoProviderSystem\CreateFromUrlHelper; use App\Services\InfoProviderSystem\PartInfoRetriever; @@ -62,7 +63,8 @@ class InfoProviderController extends AbstractController private readonly
PartInfoRetriever $infoRetriever, private readonly ExistingPartFinder $existingPartFinder, private readonly SettingsManagerInterface $settingsManager, - private readonly SettingsFormFactoryInterface $settingsFormFactory + private readonly SettingsFormFactoryInterface $settingsFormFactory, + private readonly SubmittedPageStorage $browserHtmlStorage, ) { @@ -221,7 +223,7 @@ class InfoProviderController extends AbstractController } #[Route('/from_url', name: 'info_providers_from_url')] - public function fromURL(Request $request, GenericWebProvider $provider, CreateFromUrlHelper $fromUrlHelper): Response + public function fromURL(Request $request, CreateFromUrlHelper $fromUrlHelper): Response { $this->denyAccessUnlessGranted('@info_providers.create_parts'); @@ -242,6 +244,12 @@ class InfoProviderController extends AbstractController $no_cache = $form->get('no_cache')->getData(); $skip_delegation = $form->get('skip_delegation')->getData(); + $submittedPageToken = $request->request->get('submitted_page_token', null); + //An expired or unknown token yields no page: keep the URL from the form then, instead of failing on null + $submittedPage = is_string($submittedPageToken) && $submittedPageToken !== '' ? $this->browserHtmlStorage->retrieve($submittedPageToken) : null; + if ($submittedPage !== null) { + $url = $submittedPage->url; + } try { //It's okay if we use the cached results here, as its just for convenience $searchResult = $this->infoRetriever->searchByKeyword( @@ -249,6 +257,7 @@ class InfoProviderController extends AbstractController providers: [$method], options: [ InfoProviderInterface::OPTION_SKIP_DELEGATION => $skip_delegation, + InfoProviderInterface::OPTION_SUBMITTED_PAGE_TOKEN => $submittedPageToken, ] ); @@ -262,6 +271,7 @@ class InfoProviderController extends AbstractController 'providerId' => $searchResult->provider_id, 'no_cache' => $no_cache ? 1 : null, 'skip_delegation' => $skip_delegation ?
1 : null, + 'submitted_page_token' => $submittedPageToken ?: null, ]); } } catch (ExceptionInterface $e) { @@ -272,6 +282,7 @@ class InfoProviderController extends AbstractController return $this->render('info_providers/from_url/from_url.html.twig', [ 'form' => $form, 'partDetail' => $partDetail, + 'recentBrowserPages' => $this->browserHtmlStorage->getRecentPages(), ]); } diff --git a/src/Controller/PartController.php b/src/Controller/PartController.php index 735a48f8..c4c0e526 100644 --- a/src/Controller/PartController.php +++ b/src/Controller/PartController.php @@ -328,10 +328,12 @@ final class PartController extends AbstractController //Force info providers to not use cache, when retrieving part details for creating a new part, because otherwise we might end up with outdated information $no_cache = $request->query->getBoolean('no_cache', false); $skip_delegation = $request->query->getBoolean('skip_delegation', false); + $submitted_page_token = $request->query->getString('submitted_page_token'); $dto = $infoRetriever->getDetails($providerKey, $providerId, [ InfoProviderInterface::OPTION_NO_CACHE => $no_cache, InfoProviderInterface::OPTION_SKIP_DELEGATION => $skip_delegation, + InfoProviderInterface::OPTION_SUBMITTED_PAGE_TOKEN => $submitted_page_token, ]); $new_part = $infoRetriever->dtoToPart($dto); diff --git a/src/Services/InfoProviderSystem/DTOs/BrowserSubmittedPage.php b/src/Services/InfoProviderSystem/DTOs/BrowserSubmittedPage.php new file mode 100644 index 00000000..0f4fbf5f --- /dev/null +++ b/src/Services/InfoProviderSystem/DTOs/BrowserSubmittedPage.php @@ -0,0 +1,50 @@ +. + */ + +declare(strict_types=1); + +namespace App\Services\InfoProviderSystem\DTOs; + +use Symfony\Component\Validator\Constraints as Assert; + +/** + * Represents a webpage submitted by the browser extension, held temporarily in the application cache. 
+ */ +final readonly class BrowserSubmittedPage +{ + /** + * @var string A unique token for this page, derived from the URL and HTML content. Used to identify the page in the cache without storing the full HTML in the session. + */ + public string $token; + + public function __construct( + #[Assert\Url()] + #[Assert\NotBlank] + public string $url, + #[Assert\NotBlank] + #[Assert\Length(max: 5 * 1024 * 1024)] // Limit to 5 MB to prevent abuse + public string $html, + #[Assert\NotBlank] + public string $title, + public \DateTimeImmutable $submittedAt = new \DateTimeImmutable(), + ) { + $this->token = hash('xxh3', $url . '|' . $html); + } +} diff --git a/src/Services/InfoProviderSystem/PartInfoRetriever.php b/src/Services/InfoProviderSystem/PartInfoRetriever.php index 6c10f10e..f5ff144d 100644 --- a/src/Services/InfoProviderSystem/PartInfoRetriever.php +++ b/src/Services/InfoProviderSystem/PartInfoRetriever.php @@ -175,15 +175,16 @@ final class PartInfoRetriever */ public function dtoToPart(PartDetailDTO $search_result): Part { - return $this->createPart($search_result->provider_key, $search_result->provider_id); + return $this->dto_to_entity_converter->convertPart($search_result); } /** * Use the given details to create a part entity + * @param array $options Options passed on to getDetails(). Defaults to no options, so that existing two-argument callers keep working. */ - public function createPart(string $provider_key, string $part_id): Part + public function createPart(string $provider_key, string $part_id, array $options = []): Part { - $details = $this->getDetails($provider_key, $part_id); + $details = $this->getDetails($provider_key, $part_id, $options); return $this->dto_to_entity_converter->convertPart($details); } diff --git a/src/Services/InfoProviderSystem/Providers/AIWebProvider.php b/src/Services/InfoProviderSystem/Providers/AIWebProvider.php index 79f07be8..6539e69b 100644 --- a/src/Services/InfoProviderSystem/Providers/AIWebProvider.php +++ b/src/Services/InfoProviderSystem/Providers/AIWebProvider.php @@ -27,12 +27,11 @@ namespace App\Services\InfoProviderSystem\Providers; use
App\Exceptions\ProviderIDNotSupportedException; use App\Helpers\RandomizeUseragentHttpClient; use App\Services\AI\AIPlatformRegistry; +use App\Services\InfoProviderSystem\SubmittedPageStorage; use App\Services\InfoProviderSystem\CreateFromUrlHelper; use App\Services\InfoProviderSystem\DTOJsonSchemaConverter; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Settings\InfoProviderSystem\AIExtractorSettings; -use Brick\Schema\SchemaReader; -use Imagine\Image\Format; use Jkphl\Micrometa; use League\HTMLToMarkdown\HtmlConverter; use Psr\Cache\CacheItemPoolInterface; @@ -62,6 +61,7 @@ final class AIWebProvider implements InfoProviderInterface private readonly DTOJsonSchemaConverter $jsonSchemaConverter, private readonly CacheItemPoolInterface $partInfoCache, private readonly CreateFromUrlHelper $createFromUrlHelper, + private readonly SubmittedPageStorage $browserHtmlStorage, ) { //Use NoPrivateNetworkHttpClient to prevent SSRF vulnerabilities, and RandomizeUseragentHttpClient to make it harder for servers to block us $this->httpClient = (new RandomizeUseragentHttpClient(new NoPrivateNetworkHttpClient($httpClient)))->withOptions( @@ -142,9 +142,17 @@ final class AIWebProvider implements InfoProviderInterface return $cacheItem->get(); } - // Fetch HTML content - $response = $this->httpClient->request('GET', $url); - $html = $response->getContent(); + // Use pre-fetched browser HTML if the option is set and a stored page is available for this URL + $html = null; + if (($token = ($options[self::OPTION_SUBMITTED_PAGE_TOKEN] ?? '')) !== '') { + $html = $this->browserHtmlStorage->retrieve($token)?->html; + } + + //Otherwise fetch it ourselves. + if ($html === null) { + $response = $this->httpClient->request('GET', $url); + $html = $response->getContent(); + } //Convert html to markdown, to provide a cleaner input to the LLM. 
$markdown = $this->htmlToMarkdown($html, $url); @@ -176,9 +184,20 @@ final class AIWebProvider implements InfoProviderInterface */ private function extractStructuredData(string $html, string $url): string { - //Only parse microdata, json-ld and rdfa, as they are the most common formats for structured data on product pages. Links and microformat only create clutter for the LLM - $micrometa = new Micrometa\Ports\Parser(Micrometa\Ports\Format::JSON_LD | Micrometa\Ports\Format::MICRODATA | Micrometa\Ports\Format::RDFA_LITE); - $items = $micrometa($url, $html); + try { + //Only parse microdata, json-ld and rdfa, as they are the most common formats for structured data on product pages. Links and microformat only create clutter for the LLM + $micrometa = new Micrometa\Ports\Parser(Micrometa\Ports\Format::JSON_LD | Micrometa\Ports\Format::MICRODATA | Micrometa\Ports\Format::RDFA_LITE); + $items = $micrometa($url, $html); + } catch (\RuntimeException $exception) { + //If parsing fails, try again without rdfa, as it seems to cause problems on pages like ebay + try { + $micrometa = new Micrometa\Ports\Parser(Micrometa\Ports\Format::JSON_LD | Micrometa\Ports\Format::MICRODATA); + $items = $micrometa($url, $html); + } catch (\RuntimeException $exception) { + //If it still fails, return empty structured data + return '{}'; + } + } return json_encode($items->toObject(), JSON_THROW_ON_ERROR); } diff --git a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php index 06a9d4c1..45777f9e 100644 --- a/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php +++ b/src/Services/InfoProviderSystem/Providers/GenericWebProvider.php @@ -25,6 +25,7 @@ namespace App\Services\InfoProviderSystem\Providers; use App\Exceptions\ProviderIDNotSupportedException; use App\Helpers\RandomizeUseragentHttpClient; +use App\Services\InfoProviderSystem\SubmittedPageStorage; use 
App\Services\InfoProviderSystem\CreateFromUrlHelper; use App\Services\InfoProviderSystem\DTOs\ParameterDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; @@ -57,6 +58,7 @@ class GenericWebProvider implements InfoProviderInterface public function __construct(HttpClientInterface $httpClient, private readonly GenericWebProviderSettings $settings, private readonly CreateFromUrlHelper $createFromUrlHelper, + private readonly SubmittedPageStorage $browserHtmlStorage, ) { //Use NoPrivateNetworkHttpClient to prevent SSRF vulnerabilities, and RandomizeUseragentHttpClient to make it harder for servers to block us @@ -294,9 +296,17 @@ class GenericWebProvider implements InfoProviderInterface } } - //Try to get the webpage content - $response = $this->httpClient->request('GET', $url); - $content = $response->getContent(); + // Use pre-fetched browser HTML if the option is set and a stored page is available for this URL + $content = null; + if (($token = ($options[self::OPTION_SUBMITTED_PAGE_TOKEN] ?? '')) !== '') { + $content = $this->browserHtmlStorage->retrieve($token)?->html; + } + + //Otherwise, fetch the page content ourselves + if ($content === null) { + $response = $this->httpClient->request('GET', $url); + $content = $response->getContent(); + } $dom = new Crawler($content); diff --git a/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php b/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php index a6e073a5..d3895795 100644 --- a/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php +++ b/src/Services/InfoProviderSystem/Providers/InfoProviderInterface.php @@ -30,6 +30,7 @@ interface InfoProviderInterface { public const OPTION_NO_CACHE = 'no_cache'; // if set to true, the provider should not use any cache and retrieve fresh data from the source public const OPTION_SKIP_DELEGATION = 'skip_delegation'; // if set to true, the provider should not delegate the request to other providers, even if it supports delegation. 
+ public const OPTION_SUBMITTED_PAGE_TOKEN = 'submitted_page_token'; // if set to a non-empty string, the provider should use the browser-submitted page with the given token (and retrieve it from SubmittedPageStorage) /** * Get information about this provider diff --git a/src/Services/InfoProviderSystem/SubmittedPageStorage.php b/src/Services/InfoProviderSystem/SubmittedPageStorage.php new file mode 100644 index 00000000..5e623f57 --- /dev/null +++ b/src/Services/InfoProviderSystem/SubmittedPageStorage.php @@ -0,0 +1,136 @@ +. + */ + +declare(strict_types=1); + +namespace App\Services\InfoProviderSystem; + +use App\Services\InfoProviderSystem\DTOs\BrowserSubmittedPage; +use Psr\Cache\CacheItemPoolInterface; +use Symfony\Component\DomCrawler\Crawler; +use Symfony\Component\HttpFoundation\RequestStack; + +/** + * Stores browser-submitted pages for the browser extension feature. + * + * Each page is stored as a {@see BrowserSubmittedPage} DTO in the application cache with a short TTL. + * The session holds only a compact list of the tokens of recently submitted pages so that pages can be listed + * without bloating the session with HTML content. + */ +class SubmittedPageStorage +{ + private const CACHE_KEY_PREFIX = 'browser_plugin_html_'; + private const CACHE_TTL = 1800; // 30 minutes + private const SESSION_KEY = 'browser_plugin_recent_urls'; + private const MAX_RECENT = 10; + + public function __construct( + private readonly RequestStack $requestStack, + private readonly CacheItemPoolInterface $cache, + ) { + } + + /** + * Stores a submitted page in the cache and records its token in the session's recent list. + * @return string The token under which the page was stored, derived from the URL and HTML. This token is used to retrieve the page later. It is the same value as $page->token.
+ */ + public function store(BrowserSubmittedPage $page): string + { + $item = $this->cache->getItem($this->cacheKey($page)); + $item->set($page); + $item->expiresAfter(self::CACHE_TTL); + $this->cache->save($item); + + $session = $this->requestStack->getSession(); + $tokens = array_values(array_filter( + $session->get(self::SESSION_KEY, []), + static fn(string $u): bool => $u !== $page->token, + )); + array_unshift($tokens, $page->token); + $session->set(self::SESSION_KEY, array_slice($tokens, 0, self::MAX_RECENT)); + + return $page->token; + } + + /** + * Retrieves the stored page via its token (which is derived from the URL and HTML). Returns null if not found or expired, or if the token is malformed. + */ + public function retrieve(string $token): ?BrowserSubmittedPage + { + //Tokens come from request parameters: reject anything that is not a hex hash, as it might contain characters reserved in cache keys + if (preg_match('/^[0-9a-f]+$/D', $token) !== 1) { + return null; + } + + $item = $this->cache->getItem($this->cacheKey($token)); + $page = $item->isHit() ? $item->get() : null; + + //A cache entry that is not a page is treated like a miss + return $page instanceof BrowserSubmittedPage ? $page : null; + } + + /** + * Returns the list of recently submitted pages, newest first. + * Pages whose cache entry has expired are silently omitted. + * The list depends on the session and thus is per-browser and per-user. + * + * @return BrowserSubmittedPage[] + */ + public function getRecentPages(): array + { + $tokens = $this->requestStack->getSession()->get(self::SESSION_KEY, []); + $pages = []; + foreach ($tokens as $token) { + $page = $this->retrieve($token); + if ($page !== null) { + $pages[] = $page; + } + } + return $pages; + } + + /** + * Removes a page from both the cache and the recent list. + * @param BrowserSubmittedPage|string $page The page or its token to remove. + */ + public function remove(BrowserSubmittedPage|string $page): void + { + $this->cache->deleteItem($this->cacheKey($page)); + + $token = is_string($page) ?
$page : $page->token; + + $session = $this->requestStack->getSession(); + //Remove the token from the recent list in the session: + $tokens = array_values(array_filter( + $session->get(self::SESSION_KEY, []), + static fn(string $u): bool => $u !== $token + )); + $session->set(self::SESSION_KEY, $tokens); + } + + private function cacheKey(BrowserSubmittedPage|string $token): string + { + if (!is_string($token)) { + $token = $token->token; + } + + return self::CACHE_KEY_PREFIX . $token; + } +} diff --git a/src/Settings/InfoProviderSystem/BrowserPluginSettings.php b/src/Settings/InfoProviderSystem/BrowserPluginSettings.php new file mode 100644 index 00000000..1ad5c50b --- /dev/null +++ b/src/Settings/InfoProviderSystem/BrowserPluginSettings.php @@ -0,0 +1,40 @@ +. + */ + +declare(strict_types=1); + + +namespace App\Settings\InfoProviderSystem; + +use App\Settings\SettingsIcon; +use Jbtronics\SettingsBundle\Metadata\EnvVarMode; +use Jbtronics\SettingsBundle\Settings\Settings; +use Jbtronics\SettingsBundle\Settings\SettingsParameter; +use Symfony\Component\Translation\TranslatableMessage as TM; + +#[Settings(name: "browser_plugin", label: new TM("settings.ips.browser_plugin"), description: new TM("settings.ips.browser_plugin.description"))] +#[SettingsIcon("fa-cloud-arrow-up")] +class BrowserPluginSettings +{ + #[SettingsParameter(label: new TM("settings.ips.lcsc.enabled"), description: new TM("settings.ips.browser_plugin.enabled.help"), + envVar: "bool:BROWSER_PLUGIN_ENABLED", envVarMode: EnvVarMode::OVERWRITE + )] + public bool $enabled = false; +} diff --git a/src/Settings/InfoProviderSystem/InfoProviderSettings.php b/src/Settings/InfoProviderSystem/InfoProviderSettings.php index 3e2a27ef..96de19cb 100644 --- a/src/Settings/InfoProviderSystem/InfoProviderSettings.php +++ b/src/Settings/InfoProviderSystem/InfoProviderSettings.php @@ -37,6 +37,9 @@ class InfoProviderSettings #[EmbeddedSettings] public ?InfoProviderGeneralSettings $general = null; + #[EmbeddedSettings] 
+ public ?BrowserPluginSettings $browserPlugin = null; + #[EmbeddedSettings] public ?GenericWebProviderSettings $genericWebProvider = null; diff --git a/templates/info_providers/from_url/from_url.html.twig b/templates/info_providers/from_url/from_url.html.twig index 49d4b116..3146c5a5 100644 --- a/templates/info_providers/from_url/from_url.html.twig +++ b/templates/info_providers/from_url/from_url.html.twig @@ -33,5 +33,31 @@ {{ form_row(form.submit) }} + + {% if recentBrowserPages is not empty %} +
+ +
+ +
+

{% trans %}browser_plugin.recent_pages.help{% endtrans %}

+
+ {% for page in recentBrowserPages %} + + {% endfor %} +
+
+
+ {% endif %} + {{ form_end(form) }} {% endblock %}
diff --git a/tests/Controller/BrowserPluginControllerTest.php b/tests/Controller/BrowserPluginControllerTest.php
new file mode 100644
index 00000000..8af82ce9
--- /dev/null
+++ b/tests/Controller/BrowserPluginControllerTest.php
@@ -0,0 +1,247 @@
+.
+ */
+
+declare(strict_types=1);
+
+namespace App\Tests\Controller;
+
+use App\Entity\UserSystem\User;
+use App\Settings\InfoProviderSystem\BrowserPluginSettings;
+use PHPUnit\Framework\Attributes\Group;
+use Symfony\Bundle\FrameworkBundle\KernelBrowser;
+use Symfony\Bundle\FrameworkBundle\Test\WebTestCase;
+use Symfony\Component\HttpFoundation\Response;
+
+/**
+ * Functional tests for the two browser-plugin endpoints:
+ * GET /en/tools/info_providers/browser_info and POST /en/tools/info_providers/browser_html.
+ *
+ * The tests look users up by name ("admin", "noread") and are skipped when a user is missing.
+ */
+#[Group("slow")]
+#[Group("DB")]
+final class BrowserPluginControllerTest extends WebTestCase
+{
+    private const INFO_URL = '/en/tools/info_providers/browser_info';
+    private const SUBMIT_URL = '/en/tools/info_providers/browser_html';
+
+    // --- GET /browser_info ---
+
+    public function testGetInfoReturns401WhenNotAuthenticated(): void
+    {
+        $client = static::createClient();
+        $client->request('GET', self::INFO_URL);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_UNAUTHORIZED);
+    }
+
+    public function testGetInfoReturnsForbiddenForUnprivilegedUser(): void
+    {
+        $client = $this->createClientWithPluginEnabled('noread');
+
+        $client->request('GET', self::INFO_URL);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_FORBIDDEN);
+    }
+
+    public function testGetInfoReturns451WhenPluginDisabled(): void
+    {
+        $client = static::createClient();
+        $this->loginAsUser($client, 'admin');
+        // BrowserPluginSettings::$enabled defaults to false
+
+        $client->request('GET', self::INFO_URL);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_UNAVAILABLE_FOR_LEGAL_REASONS);
+    }
+
+    public function testGetInfoReturnsJsonWithExpectedKeys(): void
+    {
+        $client = $this->createClientWithPluginEnabled('admin');
+
+        $client->request('GET', self::INFO_URL);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_OK);
+        self::assertResponseHeaderSame('Content-Type', 'application/json');
+
+        $data = $this->decodeJsonResponse($client);
+        self::assertArrayHasKey('username', $data);
+        self::assertArrayHasKey('instance_name', $data);
+        self::assertArrayHasKey('url_providers', $data);
+        self::assertIsString($data['username']);
+        self::assertIsString($data['instance_name']);
+        self::assertIsArray($data['url_providers']);
+        self::assertNotEmpty($data['username']);
+        self::assertNotEmpty($data['instance_name']);
+    }
+
+    public function testGetInfoUrlProvidersHaveIdAndLabel(): void
+    {
+        $client = $this->createClientWithPluginEnabled('admin');
+
+        $client->request('GET', self::INFO_URL);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_OK);
+        $data = $this->decodeJsonResponse($client);
+
+        // The list may legitimately be empty; only the shape of the entries that are present is checked.
+        foreach ($data['url_providers'] as $provider) {
+            self::assertArrayHasKey('id', $provider);
+            self::assertArrayHasKey('label', $provider);
+            self::assertIsString($provider['id']);
+            self::assertIsString($provider['label']);
+            self::assertNotEmpty($provider['id']);
+            self::assertNotEmpty($provider['label']);
+        }
+    }
+
+    // --- POST /browser_html ---
+
+    public function testSubmitHtmlReturns401WhenNotAuthenticated(): void
+    {
+        $client = static::createClient();
+        $this->postJson($client, ['url' => 'https://example.com', 'html' => '', 'title' => 'Test']);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_UNAUTHORIZED);
+    }
+
+    public function testSubmitHtmlReturns451WhenPluginDisabled(): void
+    {
+        $client = static::createClient();
+        $this->loginAsUser($client, 'admin');
+        // BrowserPluginSettings::$enabled defaults to false
+
+        $this->postJson($client, ['url' => 'https://example.com', 'html' => '', 'title' => 'Test']);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_UNAVAILABLE_FOR_LEGAL_REASONS);
+    }
+
+    public function testSubmitHtmlWithValidDataAndProvider(): void
+    {
+        $client = $this->createClientWithPluginEnabled('admin');
+
+        $this->postJson($client, [
+            'url' => 'https://example.com/product/123',
+            'html' => 'Product page',
+            'title' => 'Some Product',
+            'provider' => 'generic_web',
+        ]);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_OK);
+        $data = $this->decodeJsonResponse($client);
+        self::assertArrayHasKey('redirect_url', $data);
+        self::assertNotNull($data['redirect_url']);
+        self::assertStringContainsString('generic_web', (string) $data['redirect_url']);
+    }
+
+    public function testSubmitHtmlWithoutProviderReturnsNullRedirectUrl(): void
+    {
+        $client = $this->createClientWithPluginEnabled('admin');
+
+        $this->postJson($client, [
+            'url' => 'https://example.com/product/123',
+            'html' => 'Product page',
+            'title' => 'Some Product',
+        ]);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_OK);
+        $data = $this->decodeJsonResponse($client);
+        self::assertArrayHasKey('redirect_url', $data);
+        self::assertNull($data['redirect_url']);
+    }
+
+    public function testSubmitHtmlWithInvalidJsonReturns400(): void
+    {
+        $client = $this->createClientWithPluginEnabled('admin');
+
+        // Sent as a raw body on purpose: the payload must not be valid JSON.
+        $this->postRawBody($client, 'this is not valid json {');
+
+        self::assertResponseStatusCodeSame(Response::HTTP_BAD_REQUEST);
+    }
+
+    public function testSubmitHtmlWithMissingUrlReturns422(): void
+    {
+        $client = $this->createClientWithPluginEnabled('admin');
+
+        $this->postJson($client, ['html' => '', 'title' => 'Test']);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_UNPROCESSABLE_ENTITY);
+    }
+
+    public function testSubmitHtmlWithMissingHtmlReturns422(): void
+    {
+        $client = $this->createClientWithPluginEnabled('admin');
+
+        $this->postJson($client, ['url' => 'https://example.com', 'title' => 'Test']);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_UNPROCESSABLE_ENTITY);
+    }
+
+    public function testSubmitHtmlWithInvalidUrlReturns422(): void
+    {
+        $client = $this->createClientWithPluginEnabled('admin');
+
+        $this->postJson($client, ['url' => 'not-a-url', 'html' => '', 'title' => 'Test']);
+
+        self::assertResponseStatusCodeSame(Response::HTTP_UNPROCESSABLE_ENTITY);
+    }
+
+    /**
+     * Creates a client, logs in the given user and switches the browser plugin on.
+     *
+     * Rebooting is disabled before the setting is changed — presumably so that the in-memory
+     * change to BrowserPluginSettings is still in effect when the request is handled.
+     */
+    private function createClientWithPluginEnabled(string $username): KernelBrowser
+    {
+        $client = static::createClient();
+        $client->disableReboot();
+        $this->loginAsUser($client, $username);
+        static::getContainer()->get(BrowserPluginSettings::class)->enabled = true;
+
+        return $client;
+    }
+
+    /**
+     * POSTs the payload, JSON-encoded, to the browser_html endpoint.
+     *
+     * @param array<string, mixed> $payload
+     *
+     * @throws \JsonException if the payload cannot be encoded
+     */
+    private function postJson(KernelBrowser $client, array $payload): void
+    {
+        $this->postRawBody($client, json_encode($payload, JSON_THROW_ON_ERROR));
+    }
+
+    /**
+     * POSTs an arbitrary body to the browser_html endpoint with a JSON content type.
+     */
+    private function postRawBody(KernelBrowser $client, string $body): void
+    {
+        $client->request('POST', self::SUBMIT_URL, [], [], [
+            'CONTENT_TYPE' => 'application/json',
+        ], $body);
+    }
+
+    /**
+     * Decodes the body of the client's last response as an associative array.
+     *
+     * @return array<string, mixed>
+     *
+     * @throws \JsonException if the response body is not valid JSON
+     */
+    private function decodeJsonResponse(KernelBrowser $client): array
+    {
+        return json_decode((string) $client->getResponse()->getContent(), true, flags: JSON_THROW_ON_ERROR);
+    }
+
+    /**
+     * Logs the client in as the user with the given name; skips the test when no such user exists.
+     */
+    private function loginAsUser(KernelBrowser $client, string $username): void
+    {
+        $entityManager = static::getContainer()->get('doctrine')->getManager();
+        $user = $entityManager->getRepository(User::class)->findOneBy(['name' => $username]);
+        if ($user === null) {
+            self::markTestSkipped("User '{$username}' not found in fixtures");
+        }
+        $client->loginUser($user);
+    }
+}
diff --git a/tests/Services/InfoProviderSystem/DTOs/BrowserSubmittedPageTest.php b/tests/Services/InfoProviderSystem/DTOs/BrowserSubmittedPageTest.php
new file mode 100644
index 00000000..bafff477
--- /dev/null
+++ b/tests/Services/InfoProviderSystem/DTOs/BrowserSubmittedPageTest.php
@@ -0,0 +1,86 @@
+.
+ */
+
+declare(strict_types=1);
+
+namespace App\Tests\Services\InfoProviderSystem\DTOs;
+
+use App\Services\InfoProviderSystem\DTOs\BrowserSubmittedPage;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Unit tests for the token and submittedAt properties of BrowserSubmittedPage.
+ */
+final class BrowserSubmittedPageTest extends TestCase
+{
+    public function testTokenIsNonEmpty(): void
+    {
+        $page = new BrowserSubmittedPage('https://example.com', '', 'Test');
+
+        self::assertNotEmpty($page->token);
+    }
+
+    public function testTokenIsDeterministic(): void
+    {
+        $first = new BrowserSubmittedPage('https://example.com', '', 'Title A');
+        $second = new BrowserSubmittedPage('https://example.com', '', 'Title B');
+
+        // Token is derived from URL + HTML only, title does not affect it
+        self::assertSame($first->token, $second->token);
+    }
+
+    public function testDifferentUrlProducesDifferentToken(): void
+    {
+        $first = new BrowserSubmittedPage('https://example.com/1', '', 'Test');
+        $second = new BrowserSubmittedPage('https://example.com/2', '', 'Test');
+
+        self::assertNotSame($first->token, $second->token);
+    }
+
+    public function testDifferentHtmlProducesDifferentToken(): void
+    {
+        $first = new BrowserSubmittedPage('https://example.com', 'A', 'Test');
+        $second = new BrowserSubmittedPage('https://example.com', 'B', 'Test');
+
+        self::assertNotSame($first->token, $second->token);
+    }
+
+    /**
+     * Pins the exact token format: the xxh3 hash of "<url>|<html>".
+     */
+    public function testTokenMatchesPageTokenProperty(): void
+    {
+        $page = new BrowserSubmittedPage('https://example.com', 'content', 'Test');
+        $expected = hash('xxh3', 'https://example.com|content');
+
+        self::assertSame($expected, $page->token);
+    }
+
+    public function testDefaultSubmittedAtIsNow(): void
+    {
+        // Bracket the construction with two clock readings and compare at one-second resolution.
+        $before = new \DateTimeImmutable();
+        $page = new BrowserSubmittedPage('https://example.com', '', 'Test');
+        $after = new \DateTimeImmutable();
+
+        self::assertGreaterThanOrEqual($before->getTimestamp(), $page->submittedAt->getTimestamp());
+        self::assertLessThanOrEqual($after->getTimestamp(), $page->submittedAt->getTimestamp());
+    }
+
+    public function testCustomSubmittedAt(): void
+    {
+        $dt = new \DateTimeImmutable('2025-01-01 12:00:00');
+        $page = new BrowserSubmittedPage('https://example.com', '', 'Test', $dt);
+
+        // assertSame: the very instance passed in must be exposed, not a copy.
+        self::assertSame($dt, $page->submittedAt);
+    }
+}
diff --git a/tests/Services/InfoProviderSystem/SubmittedPageStorageTest.php b/tests/Services/InfoProviderSystem/SubmittedPageStorageTest.php
new file mode 100644
index 00000000..d754b2e1
--- /dev/null
+++ b/tests/Services/InfoProviderSystem/SubmittedPageStorageTest.php
@@ -0,0 +1,181 @@
+.
+ */
+
+declare(strict_types=1);
+
+namespace App\Tests\Services\InfoProviderSystem;
+
+use App\Services\InfoProviderSystem\DTOs\BrowserSubmittedPage;
+use App\Services\InfoProviderSystem\SubmittedPageStorage;
+use PHPUnit\Framework\TestCase;
+use Symfony\Component\Cache\Adapter\ArrayAdapter;
+use Symfony\Component\HttpFoundation\Request;
+use Symfony\Component\HttpFoundation\RequestStack;
+use Symfony\Component\HttpFoundation\Session\Session;
+use Symfony\Component\HttpFoundation\Session\Storage\MockArraySessionStorage;
+
+/**
+ * Unit tests for SubmittedPageStorage, wired to an in-memory ArrayAdapter cache and a mock array session.
+ */
+final class SubmittedPageStorageTest extends TestCase
+{
+    private SubmittedPageStorage $storage;
+    private Session $session;
+
+    /**
+     * Builds a fresh storage per test on top of a request stack whose only request carries the mock session.
+     */
+    protected function setUp(): void
+    {
+        $this->session = new Session(new MockArraySessionStorage());
+
+        $request = new Request();
+        $request->setSession($this->session);
+
+        $requestStack = new RequestStack();
+        $requestStack->push($request);
+
+        $this->storage = new SubmittedPageStorage($requestStack, new ArrayAdapter());
+    }
+
+    public function testStoreReturnsToken(): void
+    {
+        $page = new BrowserSubmittedPage('https://example.com', '', 'Test');
+
+        self::assertSame($page->token, $this->storage->store($page));
+    }
+
+    public function testStoreAndRetrieve(): void
+    {
+        $page = new BrowserSubmittedPage('https://example.com', 'content', 'Test Page');
+        $token = $this->storage->store($page);
+
+        $retrieved = $this->storage->retrieve($token);
+
+        self::assertNotNull($retrieved);
+        self::assertSame($page->url, $retrieved->url);
+        self::assertSame($page->html, $retrieved->html);
+        self::assertSame($page->title, $retrieved->title);
+        self::assertSame($page->token, $retrieved->token);
+    }
+
+    public function testRetrieveReturnsNullForUnknownToken(): void
+    {
+        self::assertNull($this->storage->retrieve('nonexistent_token_xyz'));
+    }
+
+    public function testStoreReturnsSameTokenForSameUrlAndHtml(): void
+    {
+        $page1 = new BrowserSubmittedPage('https://example.com', '', 'Title One');
+        $page2 = new BrowserSubmittedPage('https://example.com', '', 'Title Two');
+
+        self::assertSame($this->storage->store($page1), $this->storage->store($page2));
+    }
+
+    public function testRemoveByTokenDeletesFromCache(): void
+    {
+        $page = new BrowserSubmittedPage('https://example.com', '', 'Test');
+        $token = $this->storage->store($page);
+
+        $this->storage->remove($token);
+
+        self::assertNull($this->storage->retrieve($token));
+    }
+
+    public function testRemoveByPageObjectDeletesFromCache(): void
+    {
+        $page = new BrowserSubmittedPage('https://example.com', '', 'Test');
+        $this->storage->store($page);
+
+        $this->storage->remove($page);
+
+        self::assertNull($this->storage->retrieve($page->token));
+    }
+
+    public function testRemoveDeletesFromSession(): void
+    {
+        $page = new BrowserSubmittedPage('https://example.com', '', 'Test');
+        $this->storage->store($page);
+
+        $this->storage->remove($page);
+
+        self::assertEmpty($this->storage->getRecentPages());
+    }
+
+    public function testGetRecentPagesReturnsStoredPages(): void
+    {
+        $page1 = new BrowserSubmittedPage('https://example.com/1', '1', 'Page 1');
+        $page2 = new BrowserSubmittedPage('https://example.com/2', '2', 'Page 2');
+        $this->storage->store($page1);
+        $this->storage->store($page2);
+
+        $recent = $this->storage->getRecentPages();
+
+        self::assertCount(2, $recent);
+    }
+
+    public function testGetRecentPagesReturnsNewestFirst(): void
+    {
+        $page1 = new BrowserSubmittedPage('https://example.com/1', '1', 'Page 1');
+        $page2 = new BrowserSubmittedPage('https://example.com/2', '2', 'Page 2');
+        $this->storage->store($page1);
+        $this->storage->store($page2);
+
+        $recent = $this->storage->getRecentPages();
+
+        // Guard the index accesses below so a short list fails as an assertion, not as an undefined-offset warning.
+        self::assertCount(2, $recent);
+        self::assertSame($page2->url, $recent[0]->url);
+        self::assertSame($page1->url, $recent[1]->url);
+    }
+
+    public function testStoreDeduplicatesSamePageInSession(): void
+    {
+        $page = new BrowserSubmittedPage('https://example.com', '', 'Test');
+        $this->storage->store($page);
+        $this->storage->store($page);
+
+        self::assertCount(1, $this->storage->getRecentPages());
+    }
+
+    public function testStoreMovesResubmittedPageToTop(): void
+    {
+        $page1 = new BrowserSubmittedPage('https://example.com/1', '1', 'Page 1');
+        $page2 = new BrowserSubmittedPage('https://example.com/2', '2', 'Page 2');
+        $this->storage->store($page1);
+        $this->storage->store($page2);
+        // Resubmit page1 — it should move back to the top
+        $this->storage->store($page1);
+
+        $recent = $this->storage->getRecentPages();
+
+        // Guard the index accesses below; the resubmission must not add a third entry.
+        self::assertCount(2, $recent);
+        self::assertSame($page1->url, $recent[0]->url);
+        self::assertSame($page2->url, $recent[1]->url);
+    }
+
+    public function testGetRecentPagesSilentlyOmitsExpiredEntries(): void
+    {
+        // Put a token in the session that has no corresponding cache entry (simulates expiry)
+        $this->session->set('browser_plugin_recent_urls', ['expired_token_xyz']);
+
+        self::assertEmpty($this->storage->getRecentPages());
+    }
+
+    public function testSessionCappedAtTenEntries(): void
+    {
+        // Store two more pages than the expected cap of ten.
+        for ($i = 0; $i < 12; $i++) {
+            $page = new BrowserSubmittedPage("https://example.com/{$i}", "{$i}", "Page {$i}");
+            $this->storage->store($page);
+        }
+
+        self::assertCount(10, $this->storage->getRecentPages());
+    }
+}
diff --git a/translations/messages.en.xlf b/translations/messages.en.xlf
index 0044edcc..b5fec280 100644
--- a/translations/messages.en.xlf
+++ b/translations/messages.en.xlf
@@ -13607,5 +13607,35 @@ Buerklin-API Authentication server: Host URL
+
+
+ browser_plugin.recent_pages.title
+ Recent browser submissions
+
+
+
+
+ browser_plugin.recent_pages.help
+ Pages recently submitted from your browser extension. Click to create a part using the captured HTML.
+
+
+
+
+ settings.ips.browser_plugin
+ Browser plugin
+
+
+
+
+ settings.ips.browser_plugin.description
+ The browser plugin allows you to submit pages to Part-DB directly from a browser to create new parts. The HTML content is submitted, so extraction works even on DDoS-protected pages or pages requiring JavaScript for correct rendering.
The Generic Web or AI Web extractor must be enabled for the plugin to be useful. + + + + + settings.ips.browser_plugin.enabled.help + When enabled, users with the info provider permission can submit pages to Part-DB and retrieve them later. + +