diff --git a/src/Services/InfoProviderSystem/Providers/AIInfoExtractor.php b/src/Services/InfoProviderSystem/Providers/AIInfoExtractor.php
new file mode 100644
index 000000000..d075ea1d4
--- /dev/null
+++ b/src/Services/InfoProviderSystem/Providers/AIInfoExtractor.php
@@ -0,0 +1,607 @@
+<?php
+/*
+ * NOTE(review): the project's license header of this file was truncated in the reviewed
+ * patch (only its closing lines survived); restore the standard header before merging.
+ */
+
+declare(strict_types=1);
+
+
+namespace App\Services\InfoProviderSystem\Providers;
+
+use App\Entity\Parts\ManufacturingStatus;
+use App\Services\InfoProviderSystem\DTOs\FileDTO;
+use App\Services\InfoProviderSystem\DTOs\ParameterDTO;
+use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
+use App\Services\InfoProviderSystem\DTOs\PriceDTO;
+use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
+use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
+use App\Settings\InfoProviderSystem\AIExtractorSettings;
+use Symfony\Contracts\HttpClient\HttpClientInterface;
+
+/**
+ * Info provider that downloads a product web page and asks an LLM (through the OpenRouter
+ * chat completions API) to extract structured part information from its HTML.
+ */
+class AIInfoExtractor implements InfoProviderInterface
+{
+    private const DISTRIBUTOR_NAME = 'AI Extracted';
+
+    /** Name of the tool/function the model is asked to call to return its structured result. */
+    private const TOOL_NAME = 'extract_part_info';
+
+    private readonly HttpClientInterface $httpClient;
+
+    /**
+     * @param HttpClientInterface $httpClient Base client; a copy with a 30 s timeout and a custom User-Agent is used
+     * @param AIExtractorSettings $settings   Supplies the API key, model, enabled flag and maximum content length
+     */
+    public function __construct(HttpClientInterface $httpClient, private readonly AIExtractorSettings $settings)
+    {
+        $this->httpClient = $httpClient->withOptions([
+            'timeout' => 30,
+            'headers' => [
+                'User-Agent' => 'Mozilla/5.0 (compatible; Part-DB AI-Extractor/1.0)',
+            ],
+        ]);
+    }
+
+    public function getProviderInfo(): array
+    {
+        return [
+            'name' => 'AI Information Extractor',
+            'description' => 'Extract part info from any URL using OpenRouter LLM',
+            'url' => 'https://openrouter.ai',
+            'disabled_help' => 'Configure OpenRouter API key in settings',
+            'settings_class' => AIExtractorSettings::class,
+        ];
+    }
+
+    public function getProviderKey(): string
+    {
+        return 'ai_extractor';
+    }
+
+    /**
+     * The provider is usable only when it is enabled and a non-blank API key is configured.
+     */
+    public function isActive(): bool
+    {
+        $apiKey = $this->settings->apiKey;
+
+        return $this->settings->enabled && $apiKey !== null && trim($apiKey) !== '';
+    }
+
+    /**
+     * Treats the keyword as the URL of a product page and returns at most one result for it.
+     *
+     * @return SearchResultDTO[] Empty when the keyword is not a usable URL or the extraction fails
+     */
+    public function searchByKeyword(string $keyword): array
+    {
+        try {
+            // Normalize inside the try block: ordinary (non-URL) keywords must yield "no results"
+            // instead of letting the InvalidArgumentException escape to the caller.
+            $url = $this->normalizeURL($keyword);
+            $part = $this->getDetails($url);
+
+            return [
+                new SearchResultDTO(
+                    provider_key: $this->getProviderKey(),
+                    provider_id: $url,
+                    name: $part->name,
+                    description: $part->description,
+                    category: $part->category,
+                    manufacturer: $part->manufacturer,
+                    mpn: $part->mpn,
+                    preview_image_url: $part->preview_image_url,
+                    manufacturing_status: $part->manufacturing_status,
+                    provider_url: $part->provider_url,
+                    footprint: $part->footprint,
+                    gtin: $part->gtin,
+                ),
+            ];
+        } catch (\Throwable) {
+            // Deliberate best effort: a failing page or LLM call must not break a keyword search.
+            return [];
+        }
+    }
+
+    /**
+     * Downloads the page behind $id, reduces it to the relevant HTML and lets the LLM extract the part data.
+     *
+     * @param string $id URL of the product page (the scheme may be omitted)
+     * @throws \InvalidArgumentException If $id is not a valid URL
+     * @throws \JsonException            If the LLM answer is not valid JSON
+     * @throws \RuntimeException         If the LLM answer is empty or not a JSON object
+     */
+    public function getDetails(string $id): PartDetailDTO
+    {
+        $url = $this->normalizeURL($id);
+
+        // NOTE(review): the URL is user supplied and fetched server-side; consider restricting
+        // private/internal hosts (SSRF) at the HTTP client level.
+        $html = $this->httpClient->request('GET', $url)->getContent();
+
+        $content = $this->truncateHTML($this->cleanHTML($html), $this->settings->maxContentLength);
+
+        $data = json_decode($this->callOpenRouterAPI($content, $url), true, 512, JSON_THROW_ON_ERROR);
+        if (!is_array($data)) {
+            // Valid JSON, but a bare scalar/null instead of the expected object
+            throw new \RuntimeException('LLM response is not a JSON object');
+        }
+
+        return $this->buildPartDetailDTO($data, $url);
+    }
+
+    public function getCapabilities(): array
+    {
+        return [
+            ProviderCapabilities::BASIC,
+            ProviderCapabilities::PICTURE,
+            ProviderCapabilities::DATASHEET,
+            ProviderCapabilities::PRICE,
+            ProviderCapabilities::FOOTPRINT,
+            ProviderCapabilities::PARAMETERS,
+        ];
+    }
+
+    /**
+     * Trims the input, prepends "https://" when no http(s) scheme is present and validates the result.
+     *
+     * @throws \InvalidArgumentException If the result is not a valid URL
+     */
+    private function normalizeURL(string $url): string
+    {
+        $url = trim($url);
+
+        // Match the scheme case-insensitively so "HTTP://host" is not turned into "https://HTTP://host"
+        if (preg_match('/^https?:\/\//i', $url) !== 1) {
+            $url = 'https://' . ltrim($url, '/');
+        }
+
+        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
+            throw new \InvalidArgumentException("Invalid URL: $url");
+        }
+
+        return $url;
+    }
+
+    /**
+     * Removes markup that carries no part information (scripts, styles, navigation, header, footer, comments).
+     */
+    private function cleanHTML(string $html): string
+    {
+        // "\x3C" is the PCRE escape for "<"; it is spelled out so that tools which strip HTML tags
+        // from source text cannot silently damage these patterns.
+        $patterns = [];
+        foreach (['script', 'style', 'nav', 'footer', 'header'] as $tag) {
+            $patterns[] = sprintf('/\x3C%1$s\b[^>]*>.*?\x3C\/%1$s>/is', $tag);
+        }
+        $patterns[] = '/\x3C!--.*?-->/s';
+
+        foreach ($patterns as $pattern) {
+            // preg_replace() returns null on a PCRE error (e.g. backtrack limit on huge pages);
+            // keep the current text in that case rather than violating the string return type.
+            $html = preg_replace($pattern, '', $html) ?? $html;
+        }
+
+        return $html;
+    }
+
+    /**
+     * Limits the HTML to at most $maxLength bytes, preferring to end on a tag end or a space.
+     *
+     * @param int $maxLength Maximum length in bytes; values <= 0 yield an empty string
+     */
+    private function truncateHTML(string $html, int $maxLength): string
+    {
+        if ($maxLength <= 0) {
+            return '';
+        }
+
+        if (strlen($html) <= $maxLength) {
+            return $html;
+        }
+
+        // Do not cut inside a multi-byte UTF-8 sequence: a dangling lead byte makes the text invalid
+        // UTF-8, which cannot be JSON-encoded for the API request. Continuation bytes look like
+        // 10xxxxxx and a sequence has at most three of them, so a few steps back are enough.
+        $cut = $maxLength;
+        for ($step = 0; $step < 3 && $cut > 0 && (ord($html[$cut]) & 0xC0) === 0x80; $step++) {
+            $cut--;
+        }
+        $truncated = substr($html, 0, $cut);
+
+        // strrpos() returns false when nothing is found; map that to -1 so max() compares integers only
+        $lastTagEnd = strrpos($truncated, '>');
+        $lastSpace = strrpos($truncated, ' ');
+        $lastPos = max($lastTagEnd === false ? -1 : $lastTagEnd, $lastSpace === false ? -1 : $lastSpace);
+
+        // Only fall back to that boundary when it costs less than 10% of the budget
+        if ($lastPos > $maxLength * 0.9) {
+            $truncated = substr($truncated, 0, $lastPos + 1);
+        }
+
+        return $truncated;
+    }
+
+    /**
+     * Sends the page content to the OpenRouter chat completions endpoint and returns the raw JSON
+     * string produced by the model (tool-call arguments, or the message content as a fallback).
+     *
+     * @throws \RuntimeException If the response contains no usable message
+     */
+    private function callOpenRouterAPI(string $htmlContent, string $url): string
+    {
+        $payload = [
+            'model' => $this->settings->model,
+            'messages' => [
+                [
+                    'role' => 'system',
+                    'content' => $this->buildSystemPrompt(),
+                ],
+                [
+                    'role' => 'user',
+                    'content' => "Extract part information from this webpage content:\n\nURL: $url\n\n$htmlContent",
+                ],
+            ],
+            'tools' => [$this->buildToolDefinition()],
+            'tool_choice' => ['type' => 'function', 'function' => ['name' => self::TOOL_NAME]],
+            'max_tokens' => 4096,
+            'temperature' => 0.1,
+        ];
+
+        $response = $this->httpClient->request('POST', 'https://openrouter.ai/api/v1/chat/completions', [
+            'headers' => [
+                'Authorization' => 'Bearer ' . $this->settings->apiKey,
+                'Content-Type' => 'application/json',
+                'HTTP-Referer' => 'https://github.com/Part-DB/Part-DB-server',
+                'X-Title' => 'Part-DB AI Info Extractor',
+            ],
+            'json' => $payload,
+        ]);
+
+        $message = $response->toArray()['choices'][0]['message'] ?? null;
+        if (!is_array($message)) {
+            throw new \RuntimeException('No response message from LLM');
+        }
+
+        // Preferred path: the model answered through the requested tool/function call
+        $toolCalls = $message['tool_calls'] ?? [];
+        foreach (is_array($toolCalls) ? $toolCalls : [] as $toolCall) {
+            $arguments = $toolCall['function']['arguments'] ?? null;
+            if (($toolCall['function']['name'] ?? null) === self::TOOL_NAME && is_string($arguments)) {
+                return $arguments;
+            }
+        }
+
+        // Fallback to content if no tool call (some models might not support tool calling)
+        $content = $message['content'] ?? null;
+        if (!is_string($content) || trim($content) === '') {
+            throw new \RuntimeException('No response content from LLM');
+        }
+
+        // Strip a surrounding markdown code fence; trim first so the anchors see the fence itself
+        $content = trim($content);
+        $content = preg_replace('/^```(?:json)?\s*\n?/i', '', $content) ?? $content;
+        $content = preg_replace('/\n?```\s*$/i', '', $content) ?? $content;
+
+        return trim($content);
+    }
+
+    /**
+     * JSON schema of the tool/function through which the model returns its structured result.
+     *
+     * @return array<string, mixed>
+     */
+    private function buildToolDefinition(): array
+    {
+        // Datasheets and images share the same list-of-files schema
+        $fileListSchema = [
+            'type' => 'array',
+            'items' => [
+                'type' => 'object',
+                'properties' => [
+                    'url' => ['type' => 'string'],
+                    'description' => ['type' => 'string'],
+                ],
+                'required' => ['url'],
+            ],
+        ];
+
+        return [
+            'type' => 'function',
+            'function' => [
+                'name' => self::TOOL_NAME,
+                'description' => 'Extract electronic component information from a webpage',
+                'parameters' => [
+                    'type' => 'object',
+                    'properties' => [
+                        'name' => ['type' => 'string', 'description' => 'Product name'],
+                        'description' => ['type' => 'string', 'description' => 'Product description'],
+                        'manufacturer' => ['type' => ['string', 'null'], 'description' => 'Manufacturer name'],
+                        'mpn' => ['type' => ['string', 'null'], 'description' => 'Manufacturer Part Number'],
+                        'category' => ['type' => ['string', 'null'], 'description' => 'Product category'],
+                        'manufacturing_status' => [
+                            'type' => ['string', 'null'],
+                            'enum' => ['active', 'obsolete', 'nrfnd', 'discontinued', null],
+                            'description' => 'Manufacturing status',
+                        ],
+                        'footprint' => ['type' => ['string', 'null'], 'description' => 'Package/footprint type'],
+                        'mass' => ['type' => ['number', 'null'], 'description' => 'Mass in grams'],
+                        'parameters' => [
+                            'type' => 'array',
+                            'items' => [
+                                'type' => 'object',
+                                'properties' => [
+                                    'name' => ['type' => 'string'],
+                                    'value' => ['type' => 'string'],
+                                    'unit' => ['type' => ['string', 'null']],
+                                ],
+                                'required' => ['name', 'value'],
+                            ],
+                        ],
+                        'datasheets' => $fileListSchema,
+                        'images' => $fileListSchema,
+                        'vendor_infos' => [
+                            'type' => 'array',
+                            'items' => [
+                                'type' => 'object',
+                                'properties' => [
+                                    'distributor_name' => ['type' => 'string'],
+                                    'order_number' => ['type' => ['string', 'null']],
+                                    'product_url' => ['type' => 'string'],
+                                    'prices' => [
+                                        'type' => 'array',
+                                        'items' => [
+                                            'type' => 'object',
+                                            'properties' => [
+                                                'minimum_quantity' => ['type' => 'integer'],
+                                                'price' => ['type' => 'number'],
+                                                'currency' => ['type' => 'string'],
+                                            ],
+                                            'required' => ['minimum_quantity', 'price', 'currency'],
+                                        ],
+                                    ],
+                                ],
+                                'required' => ['distributor_name', 'product_url'],
+                            ],
+                        ],
+                        'manufacturer_product_url' => ['type' => ['string', 'null'], 'description' => 'Manufacturer product page URL'],
+                    ],
+                    'required' => ['name', 'description'],
+                ],
+            ],
+        ];
+    }
+
+    /**
+     * System prompt describing the expected JSON structure and the extraction rules.
+     */
+    private function buildSystemPrompt(): string
+    {
+        return <<<'PROMPT'
+You are an expert at extracting electronic component information from web pages. Extract structured data in JSON format.
+
+Return ONLY a valid JSON object with this exact structure:
+{
+  "name": "string",
+  "description": "string",
+  "manufacturer": "string | null",
+  "mpn": "string | null",
+  "category": "string | null",
+  "manufacturing_status": "active|obsolete|nrfnd|discontinued|null",
+  "footprint": "string | null",
+  "mass": "number | null (in grams)",
+  "parameters": [{"name": "string", "value": "string", "unit": "string | null"}],
+  "datasheets": [{"url": "string", "description": "string"}],
+  "images": [{"url": "string", "description": "string"}],
+  "vendor_infos": [{
+    "distributor_name": "string",
+    "order_number": "string | null",
+    "product_url": "string",
+    "prices": [{"minimum_quantity": int, "price": number, "currency": "string"}]
+  }],
+  "manufacturer_product_url": "string | null"
+}
+
+Rules:
+- manufacturing_status: Use "active", "obsolete", "nrfnd" (not recommended for new designs), "discontinued", or null
+- parameters: Extract technical specs like voltage, current, temperature, etc.
+- prices: Extract pricing tiers with minimum_quantity, price, and currency code
+- URLs must be absolute (include https://...)
+- If information is not found, use null
+- Return ONLY the JSON, no explanation text
+
+For parameters, combine name, value, and unit. The unit should be separate if possible.
+PROMPT;
+    }
+
+    /**
+     * Converts the decoded LLM answer into a PartDetailDTO, tolerating missing or mistyped fields.
+     *
+     * @param array<string, mixed> $data Decoded JSON object returned by the model
+     * @param string               $url  URL of the page the data was extracted from
+     */
+    private function buildPartDetailDTO(array $data, string $url): PartDetailDTO
+    {
+        $mass = $data['mass'] ?? null;
+
+        return new PartDetailDTO(
+            provider_key: $this->getProviderKey(),
+            provider_id: $url,
+            name: $this->stringOrNull($data['name'] ?? null) ?? 'Unknown',
+            description: $this->stringOrNull($data['description'] ?? null) ?? '',
+            category: $this->stringOrNull($data['category'] ?? null),
+            manufacturer: $this->stringOrNull($data['manufacturer'] ?? null),
+            mpn: $this->stringOrNull($data['mpn'] ?? null),
+            preview_image_url: $this->stringOrNull($data['images'][0]['url'] ?? null),
+            manufacturing_status: $this->mapManufacturingStatus($data['manufacturing_status'] ?? null),
+            provider_url: $url,
+            footprint: $this->stringOrNull($data['footprint'] ?? null),
+            mass: is_numeric($mass) ? (float) $mass : null,
+            notes: null,
+            datasheets: $this->buildFiles($data['datasheets'] ?? null, 'Datasheet'),
+            images: $this->buildFiles($data['images'] ?? null, 'Image'),
+            parameters: $this->buildParameters($data['parameters'] ?? null),
+            vendor_infos: $this->buildVendorInfos($data['vendor_infos'] ?? null, $url),
+            manufacturer_product_url: $this->stringOrNull($data['manufacturer_product_url'] ?? null),
+        );
+    }
+
+    /**
+     * Returns the trimmed string form of a string/int/float value, or null for anything else or blank text.
+     *
+     * The model output is untrusted, and this file runs with strict_types enabled, so values of an
+     * unexpected type must be filtered out here before they are handed to the DTO constructors.
+     */
+    private function stringOrNull(mixed $value): ?string
+    {
+        if (!is_string($value) && !is_int($value) && !is_float($value)) {
+            return null;
+        }
+
+        $text = trim((string) $value);
+
+        return $text === '' ? null : $text;
+    }
+
+    /**
+     * Maps the status string reported by the model to a ManufacturingStatus, or null when unknown.
+     */
+    private function mapManufacturingStatus(mixed $rawStatus): ?ManufacturingStatus
+    {
+        $status = $this->stringOrNull($rawStatus);
+        if ($status === null) {
+            return null;
+        }
+
+        return match (strtolower($status)) {
+            'active' => ManufacturingStatus::ACTIVE,
+            'obsolete', 'discontinued' => ManufacturingStatus::DISCONTINUED,
+            'nrfnd', 'not recommended for new designs' => ManufacturingStatus::NRFND,
+            'eol' => ManufacturingStatus::EOL,
+            'announced' => ManufacturingStatus::ANNOUNCED,
+            default => null,
+        };
+    }
+
+    /**
+     * @return ParameterDTO[]|null Null when the model returned no parameter list
+     */
+    private function buildParameters(mixed $rawParameters): ?array
+    {
+        if (!is_array($rawParameters) || $rawParameters === []) {
+            return null;
+        }
+
+        $parameters = [];
+        foreach ($rawParameters as $raw) {
+            $name = $this->stringOrNull($raw['name'] ?? null);
+            if ($name === null) {
+                continue;
+            }
+
+            $value = $this->stringOrNull($raw['value'] ?? null) ?? '';
+            $unit = $this->stringOrNull($raw['unit'] ?? null);
+
+            // Combine value and unit for parsing
+            $parameters[] = ParameterDTO::parseValueField(
+                name: $name,
+                value: $unit === null ? $value : $value . ' ' . $unit,
+            );
+        }
+
+        return $parameters;
+    }
+
+    /**
+     * Builds file DTOs from a list of {url, description} entries, skipping entries without a URL.
+     *
+     * @param string $defaultName Name used when an entry has no description
+     * @return FileDTO[]|null Null when the model returned no list
+     */
+    private function buildFiles(mixed $rawFiles, string $defaultName): ?array
+    {
+        if (!is_array($rawFiles) || $rawFiles === []) {
+            return null;
+        }
+
+        $files = [];
+        foreach ($rawFiles as $raw) {
+            $fileUrl = $this->stringOrNull($raw['url'] ?? null);
+            if ($fileUrl !== null) {
+                $files[] = new FileDTO(
+                    url: $fileUrl,
+                    name: $this->stringOrNull($raw['description'] ?? null) ?? $defaultName,
+                );
+            }
+        }
+
+        return $files;
+    }
+
+    /**
+     * Builds purchase information (distributor, order number, price tiers) from the model output.
+     *
+     * @param string $url Fallback product URL for vendors without their own link
+     * @return PurchaseInfoDTO[]|null Null when the model returned no vendor list
+     */
+    private function buildVendorInfos(mixed $rawVendors, string $url): ?array
+    {
+        if (!is_array($rawVendors) || $rawVendors === []) {
+            return null;
+        }
+
+        $vendorInfos = [];
+        foreach ($rawVendors as $vendor) {
+            if (!is_array($vendor)) {
+                continue;
+            }
+
+            $vendorInfos[] = new PurchaseInfoDTO(
+                distributor_name: $this->stringOrNull($vendor['distributor_name'] ?? null) ?? self::DISTRIBUTOR_NAME,
+                order_number: $this->stringOrNull($vendor['order_number'] ?? null) ?? 'Unknown',
+                prices: $this->buildPrices($vendor['prices'] ?? null),
+                product_url: $this->stringOrNull($vendor['product_url'] ?? null) ?? $url,
+            );
+        }
+
+        return $vendorInfos;
+    }
+
+    /**
+     * Builds the price tiers of one vendor, skipping entries without a numeric price.
+     *
+     * @return PriceDTO[]
+     */
+    private function buildPrices(mixed $rawPrices): array
+    {
+        if (!is_array($rawPrices)) {
+            return [];
+        }
+
+        $prices = [];
+        foreach ($rawPrices as $raw) {
+            $price = $raw['price'] ?? null;
+            if (!is_numeric($price)) {
+                continue;
+            }
+
+            // A tier below one piece makes no sense; clamp so that no zero quantity is passed on
+            $quantity = max(1, (int) ($raw['minimum_quantity'] ?? 1));
+
+            $prices[] = new PriceDTO(
+                minimum_discount_amount: $quantity,
+                price: (string) $price,
+                currency_iso_code: $this->stringOrNull($raw['currency'] ?? null) ?? 'USD',
+                // NOTE(review): this treats "price" as the price of the whole tier quantity; if the
+                // model reports unit prices (as most price tables do) this should be 1 - confirm.
+                price_related_quantity: $quantity,
+            );
+        }
+
+        return $prices;
+    }
+}
diff --git a/src/Services/InfoProviderSystem/Providers/ProviderCapabilities.php b/src/Services/InfoProviderSystem/Providers/ProviderCapabilities.php
index 21fba53b5..3a7d03e92 100644
--- a/src/Services/InfoProviderSystem/Providers/ProviderCapabilities.php
+++ b/src/Services/InfoProviderSystem/Providers/ProviderCapabilities.php
@@ -46,6 +46,9 @@ enum ProviderCapabilities
     /** Provider can provide GTIN for a part */
     case GTIN;
 
+    /** Provider can provide parameters/specifications for a part */
+    case PARAMETERS;
+
     /**
      * Get the order index for displaying capabilities in a stable order.
      * @return int
@@ -59,6 +62,7 @@ public function getOrderIndex(): int
             self::PRICE => 4,
             self::FOOTPRINT => 5,
             self::GTIN => 6,
+            self::PARAMETERS => 7,
         };
     }
 
@@ -71,6 +75,7 @@ public function getTranslationKey(): string
             self::DATASHEET => 'datasheet',
             self::PRICE => 'price',
             self::GTIN => 'gtin',
+            self::PARAMETERS => 'parameters',
         };
     }
 
@@ -83,6 +88,7 @@ public function getFAIconClass(): string
             self::DATASHEET => 'fa-file-alt',
             self::PRICE => 'fa-money-bill-wave',
             self::GTIN => 'fa-barcode',
+            self::PARAMETERS => 'fa-list-ul',
         };
     }
 }
diff --git a/src/Settings/InfoProviderSystem/AIExtractorSettings.php b/src/Settings/InfoProviderSystem/AIExtractorSettings.php
new file mode 100644
index 000000000..c78ca96e1
--- /dev/null
+++ b/src/Settings/InfoProviderSystem/AIExtractorSettings.php
@@ -0,0 +1,51 @@
+<?php
+/*
+ * NOTE(review): the project's license header of this file was truncated in the reviewed
+ * patch (only its closing lines survived); restore the standard header before merging.
+ */
+
+declare(strict_types=1);
+
+
+namespace App\Settings\InfoProviderSystem;
+
+use App\Settings\SettingsIcon;
+use Jbtronics\SettingsBundle\Metadata\EnvVarMode;
+use Jbtronics\SettingsBundle\Settings\Settings;
+use Jbtronics\SettingsBundle\Settings\SettingsParameter;
+use Jbtronics\SettingsBundle\Settings\SettingsTrait;
+use Symfony\Component\Translation\TranslatableMessage as TM;
+
+/**
+ * Settings of the AI info extractor provider (OpenRouter credentials, model and content limit).
+ */
+#[Settings(name: "ai_extractor", label: new TM("settings.ips.ai_extractor"), description: new TM("settings.ips.ai_extractor.description"))]
+#[SettingsIcon("fa-robot")]
+class AIExtractorSettings
+{
+    use SettingsTrait;
+
+    /** OpenRouter API key, sent as bearer token; the provider is inactive while this is empty. */
+    #[SettingsParameter(label: new TM("settings.ips.ai_extractor.api_key"), description: new TM("settings.ips.ai_extractor.api_key.description"),
+        envVar: "string:PROVIDER_AI_EXTRACTOR_API_KEY", envVarMode: EnvVarMode::OVERWRITE
+    )]
+    public ?string $apiKey = null;
+
+    /** Model identifier sent as "model" in the chat completion request. */
+    #[SettingsParameter(label: new TM("settings.ips.ai_extractor.model"), description: new TM("settings.ips.ai_extractor.model.description"),
+        envVar: "string:PROVIDER_AI_EXTRACTOR_MODEL", envVarMode: EnvVarMode::OVERWRITE
+    )]
+    public string $model = 'z-ai/glm-4.7';
+
+    /** Whether the provider is enabled; it is additionally inactive without an API key. */
+    #[SettingsParameter(label: new TM("settings.ips.ai_extractor.enabled"), description: new TM("settings.ips.ai_extractor.enabled.description"),
+        envVar: "bool:PROVIDER_AI_EXTRACTOR_ENABLED", envVarMode: EnvVarMode::OVERWRITE
+    )]
+    public bool $enabled = false;
+
+    /** Maximum length (in bytes) of the cleaned HTML that is sent to the model. */
+    #[SettingsParameter(label: new TM("settings.ips.ai_extractor.max_content_length"), description: new TM("settings.ips.ai_extractor.max_content_length.description"),
+        envVar: "int:PROVIDER_AI_EXTRACTOR_MAX_CONTENT_LENGTH", envVarMode: EnvVarMode::OVERWRITE
+    )]
+    public int $maxContentLength = 50000;
+}
diff --git a/src/Settings/InfoProviderSystem/InfoProviderSettings.php b/src/Settings/InfoProviderSystem/InfoProviderSettings.php
index 248fcedcf..be0fe7461 100644
--- a/src/Settings/InfoProviderSystem/InfoProviderSettings.php
+++ b/src/Settings/InfoProviderSystem/InfoProviderSettings.php
@@ -75,4 +75,7 @@ class InfoProviderSettings
 
     #[EmbeddedSettings]
     public ?CanopySettings $canopy = null;
+
+    #[EmbeddedSettings]
+    public ?AIExtractorSettings $aiExtractor = null;
 }