chore: update to llm-chain 0.24

chr-hertel · chr-hertel · commit a31ba8c7b521 · 2025-07-06T20:16:27.000+02:00
diff --git a/composer.json b/composer.json
@@ -9,7 +9,10 @@
         "ext-iconv": "*",
         "codewithkyrian/chromadb-php": "^0.4.0",
         "league/commonmark": "^2.7",
-        "php-llm/llm-chain-bundle": "^0.22",
+        "mrmysql/youtube-transcript": "^0.0.5",
+        "php-http/discovery": "^1.20",
+        "php-llm/llm-chain-bundle": "^0.24",
+        "psr/http-factory-implementation": "*",
         "runtime/frankenphp-symfony": "^0.2.0",
         "symfony/asset": "7.3.*",
         "symfony/asset-mapper": "7.3.*",
diff --git a/composer.lock b/composer.lock
diff --git a/config/packages/http_discovery.yaml b/config/packages/http_discovery.yaml
@@ -0,0 +1,10 @@
+services:
+    Psr\Http\Message\RequestFactoryInterface: '@http_discovery.psr17_factory'
+    Psr\Http\Message\ResponseFactoryInterface: '@http_discovery.psr17_factory'
+    Psr\Http\Message\ServerRequestFactoryInterface: '@http_discovery.psr17_factory'
+    Psr\Http\Message\StreamFactoryInterface: '@http_discovery.psr17_factory'
+    Psr\Http\Message\UploadedFileFactoryInterface: '@http_discovery.psr17_factory'
+    Psr\Http\Message\UriFactoryInterface: '@http_discovery.psr17_factory'
+
+    http_discovery.psr17_factory:
+        class: Http\Discovery\Psr17Factory
diff --git a/config/packages/llm_chain.yaml b/config/packages/llm_chain.yaml
@@ -44,7 +44,7 @@ llm_chain:
         chroma_db:
             symfonycon:
                 collection: 'symfony_blog'
-    embedder:
+    indexer:
         default:
             model:
                 name: 'Embeddings'
@@ -61,5 +61,5 @@ services:
     #     $apiKey: '%env(SERP_API_KEY)%'
     PhpLlm\LlmChain\Chain\Toolbox\Tool\Wikipedia: ~
     PhpLlm\LlmChain\Chain\Toolbox\Tool\SimilaritySearch:
-        $model: '@llm_chain.embedder.default.model'
+        $model: '@llm_chain.indexer.default.model'
 
diff --git a/src/Audio/Chat.php b/src/Audio/Chat.php
@@ -10,7 +10,6 @@
 use PhpLlm\LlmChain\Platform\Message\Message;
 use PhpLlm\LlmChain\Platform\Message\MessageBag;
 use PhpLlm\LlmChain\Platform\PlatformInterface;
-use PhpLlm\LlmChain\Platform\Response\AsyncResponse;
 use PhpLlm\LlmChain\Platform\Response\TextResponse;
 use Symfony\Component\DependencyInjection\Attribute\Autowire;
 use Symfony\Component\HttpFoundation\RequestStack;
@@ -34,11 +33,8 @@ public function say(string $base64audio): void
         file_put_contents($path, base64_decode($base64audio));
 
         $response = $this->platform->request(new Whisper(), Audio::fromFile($path));
-        assert($response instanceof AsyncResponse);
-        $response = $response->unwrap();
-        assert($response instanceof TextResponse);
 
-        $this->submitMessage($response->getContent());
+        $this->submitMessage($response->asText());
     }
 
     public function loadMessages(): MessageBag
diff --git a/src/Blog/Command/QueryCommand.php b/src/Blog/Command/QueryCommand.php
@@ -7,8 +7,6 @@
 use Codewithkyrian\ChromaDB\Client;
 use PhpLlm\LlmChain\Platform\Bridge\OpenAI\Embeddings;
 use PhpLlm\LlmChain\Platform\PlatformInterface;
-use PhpLlm\LlmChain\Platform\Response\AsyncResponse;
-use PhpLlm\LlmChain\Platform\Response\VectorResponse;
 use Symfony\Component\Console\Attribute\AsCommand;
 use Symfony\Component\Console\Command\Command;
 use Symfony\Component\Console\Input\InputInterface;
@@ -44,11 +42,8 @@ protected function execute(InputInterface $input, OutputInterface $output): int
         $io->comment('Results are limited to 4 most similar documents.');
 
         $platformResponse = $this->platform->request(new Embeddings(), $search);
-        assert($platformResponse instanceof AsyncResponse);
-        $platformResponse = $platformResponse->unwrap();
-        assert($platformResponse instanceof VectorResponse);
         $queryResponse = $collection->query(
-            queryEmbeddings: [$platformResponse->getContent()[0]->getData()],
+            queryEmbeddings: [$platformResponse->asVectors()[0]->getData()],
             nResults: 4,
         );
 
diff --git a/src/Blog/Embedder.php b/src/Blog/Embedder.php
@@ -6,13 +6,13 @@
 
 use PhpLlm\LlmChain\Store\Document\Metadata;
 use PhpLlm\LlmChain\Store\Document\TextDocument;
-use PhpLlm\LlmChain\Store\Embedder as LlmChainEmbedder;
+use PhpLlm\LlmChain\Store\Indexer;
 
 final readonly class Embedder
 {
     public function __construct(
         private FeedLoader $loader,
-        private LlmChainEmbedder $embedder,
+        private Indexer $indexer,
     ) {
     }
 
@@ -23,6 +23,6 @@ public function embedBlog(): void
             $documents[] = new TextDocument($post->id, $post->toString(), new Metadata($post->toArray()));
         }
 
-        $this->embedder->embed($documents);
+        $this->indexer->index($documents);
     }
 }
diff --git a/src/Video/TwigComponent.php b/src/Video/TwigComponent.php
@@ -9,8 +9,6 @@
 use PhpLlm\LlmChain\Platform\Message\Message;
 use PhpLlm\LlmChain\Platform\Message\MessageBag;
 use PhpLlm\LlmChain\Platform\PlatformInterface;
-use PhpLlm\LlmChain\Platform\Response\AsyncResponse;
-use PhpLlm\LlmChain\Platform\Response\TextResponse;
 use Symfony\UX\LiveComponent\Attribute\AsLiveComponent;
 use Symfony\UX\LiveComponent\Attribute\LiveAction;
 use Symfony\UX\LiveComponent\Attribute\LiveArg;
@@ -46,10 +44,6 @@ public function submit(#[LiveArg] string $instruction, #[LiveArg] string $image)
             'max_tokens' => 100,
         ]);
 
-        assert($response instanceof AsyncResponse);
-        $response = $response->unwrap();
-        assert($response instanceof TextResponse);
-
-        $this->caption = $response->getContent();
+        $this->caption = $response->asText();
     }
 }
diff --git a/src/YouTube/TranscriptFetcher.php b/src/YouTube/TranscriptFetcher.php
@@ -4,7 +4,8 @@
 
 namespace App\YouTube;
 
-use Symfony\Component\DomCrawler\Crawler;
+use MrMySQL\YoutubeTranscript\TranscriptListFetcher;
+use Symfony\Component\HttpClient\Psr18Client;
 use Symfony\Contracts\HttpClient\HttpClientInterface;
 
 final class TranscriptFetcher
@@ -16,41 +17,14 @@ public function __construct(
 
     public function fetchTranscript(string $videoId): string
     {
-        // Fetch the HTML content of the YouTube video page
-        $htmlResponse = $this->client->request('GET', 'https://youtube.com/watch?v='.$videoId);
-        $html = $htmlResponse->getContent();
+        $psr18Client = new Psr18Client($this->client);
+        $fetcher = new TranscriptListFetcher($psr18Client, $psr18Client, $psr18Client);
 
-        // Use DomCrawler to parse the HTML
-        $crawler = new Crawler($html);
+        $list = $fetcher->fetch($videoId);
+        $transcript = $list->findTranscript($list->getAvailableLanguageCodes());
 
-        // Extract the script containing the ytInitialPlayerResponse
-        $scriptContent = $crawler->filter('script')->reduce(function (Crawler $node) {
-            return str_contains($node->text(), 'var ytInitialPlayerResponse = {');
-        })->text();
-
-        // Extract and parse the JSON data from the script
-        $start = strpos($scriptContent, 'var ytInitialPlayerResponse = ') + strlen('var ytInitialPlayerResponse = ');
-        $dataString = substr($scriptContent, $start);
-        $dataString = substr($dataString, 0, strrpos($dataString, ';') ?: null);
-        $data = json_decode(trim($dataString), true);
-
-        // Extract the URL for the captions
-        if (!isset($data['captions']['playerCaptionsTracklistRenderer']['captionTracks'][0]['baseUrl'])) {
-            throw new \Exception('Captions are not available for this video.');
-        }
-        $captionsUrl = $data['captions']['playerCaptionsTracklistRenderer']['captionTracks'][0]['baseUrl'];
-
-        // Fetch and parse the captions XML
-        $xmlResponse = $this->client->request('GET', $captionsUrl);
-        $xmlContent = $xmlResponse->getContent();
-        $xmlCrawler = new Crawler($xmlContent);
-
-        // Collect all text elements from the captions
-        $transcript = $xmlCrawler->filter('text')->each(function (Crawler $node) {
-            return $node->text().' ';
-        });
-
-        // Combine all the text elements into one string
-        return implode(PHP_EOL, $transcript);
+        return array_reduce($transcript->fetch(), function (string $carry, array $item): string {
+            return $carry.\PHP_EOL.$item['text'];
+        }, '');
     }
 }
diff --git a/symfony.lock b/symfony.lock
@@ -20,6 +20,18 @@
             ".php-cs-fixer.dist.php"
         ]
     },
+    "php-http/discovery": {
+        "version": "1.20",
+        "recipe": {
+            "repo": "github.com/symfony/recipes",
+            "branch": "main",
+            "version": "1.18",
+            "ref": "f45b5dd173a27873ab19f5e3180b2f661c21de02"
+        },
+        "files": [
+            "config/packages/http_discovery.yaml"
+        ]
+    },
     "php-llm/llm-chain-bundle": {
         "version": "dev-main"
     },

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -6,13 +6,13 @@` |
| `6` | `6` | |
| `7` | `7` | `use PhpLlm\LlmChain\Store\Document\Metadata;` |
| `8` | `8` | `use PhpLlm\LlmChain\Store\Document\TextDocument;` |
| `9` | | `-use PhpLlm\LlmChain\Store\Embedder as LlmChainEmbedder;` |
| | `9` | `+use PhpLlm\LlmChain\Store\Indexer;` |
| `10` | `10` | |
| `11` | `11` | `final readonly class Embedder` |
| `12` | `12` | `{` |
| `13` | `13` | `public function __construct(` |
| `14` | `14` | `private FeedLoader $loader,` |
| `15` | | `- private LlmChainEmbedder $embedder,` |
| | `15` | `+ private Indexer $indexer,` |
| `16` | `16` | `) {` |
| `17` | `17` | `}` |
| `18` | `18` | |
| | | `@@ -23,6 +23,6 @@ public function embedBlog(): void` |
| `23` | `23` | `$documents[] = new TextDocument($post->id, $post->toString(), new Metadata($post->toArray()));` |
| `24` | `24` | `}` |
| `25` | `25` | |
| `26` | | `- $this->embedder->embed($documents);` |
| | `26` | `+ $this->indexer->index($documents);` |
| `27` | `27` | `}` |
| `28` | `28` | `}` |