From 838753113326bf7fa248677a9f498a3364ee3787 Mon Sep 17 00:00:00 2001
From: Alessio Vertemati
Date: Tue, 25 Feb 2025 16:20:20 +0100
Subject: [PATCH] Add LLamaCloud support

---
 src/DocumentProcessor.php             |  7 ++++++
 tests/ParseProcessorSelectionTest.php | 31 +++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/src/DocumentProcessor.php b/src/DocumentProcessor.php
index 7b33a12..4c6a9d8 100644
--- a/src/DocumentProcessor.php
+++ b/src/DocumentProcessor.php
@@ -17,4 +17,11 @@ enum DocumentProcessor: string
      * Uses https://github.com/pymupdf/PyMuPDF as document processor to extract text
      */
     case PYMUPDF = 'pymupdf';
+
+    /**
+     * The LLama Parse processor
+     *
+     * Uses LLamaCloud https://cloud.llamaindex.ai/ as document processor to extract text
+     */
+    case LLAMAPARSE = 'llama';
 }
diff --git a/tests/ParseProcessorSelectionTest.php b/tests/ParseProcessorSelectionTest.php
index 28777e3..b929c99 100644
--- a/tests/ParseProcessorSelectionTest.php
+++ b/tests/ParseProcessorSelectionTest.php
@@ -68,3 +68,34 @@
 
     $mockClient->assertSentCount(1);
 });
+
+test('llamaparse can be selected as processor', function () {
+    $mockClient = MockClient::global([
+        ExtractTextRequest::class => MockResponse::fixture('extract-text-empty'),
+    ]);
+
+    $connector = new ParseConnector('fake', 'http://localhost:5002');
+    $connector->withMockClient($mockClient);
+
+    $connector->parse(
+        url: 'http://localhost/empty.pdf',
+        options: new ParseOption(DocumentProcessor::LLAMAPARSE),
+    );
+
+    $mockClient->assertSent(ExtractTextRequest::class);
+
+    $mockClient->assertSent(function (Request $request, Response $response) {
+        if (! $request instanceof ExtractTextRequest) {
+            return false;
+        }
+
+        /** @var array */
+        $body = $request->body()->all();
+
+        return $body['url'] === 'http://localhost/empty.pdf'
+            && $body['mime_type'] === 'application/pdf'
+            && $body['driver'] === 'llama';
+    });
+
+    $mockClient->assertSentCount(1);
+});