diff --git a/src/Connectors/ParseConnector.php b/src/Connectors/ParseConnector.php index 0cdd648..8f2adb7 100644 --- a/src/Connectors/ParseConnector.php +++ b/src/Connectors/ParseConnector.php @@ -76,12 +76,11 @@ public function hasRequestFailed(Response $response): ?bool * @param string $mimeType The mime type of the document. Default application/pdf * @param \OneOffTech\Parse\Client\ParseOption $options Specifiy additional options for the specific parsing processor */ - public function parse(string $url, string $mimeType = 'application/pdf', ?ParseOption $options = null): DocumentDto + public function parse(string $url, ?ParseOption $options = null): DocumentDto { return $this ->send((new ExtractTextRequest( url: $url, - mimeType: $mimeType, preferredDocumentProcessor: $options?->processor?->value ?? DocumentProcessor::PDFACT->value, ))->validate()) ->dto(); diff --git a/src/Requests/ExtractTextRequest.php b/src/Requests/ExtractTextRequest.php index 4194553..46cb605 100644 --- a/src/Requests/ExtractTextRequest.php +++ b/src/Requests/ExtractTextRequest.php @@ -18,7 +18,6 @@ class ExtractTextRequest extends Request implements HasBody public function __construct( protected readonly string $url, - protected readonly string $mimeType, protected readonly string $preferredDocumentProcessor = 'pdfact' ) { // @@ -33,7 +32,6 @@ protected function defaultBody(): array { return [ 'url' => $this->url, - 'mime_type' => $this->mimeType, 'driver' => $this->preferredDocumentProcessor ?? 'pdfact', ]; } @@ -44,10 +42,6 @@ public function validate(): self throw new InvalidArgumentException('The [url] is required to be non-empty.'); } - if (empty(trim($this->mimeType))) { - throw new InvalidArgumentException('The [mime type] is required to be non-empty.'); - } - return $this; } diff --git a/tests/ParseProcessorSelectionTest.php b/tests/ParseProcessorSelectionTest.php index 8791e61..523d1f8 100644 --- a/tests/ParseProcessorSelectionTest.php +++ b/tests/ParseProcessorSelectionTest.php @@ -30,7 +30,6 @@ $body = $request->body()->all(); return $body['url'] === 'http://localhost/empty.pdf' - && $body['mime_type'] === 'application/pdf' && $body['driver'] === 'pdfact'; }); @@ -62,7 +61,6 @@ $body = $request->body()->all(); return $body['url'] === 'http://localhost/empty.pdf' - && $body['mime_type'] === 'application/pdf' && $body['driver'] === 'pymupdf'; }); @@ -93,7 +91,6 @@ $body = $request->body()->all(); return $body['url'] === 'http://localhost/empty.pdf' - && $body['mime_type'] === 'application/pdf' && $body['driver'] === 'llama'; }); @@ -124,7 +121,6 @@ $body = $request->body()->all(); return $body['url'] === 'http://localhost/empty.pdf' - && $body['mime_type'] === 'application/pdf' && $body['driver'] === 'unstructured'; }); diff --git a/tests/ParseTest.php b/tests/ParseTest.php index a592711..4a715c9 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -20,7 +20,7 @@ $connector = new ParseConnector('fake', 'http://localhost:5002'); $connector->withMockClient($mockClient); - $document = $connector->parse('http://localhost/base.pdf', 'application/pdf'); + $document = $connector->parse('http://localhost/base.pdf'); expect($document) ->toBeInstanceOf(DocumentDto::class) @@ -63,7 +63,7 @@ $connector = new ParseConnector('fake', 'http://localhost:5002'); $connector->withMockClient($mockClient); - $document = $connector->parse('http://localhost/base.pdf', 'application/pdf', new ParseOption(DocumentProcessor::PYMUPDF)); + $document = $connector->parse('http://localhost/base.pdf', new ParseOption(DocumentProcessor::PYMUPDF)); expect($document) ->toBeInstanceOf(DocumentDto::class) @@ -91,7 +91,7 @@ $connector = new ParseConnector('fake', 'http://localhost:5002'); $connector->withMockClient(MockClient::getGlobal()); - $document = $connector->parse('http://localhost/empty.pdf', 'application/pdf'); + $document = $connector->parse('http://localhost/empty.pdf'); expect($document) ->toBeInstanceOf(DocumentDto::class) @@ -113,7 +113,7 @@ /** @var array */ $body = $request->body()->all(); - return $body['url'] === 'http://localhost/empty.pdf' && $body['mime_type'] === 'application/pdf'; + return $body['url'] === 'http://localhost/empty.pdf'; }); $mockClient->assertSentCount(1); @@ -140,7 +140,7 @@ /** @var array */ $body = $request->body()->all(); - return $body['url'] === 'http://localhost/base.docx' && $body['mime_type'] === 'application/pdf'; + return $body['url'] === 'http://localhost/base.docx'; }); $mockClient->assertSentCount(1); @@ -155,7 +155,7 @@ $connector = new ParseConnector('fake', 'http://localhost:5002'); $connector->withMockClient(MockClient::getGlobal()); - $connector->parse('http://localhost/test.pdf', 'application/pdf'); + $connector->parse('http://localhost/test.pdf'); $mockClient->assertSent(ExtractTextRequest::class); @@ -167,7 +167,7 @@ /** @var array */ $body = $request->body()->all(); - return $body['url'] === 'http://localhost/test.pdf' && $body['mime_type'] === 'application/pdf'; + return $body['url'] === 'http://localhost/test.pdf'; }); $mockClient->assertSentCount(1); @@ -182,6 +182,6 @@ $connector = new ParseConnector('fake', 'http://localhost:5002'); $connector->withMockClient($mockClient); - $connector->parse('http://localhost/km-f.pdf', 'application/pdf'); + $connector->parse('http://localhost/km-f.pdf'); })->throws(ServiceUnavailableException::class, 'The pdfact service is not reachable.'); diff --git a/tests/ValidationTest.php b/tests/ValidationTest.php index 2fbb8b0..2f12d1a 100644 --- a/tests/ValidationTest.php +++ b/tests/ValidationTest.php @@ -13,22 +13,8 @@ $connector = new ParseConnector('fake', 'http://localhost:5002'); $connector->withMockClient(MockClient::getGlobal()); - $connector->parse('', 'application/pdf'); + $connector->parse(''); $mockClient->assertNothingSent(); })->throws(InvalidArgumentException::class, 'The [url] is required to be non-empty.'); - -test('mime type required to be non-null', function () { - $mockClient = MockClient::global([ - ExtractTextRequest::class => MockResponse::fixture('extract-text-invalid-mime'), - ]); - - $connector = new ParseConnector('fake', 'http://localhost:5002'); - $connector->withMockClient(MockClient::getGlobal()); - - $connector->parse('http://localhost/test.pdf', ''); - - $mockClient->assertNothingSent(); - -})->throws(InvalidArgumentException::class, 'The [mime type] is required to be non-empty.');