Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/Connectors/ParseConnector.php
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,11 @@ public function hasRequestFailed(Response $response): ?bool
* @param string $mimeType The mime type of the document. Default application/pdf
* @param \OneOffTech\Parse\Client\ParseOption $options Specify additional options for the specific parsing processor
*/
public function parse(string $url, string $mimeType = 'application/pdf', ?ParseOption $options = null): DocumentDto
public function parse(string $url, ?ParseOption $options = null): DocumentDto
{
return $this
->send((new ExtractTextRequest(
url: $url,
mimeType: $mimeType,
preferredDocumentProcessor: $options?->processor?->value ?? DocumentProcessor::PDFACT->value,
))->validate())
->dto();
Expand Down
6 changes: 0 additions & 6 deletions src/Requests/ExtractTextRequest.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ class ExtractTextRequest extends Request implements HasBody

public function __construct(
protected readonly string $url,
protected readonly string $mimeType,
protected readonly string $preferredDocumentProcessor = 'pdfact'
) {
//
Expand All @@ -33,7 +32,6 @@ protected function defaultBody(): array
{
return [
'url' => $this->url,
'mime_type' => $this->mimeType,
'driver' => $this->preferredDocumentProcessor ?? 'pdfact',
];
}
Expand All @@ -44,10 +42,6 @@ public function validate(): self
throw new InvalidArgumentException('The [url] is required to be non-empty.');
}

if (empty(trim($this->mimeType))) {
throw new InvalidArgumentException('The [mime type] is required to be non-empty.');
}

return $this;
}

Expand Down
4 changes: 0 additions & 4 deletions tests/ParseProcessorSelectionTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
$body = $request->body()->all();

return $body['url'] === 'http://localhost/empty.pdf'
&& $body['mime_type'] === 'application/pdf'
&& $body['driver'] === 'pdfact';
});

Expand Down Expand Up @@ -62,7 +61,6 @@
$body = $request->body()->all();

return $body['url'] === 'http://localhost/empty.pdf'
&& $body['mime_type'] === 'application/pdf'
&& $body['driver'] === 'pymupdf';
});

Expand Down Expand Up @@ -93,7 +91,6 @@
$body = $request->body()->all();

return $body['url'] === 'http://localhost/empty.pdf'
&& $body['mime_type'] === 'application/pdf'
&& $body['driver'] === 'llama';
});

Expand Down Expand Up @@ -124,7 +121,6 @@
$body = $request->body()->all();

return $body['url'] === 'http://localhost/empty.pdf'
&& $body['mime_type'] === 'application/pdf'
&& $body['driver'] === 'unstructured';
});

Expand Down
16 changes: 8 additions & 8 deletions tests/ParseTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
$connector = new ParseConnector('fake', 'http://localhost:5002');
$connector->withMockClient($mockClient);

$document = $connector->parse('http://localhost/base.pdf', 'application/pdf');
$document = $connector->parse('http://localhost/base.pdf');

expect($document)
->toBeInstanceOf(DocumentDto::class)
Expand Down Expand Up @@ -63,7 +63,7 @@
$connector = new ParseConnector('fake', 'http://localhost:5002');
$connector->withMockClient($mockClient);

$document = $connector->parse('http://localhost/base.pdf', 'application/pdf', new ParseOption(DocumentProcessor::PYMUPDF));
$document = $connector->parse('http://localhost/base.pdf', new ParseOption(DocumentProcessor::PYMUPDF));

expect($document)
->toBeInstanceOf(DocumentDto::class)
Expand Down Expand Up @@ -91,7 +91,7 @@
$connector = new ParseConnector('fake', 'http://localhost:5002');
$connector->withMockClient(MockClient::getGlobal());

$document = $connector->parse('http://localhost/empty.pdf', 'application/pdf');
$document = $connector->parse('http://localhost/empty.pdf');

expect($document)
->toBeInstanceOf(DocumentDto::class)
Expand All @@ -113,7 +113,7 @@
/** @var array */
$body = $request->body()->all();

return $body['url'] === 'http://localhost/empty.pdf' && $body['mime_type'] === 'application/pdf';
return $body['url'] === 'http://localhost/empty.pdf';
});

$mockClient->assertSentCount(1);
Expand All @@ -140,7 +140,7 @@
/** @var array */
$body = $request->body()->all();

return $body['url'] === 'http://localhost/base.docx' && $body['mime_type'] === 'application/pdf';
return $body['url'] === 'http://localhost/base.docx';
});

$mockClient->assertSentCount(1);
Expand All @@ -155,7 +155,7 @@
$connector = new ParseConnector('fake', 'http://localhost:5002');
$connector->withMockClient(MockClient::getGlobal());

$connector->parse('http://localhost/test.pdf', 'application/pdf');
$connector->parse('http://localhost/test.pdf');

$mockClient->assertSent(ExtractTextRequest::class);

Expand All @@ -167,7 +167,7 @@
/** @var array */
$body = $request->body()->all();

return $body['url'] === 'http://localhost/test.pdf' && $body['mime_type'] === 'application/pdf';
return $body['url'] === 'http://localhost/test.pdf';
});

$mockClient->assertSentCount(1);
Expand All @@ -182,6 +182,6 @@
$connector = new ParseConnector('fake', 'http://localhost:5002');
$connector->withMockClient($mockClient);

$connector->parse('http://localhost/km-f.pdf', 'application/pdf');
$connector->parse('http://localhost/km-f.pdf');

})->throws(ServiceUnavailableException::class, 'The pdfact service is not reachable.');
16 changes: 1 addition & 15 deletions tests/ValidationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,8 @@
$connector = new ParseConnector('fake', 'http://localhost:5002');
$connector->withMockClient(MockClient::getGlobal());

$connector->parse('', 'application/pdf');
$connector->parse('');

$mockClient->assertNothingSent();

})->throws(InvalidArgumentException::class, 'The [url] is required to be non-empty.');

// Test case: calling parse() with an empty second argument is expected to raise
// an InvalidArgumentException carrying the "[mime type] ... non-empty" message.
// Note: the test name says "non-null", but the value exercised is an empty
// string and the expected message says "non-empty".
test('mime type required to be non-null', function () {
// Map ExtractTextRequest to the 'extract-text-invalid-mime' fixture on the global mock client.
$mockClient = MockClient::global([
ExtractTextRequest::class => MockResponse::fixture('extract-text-invalid-mime'),
]);

$connector = new ParseConnector('fake', 'http://localhost:5002');
// Attach the same global mock client that was configured above.
$connector->withMockClient(MockClient::getGlobal());

// The empty string is the value under test; the call is expected to throw.
$connector->parse('http://localhost/test.pdf', '');

// Only reached if parse() did not throw; in that case the ->throws()
// expectation below is not met either.
$mockClient->assertNothingSent();

})->throws(InvalidArgumentException::class, 'The [mime type] is required to be non-empty.');