Skip to content

Commit

Permalink
Implement PDF content extraction
Browse files: browse the repository at this point in the history
  • Loading branch information
dhx committed Dec 20, 2024
1 parent 3e4893e commit f616b07
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 3 deletions.
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
"erusev/parsedown": "^1.7",
"henck/rtf-to-html": "^1.2",
"html2text/html2text": "^4.3",
"phpoffice/phpword": "^1.2"
"phpoffice/phpword": "^1.2",
"smalot/pdfparser": "^2.10"
},
"scripts": {
"lint": "find . -name \\*.php -not -path './vendor/*' -print0 | xargs -0 -n1 php -l",
Expand Down
133 changes: 132 additions & 1 deletion composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions lib/Service/AssistantService.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
use RtfHtmlPhp\Document;
use RtfHtmlPhp\Html\HtmlFormatter;
use RuntimeException;
use Smalot\PdfParser\Parser;

/**
* @psalm-import-type AssistantTaskProcessingTaskType from ResponseDefinitions
Expand Down Expand Up @@ -613,6 +614,13 @@ public function parseTextFromFile(string $userId, ?string $filePath = null, ?int
$this->tempManager->clean();
break;
}
case 'application/pdf':
{
$parser = new Parser();
$pdf = $parser->parseContent($fileContent);
$text = $pdf->getText();
break;
}
}
return $text;
}
Expand Down
2 changes: 1 addition & 1 deletion src/components/fields/TextInput.vue
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ const VALID_MIME_TYPES = [
'application/msword', // doc
'application/vnd.openxmlformats-officedocument.wordprocessingml.document', // docx
'application/vnd.oasis.opendocument.text', // odt
// 'application/pdf', // Not yet supported
'application/pdf', // pdf
]
const picker = (callback, target) => getFilePickerBuilder(t('assistant', 'Choose a text file'))
Expand Down

0 comments on commit f616b07

Please sign in to comment.