Skip to content

Commit

Permalink
feat(speech-to-text): SpeechToTextManager::transcribeFile calls TaskP…
Browse files Browse the repository at this point in the history
…rocessingManager::runTask

Signed-off-by: Julien Veyssier <[email protected]>
  • Loading branch information
julien-nc committed Aug 29, 2024
1 parent 93adfbe commit 0adf57a
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 9 deletions.
27 changes: 26 additions & 1 deletion lib/private/SpeechToText/SpeechToTextManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
use OCP\SpeechToText\ISpeechToTextProvider;
use OCP\SpeechToText\ISpeechToTextProviderWithId;
use OCP\SpeechToText\ISpeechToTextProviderWithUserId;
use OCP\TaskProcessing\IManager as ITaskProcessingManager;
use OCP\TaskProcessing\Task;
use OCP\TaskProcessing\TaskTypes\AudioToText;
use Psr\Container\ContainerExceptionInterface;
use Psr\Container\NotFoundExceptionInterface;
use Psr\Log\LoggerInterface;
Expand All @@ -41,6 +44,7 @@ public function __construct(
private IJobList $jobList,
private IConfig $config,
private IUserSession $userSession,
private ITaskProcessingManager $taskProcessingManager,
) {
}

Expand Down Expand Up @@ -112,7 +116,28 @@ public function cancelScheduledFileTranscription(File $file, ?string $userId, st
}
}

public function transcribeFile(File $file): string {
public function transcribeFile(File $file, ?string $userId = null, string $appId = 'core'): string {
// try to run a TaskProcessing core:audio2text task
// this covers scheduling as well because OC\SpeechToText\TranscriptionJob calls this method
try {
$taskProcessingTask = new Task(
AudioToText::ID,
['input' => $file->getId()],
$appId,
$userId,
'from-SpeechToTextManager||' . $file->getId() . '||' . ($userId ?? '') . '||' . $appId,
);
$resultTask = $this->taskProcessingManager->runTask($taskProcessingTask);
if ($resultTask->getStatus() === Task::STATUS_SUCCESSFUL) {
$output = $resultTask->getOutput();
if (isset($output['output']) && is_string($output['output'])) {
return $output['output'];
}
}
} catch (Throwable $e) {
$this->logger->debug('Failed to run a Speech-to-text job from STTManager with TaskProcessing for file ' . $file->getId(), ['exception' => $e]);
}

if (!$this->hasProviders()) {
throw new PreConditionNotMetException('No SpeechToText providers have been registered');
}
Expand Down
2 changes: 1 addition & 1 deletion lib/private/SpeechToText/TranscriptionJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ protected function run($argument) {
);
return;
}
$result = $this->speechToTextManager->transcribeFile($file);
$result = $this->speechToTextManager->transcribeFile($file, $userId, $appId);
$this->eventDispatcher->dispatchTyped(
new TranscriptionSuccessfulEvent(
$fileId,
Expand Down
26 changes: 24 additions & 2 deletions lib/private/TaskProcessing/Manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ public function __construct(
IAppDataFactory $appDataFactory,
private IRootFolder $rootFolder,
private \OCP\TextToImage\IManager $textToImageManager,
private \OCP\SpeechToText\ISpeechToTextManager $speechToTextManager,
private IUserMountCache $userMountCache,
private IClientService $clientService,
private IAppManager $appManager,
Expand Down Expand Up @@ -369,12 +368,35 @@ public function getOptionalOutputShapeEnumValues(): array {
return $newProviders;
}

/**
 * Instantiate every SpeechToText provider listed in the registration context.
 *
 * This is almost a copy of SpeechToTextManager->getProviders; it lives here
 * to avoid a dependency cycle between SpeechToTextManager and TaskProcessingManager.
 *
 * @return array Provider instances keyed by the value returned by each
 *               registration's getService(); empty if there is no registration context
 */
private function _getRawSpeechToTextProviders(): array {
	$loadedProviders = [];
	$registrationContext = $this->coordinator->getRegistrationContext();

	if ($registrationContext !== null) {
		foreach ($registrationContext->getSpeechToTextProviders() as $registration) {
			$providerClass = $registration->getService();
			try {
				$loadedProviders[$providerClass] = $this->serverContainer->get($providerClass);
			} catch (\Throwable $exception) {
				// A provider that cannot be resolved is logged and skipped; the others are still returned.
				$this->logger->error('Failed to load SpeechToText provider ' . $providerClass, [
					'exception' => $exception,
				]);
			}
		}
	}

	return $loadedProviders;
}

/**
* @return IProvider[]
*/
private function _getSpeechToTextProviders(): array {
$oldProviders = $this->speechToTextManager->getProviders();
$oldProviders = $this->_getRawSpeechToTextProviders();
$newProviders = [];
foreach ($oldProviders as $oldProvider) {
$newProvider = new class($oldProvider, $this->rootFolder, $this->appData) implements IProvider, ISynchronousProvider {
Expand Down
11 changes: 7 additions & 4 deletions lib/private/TextProcessing/Manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,13 @@ public function runTask(OCPTask $task): string {
$this->logger->debug('Running a TextProcessing (' . $taskTypeClass . ') task with TaskProcessing');
$taskProcessingResultTask = $this->taskProcessingManager->runTask($taskProcessingTask);
if ($taskProcessingResultTask->getStatus() === \OCP\TaskProcessing\Task::STATUS_SUCCESSFUL) {
$task->setOutput($taskProcessingResultTask->getOutput()['output'] ?? '');
$task->setStatus(OCPTask::STATUS_SUCCESSFUL);
$this->taskMapper->update(DbTask::fromPublicTask($task));
return $task->getOutput();
$output = $taskProcessingResultTask->getOutput();
if (isset($output['output']) && is_string($output['output'])) {
$task->setOutput($output['output']);
$task->setStatus(OCPTask::STATUS_SUCCESSFUL);
$this->taskMapper->update(DbTask::fromPublicTask($task));
return $output['output'];
}
}
} catch (\Throwable $e) {
$this->logger->error('TextProcessing to TaskProcessing failed', ['exception' => $e]);
Expand Down
4 changes: 3 additions & 1 deletion lib/public/SpeechToText/ISpeechToTextManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,13 @@ public function cancelScheduledFileTranscription(File $file, ?string $userId, st

/**
* @param File $file The media file to transcribe
* @param ?string $userId The user that triggered this request
* @param string $appId The app that triggered this request
* @return string The transcription of the passed media file
* @throws PreConditionNotMetException If no provider was registered but this method was still called
* @throws InvalidArgumentException If the file could not be found or is not of a supported type
* @throws RuntimeException If the transcription failed for other reasons
* @since 27.0.0
*/
public function transcribeFile(File $file): string;
public function transcribeFile(File $file, ?string $userId = null, string $appId = 'core'): string;
}

0 comments on commit 0adf57a

Please sign in to comment.