Skip to content

Commit

Permalink
Merge pull request #19 from freemocap/jon/add-audio
Browse files Browse the repository at this point in the history
Audio transcription method
  • Loading branch information
jonmatthis authored Jan 22, 2024
2 parents 08870b9 + 453efb0 commit 406e6a5
Show file tree
Hide file tree
Showing 22 changed files with 608 additions and 105 deletions.
105 changes: 53 additions & 52 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"@langchain/community": "^0.0.14",
"@langchain/core": "^0.1.8",
"@langchain/openai": "^0.0.9",
"@nestjs/axios": "^3.0.1",
"@nestjs/common": "^10.0.0",
"@nestjs/config": "^3.1.1",
"@nestjs/core": "^10.0.0",
Expand All @@ -34,15 +35,18 @@
"@nestjs/platform-express": "^10.0.0",
"@nestjs/swagger": "^7.1.17",
"@slack/bolt": "^3.17.0",
"axios": "^1.6.5",
"bson": "^6.2.0",
"bufferutil": "^4.0.8",
"class-transformer": "^0.5.1",
"class-validator": "^0.14.1",
"discord.js": "^14.14.1",
"form-data": "^4.0.0",
"langchain": "^0.0.209",
"mongodb": "^5.9.2",
"mongoose": "^8.0.3",
"necord": "^6.4.2",
"openai": "^4.24.7",
"reflect-metadata": "^0.1.13",
"resource": "^0.8.1",
"rxjs": "^7.8.1",
Expand All @@ -55,7 +59,7 @@
"@nestjs/testing": "^10.0.0",
"@types/express": "^4.17.17",
"@types/jest": "^29.5.2",
"@types/node": "^20.10.6",
"@types/node": "^20.11.5",
"@types/supertest": "^2.0.12",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
Expand Down
2 changes: 1 addition & 1 deletion src/core/ai/langchain/langchain.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export class LangchainService {
this._logger.log('Creating model...');
this._model = new OpenAI({
modelName: modelName || 'gpt-4-1106-preview',
openAIApiKey: await this._openAiSecrets.getOpenAIKey(),
openAIApiKey: await this._openAiSecrets.getOpenaiApiKey(),
});
}
this._logger.log('Returning model: ' + this._model.modelName);
Expand Down
50 changes: 50 additions & 0 deletions src/core/ai/openai/dto/speech-to-text.dto.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import {
  IsString,
  IsIn,
  IsOptional,
  IsNumber,
  Min,
  Max,
  MaxLength,
  IsMimeType,
} from 'class-validator';
import { Type } from 'class-transformer';

/**
 * Validated request payload for a speech-to-text (audio transcription) call.
 *
 * Field defaults are applied by the property initializers first; the
 * constructor then copies whatever the caller supplied over them.
 */
export class SpeechToTextDto {
  /**
   * Audio to transcribe.
   *
   * NOTE(review): `IsMimeType` validates MIME-type *strings* and `each: true`
   * expects a collection — confirm this matches what callers actually pass
   * here (e.g. a stream/buffer vs. a list of MIME strings).
   */
  @IsMimeType({
    each: true,
    message:
      'File must be in one of the following formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.',
  })
  @Type(() => Object)
  file: any;

  /** Transcription model identifier; only `whisper-1` is accepted. */
  @IsOptional()
  @IsIn(['whisper-1'])
  model: 'whisper-1' = 'whisper-1';

  /** Optional language code of the audio (ISO-639-1 format). */
  @IsOptional()
  @IsString({ message: 'Language code must be a valid ISO-639-1 string.' })
  language?: string; // ISO-639-1 format

  /**
   * Optional prompt text, limited to 4096 characters.
   *
   * The limit is enforced with `MaxLength` (string length). The numeric `Max`
   * decorator only passes for number values, so it rejected every string
   * prompt instead of bounding its length.
   */
  @IsOptional()
  @IsString({ message: 'Prompt must be a string.' })
  @MaxLength(4096, {
    message: 'Prompt must be less than or equal to 4096 characters.',
  })
  prompt?: string;

  /** Output format of the transcription; defaults to `verbose_json`. */
  @IsOptional()
  @IsIn(['json', 'text', 'srt', 'verbose_json', 'vtt'])
  response_format: 'json' | 'text' | 'srt' | 'verbose_json' | 'vtt' =
    'verbose_json';

  /** Sampling temperature in the inclusive range [0, 1]; defaults to 0. */
  @IsOptional()
  @IsNumber()
  @Min(0)
  @Max(1)
  temperature: number = 0;

  /**
   * @param partial Any subset of the DTO's fields; each supplied property
   *   overwrites the corresponding default.
   */
  constructor(partial: Partial<SpeechToTextDto>) {
    Object.assign(this, partial);
  }
}
40 changes: 40 additions & 0 deletions src/core/ai/openai/dto/text-to-speech.dto.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import {
  IsString,
  IsIn,
  IsOptional,
  IsNumber,
  Min,
  Max,
  MaxLength,
} from 'class-validator';
import { Type } from 'class-transformer';

/**
 * Validated request payload for a text-to-speech (audio generation) call.
 *
 * The constructor copies the caller-supplied fields onto the instance; no
 * defaults are set here.
 */
export class TextToSpeechDto {
  /** Speech model identifier: `tts-1` or `tts-1-hd`. */
  @IsIn(['tts-1', 'tts-1-hd'])
  model: 'tts-1' | 'tts-1-hd';

  /**
   * Text to generate audio for, limited to 4096 characters.
   *
   * The limit is enforced with `MaxLength` (string length). The numeric `Max`
   * decorator only passes for number values, so it rejected every string
   * input instead of bounding its length. The redundant bare `@IsString()`
   * that was stacked on top of the messaged one has been dropped.
   *
   * NOTE(review): the property is typed as required but validated as
   * optional — confirm whether `@IsOptional()` is intended here.
   */
  @IsOptional()
  @Type(() => String)
  @IsString({ message: 'Input must be a string.' })
  @MaxLength(4096, {
    message: 'Input must be less than or equal to 4096 characters.',
  })
  input: string; // Text to generate audio for, max 4096 characters

  /** Voice used for the generated audio. */
  @IsIn(['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'])
  voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';

  /** Optional audio container/codec for the response. */
  @IsOptional()
  @IsIn(['mp3', 'opus', 'aac', 'flac'])
  response_format?: 'mp3' | 'opus' | 'aac' | 'flac';

  /** Optional playback speed in the inclusive range [0.25, 4.0]. */
  @IsOptional()
  @IsNumber()
  @Min(0.25)
  @Max(4.0)
  speed?: number; // Between 0.25 and 4.0, defaults to 1

  /**
   * @param partial Any subset of the DTO's fields to copy onto the instance.
   */
  constructor(partial: Partial<TextToSpeechDto>) {
    Object.assign(this, partial);
  }
}
Loading

0 comments on commit 406e6a5

Please sign in to comment.