// transcribing.js
// Forked from mayeaux/generate-subtitles.
const fs = require("fs-extra");
const createTranslatedFiles = require("./create-translated-files");
const {forHumans} = require("./helpers");
// TODO: move to another directory
// Extensions of the transcription output files that moveAndRenameFilesAndFolder renames.
const outputFileExtensions = ['.srt', '.vtt', '.txt']
// Runtime configuration, read once from environment variables when the module loads.
const nodeEnvironment = process.env.NODE_ENV;
// LibreTranslate host; translateIfNeeded skips translation when this is unset.
const libreTranslateHostPath = process.env.LIBRETRANSLATE;
// NOTE(review): `l` is neither defined nor imported in this file — presumably a global logging helper; confirm.
l(`libreTranslateHostPath: ${libreTranslateHostPath}`)
// True only when NODE_ENV is exactly 'production'; gates the --device selection in buildArguments.
const isProd = nodeEnvironment === 'production';
/**
 * Build the command-line argument list for the Whisper process.
 *
 * In production the `--device` flag is chosen from `global.topLevelValue`
 * (1 -> cuda:0, 2 -> cuda:1; any other value adds no flag) and
 * `toggleTopLevelValue()` is then called so the next invocation picks the
 * other device.
 *
 * @param {Object} options
 * @param {string} options.uploadedFilePath - Path of the file to transcribe; always the first argument.
 * @param {string} [options.language] - Added as `--language <language>` when truthy.
 * @param {string} [options.model] - Added as `--model <model>` when truthy.
 * @param {string|number} options.sixDigitNumber - Names the output folder `transcriptions/<sixDigitNumber>`.
 * @returns {Array} The ordered argument list.
 */
function buildArguments({
  uploadedFilePath,
  language,
  model,
  sixDigitNumber
}){
  /** INSTANTIATE WHISPER PROCESS **/
  // Deliberately NOT named `arguments`: binding that identifier is a
  // SyntaxError in strict mode / ES modules and shadows the built-in
  // arguments object in sloppy mode.
  // The first argument is the path to the file.
  const whisperArguments = [uploadedFilePath];

  // these don't have to be defined
  if(language) whisperArguments.push('--language', language);
  if(model) whisperArguments.push('--model', model);

  // TODO: add the max GPUS thing here
  if(isProd){
    if(global.topLevelValue === 1){
      whisperArguments.push('--device', 'cuda:0');
    } else if(global.topLevelValue === 2){
      whisperArguments.push('--device', 'cuda:1');
    }
    toggleTopLevelValue();
  }

  // don't show the text output but show the progress thing
  whisperArguments.push('--verbose', 'False');

  // folder to save .txt, .vtt and .srt
  whisperArguments.push('-o', `transcriptions/${sixDigitNumber}`);

  l('transcribe arguments');
  l(whisperArguments);

  return whisperArguments;
}
/**
 * Flip `global.topLevelValue` between 1 and 2.
 * Any other value (including undefined) is left untouched.
 */
function toggleTopLevelValue(){
  const current = global.topLevelValue;

  // Only the two known states are swapped.
  if(current !== 1 && current !== 2) return;

  global.topLevelValue = current === 1 ? 2 : 1;
}
/**
 * Extract the language from a chunk of console output containing a
 * `Detected language: <name>` line.
 *
 * Parsing is anchored on the marker itself and stops at the end of that
 * line, so colons earlier in the chunk (or further output after the line)
 * do not corrupt the result.
 *
 * @param {string} dataAsString - Chunk of process output; coerced with String().
 * @returns {string|false} The text following the marker on its line, trimmed,
 *   or `false` when the input is empty or the marker is absent.
 */
function autoDetectLanguage(dataAsString){
  if(!dataAsString) return false;

  const marker = 'Detected language:';
  const text = String(dataAsString);

  const markerIndex = text.indexOf(marker);
  if(markerIndex === -1) return false;

  // Keep only what follows the marker on the same line.
  const [restOfLine] = text.slice(markerIndex + marker.length).split(/\r?\n/);

  return restOfLine.trim();
}
/**
 * Persist `dataObject` as JSON at `processingDataPath`.
 *
 * When the file already exists its parsed contents are shallow-merged with
 * `dataObject` (keys in `dataObject` win) before being written back;
 * otherwise `dataObject` is written as-is.
 *
 * @param {string} processingDataPath - Path of the JSON file.
 * @param {Object} dataObject - Fields to store.
 * @returns {Promise<void>}
 */
async function writeToProcessingDataFile(processingDataPath, dataObject){
  const alreadyOnDisk = await fs.exists(processingDataPath);

  l('processingDataExists');
  l(alreadyOnDisk);

  let payload = dataObject;

  if(alreadyOnDisk){
    const rawContents = await fs.readFile(processingDataPath, 'utf8');

    l('fileData');
    l(rawContents);

    // Shallow merge: new fields override what was stored before.
    payload = Object.assign({}, JSON.parse(rawContents), dataObject);
  }

  await fs.writeFile(processingDataPath, JSON.stringify(payload), 'utf8');
}
/**
 * Translate the transcription files through LibreTranslate when applicable.
 *
 * Translation runs only when a LibreTranslate host is configured, the
 * language is one `shouldTranslateFrom` accepts, and the caller asked for it.
 * The `'translating'` status and the real start time are written to the
 * processing data file BEFORE the translation starts, so the file reflects
 * the work in progress.
 *
 * NOTE(review): `shouldTranslateFrom` is neither defined nor imported in this
 * file — presumably provided globally; confirm.
 *
 * @param {Object} options
 * @param {string} options.language - Language of the transcription.
 * @param {boolean} options.shouldTranslate - Whether the caller requested translation.
 * @param {string} options.processingDataPath - Path of the processing data JSON file.
 * @param {string} options.directoryAndFileName - Passed through to createTranslatedFiles.
 * @returns {Promise<void>}
 */
async function translateIfNeeded({ language, shouldTranslate, processingDataPath, directoryAndFileName}){
  const shouldTranslateFromLanguage = shouldTranslateFrom(language);
  l(`should translate from language: ${shouldTranslateFromLanguage}`)
  l(`libreTranslateHostPath: ${libreTranslateHostPath}`)
  l(`should translate: ${shouldTranslate}`)

  /** AUTOTRANSLATE WITH LIBRETRANSLATE **/
  if(!(libreTranslateHostPath && shouldTranslateFromLanguage && shouldTranslate)) return;

  l('hitting LibreTranslate');
  const translationStarted = new Date();

  // Record the in-progress state first, using the actual start time.
  await writeToProcessingDataFile(processingDataPath, {
    translationStartedAt: translationStarted,
    status: 'translating',
  });

  // hit libretranslate
  await createTranslatedFiles({
    directoryAndFileName,
    language,
  });
}
/**
 * Write the completion summary for a transcription into its
 * `processing_data.json` file.
 *
 * @param {Object} options
 * @param {Date} options.startingDate - When processing started.
 * @param {string|number} options.sixDigitNumber - Identifies the folder `./transcriptions/<sixDigitNumber>`.
 * @returns {Promise<void>}
 */
async function saveTranscriptionCompletedInformation({
  startingDate,
  sixDigitNumber
}){
  // TODO: FPROBE: get the duration (upload duration in seconds is not computed yet)

  const summaryFilePath = `./transcriptions/${sixDigitNumber}/processing_data.json`;

  // just post-processing, you can return the response
  const elapsedMilliseconds = new Date() - startingDate;
  const processingSeconds = Math.round(elapsedMilliseconds / 1000);

  const completionSummary = {
    processingSeconds,
    processingSecondsHumanReadable: forHumans(processingSeconds),
    startedAt: startingDate.toUTCString(),
    finishedAT: new Date().toUTCString(),
    status: 'completed',
  };

  await writeToProcessingDataFile(summaryFilePath, completionSummary);
}
/**
 * Move the original upload into `./transcriptions/<sixDigitNumber>` and rename
 * it, together with each output file listed in `outputFileExtensions`, so that
 * every file is named after `sixDigitNumber`.
 *
 * Moves run one after another, upload first, then the outputs in list order.
 *
 * @param {Object} options
 * @param {string} options.originalUpload - Current path of the uploaded file.
 * @param {string} options.uploadFileName - Base name the output files currently carry.
 * @param {string|number} options.sixDigitNumber - Folder name and new base name.
 * @param {string} options.originalFileExtension - Extension appended to the renamed upload.
 * @returns {Promise<void>}
 */
async function moveAndRenameFilesAndFolder({
  originalUpload,
  uploadFileName,
  sixDigitNumber,
  originalFileExtension,
}){
  // the directory with the output from whisper
  const outputDirectory = `./transcriptions/${sixDigitNumber}`;
  const insideOutputDirectory = (fileName) => `${outputDirectory}/${fileName}`;

  // the upload itself ends up next to the outputs, named after the number
  await fs.move(
    originalUpload,
    insideOutputDirectory(`${sixDigitNumber}${originalFileExtension}`)
  );

  // rename every output file from the upload name to the number
  for(const extension of outputFileExtensions){
    await fs.move(
      insideOutputDirectory(`${uploadFileName}${extension}`),
      insideOutputDirectory(`${sixDigitNumber}${extension}`)
    );
  }
}
// Public API of this module: helpers for building the transcription command,
// parsing its output, and recording/renaming the results.
module.exports = {
moveAndRenameFilesAndFolder,
saveTranscriptionCompletedInformation,
translateIfNeeded,
buildArguments,
autoDetectLanguage,
writeToProcessingDataFile
}