-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathPDFExtract.js
34 lines (29 loc) · 1.11 KB
/
PDFExtract.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
//Analyzes the lines in a chunk/paragraph to determine if a line should be joined to the line that follows it.
//  lines - the lines of the chunk/paragraph; currently returned unchanged.
//  lang  - language code such as "en" or "fr", matched case-insensitively.
//Returns the same lines value that was passed in.
function analyzeJoins(lines, lang) {
  //<span id="page1c1p1l1" joinScore="100.00" style="top:0px;left:0px;width:0px;height:0px;">
  //NOTE(review): the span above looks like a sample of the line markup this hook receives - confirm against the caller.

  //Strings have no toLower() method (calling it throws a TypeError); toLowerCase() is the real API.
  //A null/undefined lang becomes "" so it reaches the default branch instead of throwing.
  const normalizedLang = lang == null ? "" : String(lang).toLowerCase();

  switch (normalizedLang) {
    case "en":
      // code block
      break;
    case "fr":
      // code block
      break;
    default:
      // code block
      break;
  }
  return lines;
}
//Hook for repairing poorly rendered objects that were split apart but are really one.
//No repair is implemented yet: the line is handed back untouched.
function repairObjectSequence(line) {
  const repairedLine = line;
  return repairedLine;
}
//Custom detection hook that can identify a section as a header.
//It is only called when internal logic has not already identified the content as a header
//and the content is within the first 5 paragraphs on the page.
function isHeader(lines, pageWidth, pageHeight) {
  //code block
  //No custom rules are implemented yet, so nothing is ever reported as a header.
  const headerDetected = false;
  return headerDetected;
}
//Custom detection hook that can identify a section as a footer.
//It is only called when internal logic has not already identified the content as a footer
//and the content is within the last 5 paragraphs on the page.
function isFooter(lines, pageWidth, pageHeight) {
  //code block
  //No custom rules are implemented yet, so nothing is ever reported as a footer.
  const footerDetected = false;
  return footerDetected;
}