-
-
Notifications
You must be signed in to change notification settings - Fork 70
/
Copy pathpdf.js
172 lines (155 loc) · 6.65 KB
/
pdf.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/**
 * Resolve a file name inside the `vendor/pdfjs/` directory (relative to this
 * module's own URL) to an absolute URL string.
 *
 * @param {string} path - path relative to `vendor/pdfjs/`
 * @returns {string} the absolute URL
 */
const pdfjsPath = path => {
    const resolved = new URL(`vendor/pdfjs/${path}`, import.meta.url)
    return resolved.href
}
import './vendor/pdfjs/pdf.mjs'
// pdf.js is read from the global object here (presumably put there by the
// side-effect import of `pdf.mjs`); point it at the worker script that lives
// in the same vendor directory
const { pdfjsLib } = globalThis
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath('pdf.worker.mjs')
/**
 * Fetch a URL and return its body decoded as text.
 *
 * @param {string} url - the resource to fetch
 * @returns {Promise<string>} the response body
 */
const fetchText = async url => {
    const response = await fetch(url)
    return response.text()
}
// Stylesheets that get inlined into every page document built by `renderPage`.
// The two files are independent, so they are fetched in parallel rather than
// one after the other; this module cannot finish loading until both arrive.
// https://github.com/mozilla/pdf.js/blob/642b9a5ae67ef642b9a8808fd9efd447e8c350e2/web/text_layer_builder.css
// https://github.com/mozilla/pdf.js/blob/642b9a5ae67ef642b9a8808fd9efd447e8c350e2/web/annotation_layer_builder.css
const [textLayerBuilderCSS, annotationLayerBuilderCSS] = await Promise.all([
    fetchText(pdfjsPath('text_layer_builder.css')),
    fetchText(pdfjsPath('annotation_layer_builder.css')),
])
/**
 * Render one PDF page into a page document at the given zoom level.
 *
 * Three things are produced inside `doc`: a canvas (placed in `#canvas`),
 * a selectable text layer (in `.textLayer`) and an annotation layer
 * (in `.annotationLayer`). The function may be called repeatedly for the same
 * `doc` (e.g. on every zoom change); each call replaces the previous output.
 *
 * @param {object} page - pdf.js page object; `getViewport`, `render`,
 *   `streamTextContent` and `getAnnotations` are used
 * @param {Document} doc - document containing the `#canvas`, `.textLayer`
 *   and `.annotationLayer` elements
 * @param {number} zoom - zoom factor, before accounting for device pixels
 * @returns {Promise<void>} resolves once all three layers are rendered
 */
const render = async (page, doc, zoom) => {
    // draw at device-pixel resolution, then scale the whole document back down
    // by the same ratio
    const scale = zoom * devicePixelRatio
    const rootStyle = doc.documentElement.style
    rootStyle.transform = `scale(${1 / devicePixelRatio})`
    rootStyle.transformOrigin = 'top left'
    rootStyle.setProperty('--scale-factor', scale)
    const viewport = page.getViewport({ scale })

    // the canvas must be in the `PDFDocument`'s `ownerDocument`
    // (`globalThis.document` by default); that's where the fonts are loaded
    const canvas = document.createElement('canvas')
    canvas.height = viewport.height
    canvas.width = viewport.width
    const canvasContext = canvas.getContext('2d')
    await page.render({ canvasContext, viewport }).promise
    doc.querySelector('#canvas').replaceChildren(doc.adoptNode(canvas))

    const container = doc.querySelector('.textLayer')
    // drop whatever an earlier call for this `doc` left behind; without this,
    // every re-render stacks another copy of the text (and another
    // `endOfContent` element) on top of the old one
    container.replaceChildren()
    const textLayer = new pdfjsLib.TextLayer({
        textContentSource: await page.streamTextContent(),
        container, viewport,
    })
    await textLayer.render()

    // hide "offscreen" canvases appended to document when rendering text layer
    // https://github.com/mozilla/pdf.js/blob/642b9a5ae67ef642b9a8808fd9efd447e8c350e2/web/pdf_viewer.css#L51-L58
    for (const hidden of document.querySelectorAll('.hiddenCanvasElement'))
        Object.assign(hidden.style, {
            position: 'absolute',
            top: '0',
            left: '0',
            width: '0',
            height: '0',
            display: 'none',
        })

    // fix text selection
    // https://github.com/mozilla/pdf.js/blob/642b9a5ae67ef642b9a8808fd9efd447e8c350e2/web/text_layer_builder.js#L105-L107
    const endOfContent = document.createElement('div')
    endOfContent.className = 'endOfContent'
    container.append(endOfContent)
    // TODO: this only works in Firefox; see https://github.com/mozilla/pdf.js/pull/17923
    container.onpointerdown = () => container.classList.add('selecting')
    container.onpointerup = () => container.classList.remove('selecting')

    const div = doc.querySelector('.annotationLayer')
    // same reasoning as for the text layer: start from an empty container so
    // that annotations are not duplicated on re-render
    div.replaceChildren()
    await new pdfjsLib.AnnotationLayer({ page, viewport, div }).render({
        annotations: await page.getAnnotations(),
        // minimal link service: navigation itself is not handled here; internal
        // destinations are exposed as JSON strings, external URLs as plain hrefs
        linkService: {
            goToDestination: () => {},
            getDestinationHash: dest => JSON.stringify(dest),
            addLinkAttributes: (link, url) => link.href = url,
        },
    })
}
/**
 * Turn a PDF page into either a cover image or a loadable page document.
 *
 * @param {object} page - pdf.js page object
 * @param {boolean} [getImageBlob] - when truthy, rasterize the page at scale 1
 *   and resolve with whatever `canvas.toBlob` hands to its callback
 * @returns {Promise<*|{src: string, onZoom: function}>} the image blob, or an
 *   object holding the object URL of a skeleton HTML document (`src`) and a
 *   callback (`onZoom`) that renders the page into that document at a given
 *   scale
 */
const renderPage = async (page, getImageBlob) => {
    const viewport = page.getViewport({ scale: 1 })

    if (!getImageBlob) {
        // skeleton document: the three containers are filled in later by
        // `render`, which `onZoom` delegates to
        const html = `
<!DOCTYPE html>
<html lang="en">
<meta charset="utf-8">
<meta name="viewport" content="width=${viewport.width}, height=${viewport.height}">
<style>
html, body {
margin: 0;
padding: 0;
}
${textLayerBuilderCSS}
${annotationLayerBuilderCSS}
</style>
<div id="canvas"></div>
<div class="textLayer"></div>
<div class="annotationLayer"></div>
`
        const src = URL.createObjectURL(new Blob([html], { type: 'text/html' }))
        return {
            src,
            onZoom: ({ doc, scale }) => render(page, doc, scale),
        }
    }

    // cover path: draw the page onto a canvas and export it as a blob
    const cover = document.createElement('canvas')
    cover.height = viewport.height
    cover.width = viewport.width
    const context = cover.getContext('2d')
    await page.render({ canvasContext: context, viewport }).promise
    return new Promise(done => cover.toBlob(done))
}
/**
 * Convert one outline entry (and, recursively, its children) into a TOC item.
 *
 * @param {{title: string, dest: *, items: Array}} item - outline entry
 * @returns {{label: string, href: string, subitems: ?Array}} TOC item; `href`
 *   is the JSON-encoded destination, `subitems` is `null` when there are no
 *   children
 */
const makeTOCItem = item => {
    const { title, dest, items } = item
    const href = JSON.stringify(dest)
    const subitems = items.length ? items.map(makeTOCItem) : null
    return { label: title, href, subitems }
}
/**
 * Open a PDF and expose it as a pre-paginated "book" object.
 *
 * The returned object carries metadata, a table of contents built from the
 * PDF outline, one lazily rendered section per page, helpers to resolve
 * destinations to page indices, a cover getter and a `destroy` method.
 *
 * @param {Blob} file - the PDF; only `size`, `slice()` and the resulting
 *   `arrayBuffer()` are used, so data is read in ranges on demand
 * @returns {Promise<object>} the book object
 */
export const makePDF = async file => {
    // let pdf.js pull byte ranges from the file instead of loading it whole
    const transport = new pdfjsLib.PDFDataRangeTransport(file.size, [])
    transport.requestDataRange = (begin, end) => {
        file.slice(begin, end).arrayBuffer().then(chunk => {
            transport.onDataRange(begin, chunk)
        })
    }

    const pdf = await pdfjsLib.getDocument({
        range: transport,
        cMapUrl: pdfjsPath('cmaps/'),
        standardFontDataUrl: pdfjsPath('standard_fonts/'),
        isEvalSupported: false,
    }).promise

    const book = { rendition: { layout: 'pre-paginated' } }

    // prefer the Dublin Core fields of `metadata`, falling back to the
    // document info dictionary where an equivalent entry exists
    const { metadata, info } = await pdf.getMetadata() ?? {}
    // TODO: for better results, parse `metadata.getRaw()`
    book.metadata = {
        title: metadata?.get('dc:title') ?? info?.Title,
        author: metadata?.get('dc:creator') ?? info?.Author,
        contributor: metadata?.get('dc:contributor'),
        description: metadata?.get('dc:description') ?? info?.Subject,
        language: metadata?.get('dc:language'),
        publisher: metadata?.get('dc:publisher'),
        subject: metadata?.get('dc:subject'),
        identifier: metadata?.get('dc:identifier'),
        source: metadata?.get('dc:source'),
        rights: metadata?.get('dc:rights'),
    }

    const outline = await pdf.getOutline()
    book.toc = outline?.map(makeTOCItem)

    // page index -> promise of the rendered section. Caching the promise
    // (rather than its result) means concurrent `load()` calls for the same
    // page share one render and one object URL instead of each creating one.
    const cache = new Map()
    const loadSection = index => {
        const cached = cache.get(index)
        if (cached) return cached
        const pending = (async () => renderPage(await pdf.getPage(index + 1)))()
        cache.set(index, pending)
        // don't keep a failed render around; the next `load()` should retry
        pending.catch(() => {
            if (cache.get(index) === pending) cache.delete(index)
        })
        return pending
    }
    book.sections = Array.from({ length: pdf.numPages }, (_, i) => ({
        id: i,
        load: () => loadSection(i),
        size: 1000,
    }))

    book.isExternal = uri => /^\w+:/i.test(uri)

    // Resolve a JSON-encoded destination (a named destination string or an
    // explicit destination array) to a zero-based page index.
    const resolvePageIndex = async href => {
        const parsed = JSON.parse(href)
        const dest = typeof parsed === 'string'
            ? await pdf.getDestination(parsed) : parsed
        if (!Array.isArray(dest))
            throw new TypeError(`Cannot resolve PDF destination: ${href}`)
        const [ref] = dest
        // the first element is normally a page reference object, but it may
        // also be a plain integer page index (pdf.js's own link service
        // handles both); only references need to be looked up
        return Number.isInteger(ref) ? ref : pdf.getPageIndex(ref)
    }
    book.resolveHref = async href => ({ index: await resolvePageIndex(href) })
    book.splitTOCHref = async href => [await resolvePageIndex(href), null]
    book.getTOCFragment = doc => doc.documentElement

    book.getCover = async () => renderPage(await pdf.getPage(1), true)

    book.destroy = () => {
        // release the object URLs created for rendered pages
        for (const pending of cache.values())
            pending.then(({ src }) => URL.revokeObjectURL(src), () => {})
        cache.clear()
        return pdf.destroy()
    }
    return book
}