forked from brynedwards/readability-wrapper
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathindex.js
executable file
·97 lines (79 loc) · 2.51 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env node
/*
- @docs https://github.com/mozilla/readability
*/
// Feature flags read from the environment. Any non-empty value counts as
// "set", so e.g. readability_sanitize_disabled=0 still disables sanitization.
const sanitizeEnabled = !process.env.readability_sanitize_disabled;
const debug = Boolean(process.env.DEBUGME);

const jsdom = require('jsdom');
const { JSDOM } = jsdom;

// Virtual console handed to jsdom when parsing the input document.
// 'jsdomError' events are deliberately ignored (suppressed for now).
const jsdomConsole = new jsdom.VirtualConsole();
jsdomConsole.on('jsdomError', () => { });

// DOMPurify needs a DOM to operate on; give it the window of an empty
// jsdom document.
const createDOMPurify = require('dompurify');
const window = new JSDOM('').window;
const DOMPurify = createDOMPurify(window);

// Options for the sanitize-html pass over Readability's output:
// allow all tags and all attributes.
// A stricter alternative would be
//   allowedTags: sanitizeHtml.defaults.allowedTags.concat(['img'])
const sanitizeHtml = require('sanitize-html');
const sanOpts = {
  allowedTags: false,
  allowedAttributes: false,
};

const program = require('commander');
const pkg = require('./package.json');
const { Readability } = require('@mozilla/readability');
/**
 * Runs Mozilla Readability over a parsed document and prints the result
 * to stdout as HTML: an optional bold title paragraph followed by the
 * article body.
 *
 * @param {object} dom - jsdom instance; its `window.document` is parsed.
 *   A falsy value makes the function return without printing anything.
 * @param {string} url - Only used in the error message printed when
 *   Readability yields no article.
 * @returns {undefined}
 */
const readability = (dom, url) => {
  // Happens on missing file: nothing to parse.
  if (!dom) {
    return;
  }

  // keepClasses: ask Readability to leave class attributes in place.
  const parser = new Readability(dom.window.document, { keepClasses: true });
  const article = parser.parse();
  if (!article) {
    console.error(`Error: Readability returned nothing for url "${url}". This usually happens on empty input.`);
    return;
  }

  // Debug output goes to stderr so it never mixes with the HTML on stdout.
  if (debug) {
    console.error(JSON.stringify(article));
  }

  // The title is always run through sanitize-html (default options),
  // regardless of the sanitization flag.
  if (article.title) {
    console.log('<p><b>', sanitizeHtml(article.title), '</b></p>');
  }

  // The body is sanitized only when sanitization is enabled.
  const body = sanitizeEnabled
    ? sanitizeHtml(article.content, sanOpts)
    : article.content;
  console.log(body);
};
const run = (url) => {
(async () => {
const getStdin = require('get-stdin');
var doc = await getStdin();
var clean = doc;
if (sanitizeEnabled) {
clean = DOMPurify.sanitize(clean);
}
if (debug) {
if (! sanitizeEnabled) {
console.error("readability: sanitization disabled by user option.")
}
console.error('url: ', url)
}
const options = {
features: {
FetchExternalResources: false,
ProcessExternalresources: false,
},
virtualConsole: jsdomConsole,
url: url,
};
readability(new JSDOM(clean, options), url);
})();
};
// Command-line definition: a single required <url> argument whose value is
// handed to `run`. Parsing process.argv triggers the action.
const cliDescription = 'Sanitizes stdin, parses the result with Mozilla Readability, somewhat sanitizes the output again, and finally prints it to stdout. Note that you need to also specify the URL in addition to feeding us the HTML in stdin. Using an empty URL seems to work though.';

program.version(pkg.version);
program.arguments('<url>');
program.description(cliDescription);
program.action(run);
program.parse(process.argv);