-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathqueue.js
103 lines (84 loc) · 2.21 KB
/
queue.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"use strict";
var _ = require('lodash');
var redis = require("redis");
var fs = require('fs');
// Connection settings are read synchronously at module load. The relative
// path is resolved against the process working directory, and a missing or
// malformed conf.json throws here, before anything is exported.
var conf = JSON.parse(fs.readFileSync('conf.json'));
// Redis list key: URLs waiting to be scraped (written with RPUSH, read with
// LRANGE and removed with LREM by the functions below).
var queue = "scraper_queue";
// Redis hash key: one field per URL that has been pushed onto the queue, used
// to avoid enqueueing the same URL twice.
var push_check = "push_check";
// Single shared connection used by every function in this module.
// NOTE(review): the legacy node_redis signature is createClient(port, host,
// options) with an options object as third argument; passing the password
// value directly may not authenticate - confirm against the installed
// redis package version.
var client = redis.createClient(conf.redis.port, conf.redis.host, conf.redis.password);
// Without an "error" listener an emitted error event would be thrown as an
// uncaught exception; here errors are only logged.
client.on("error", function (err) {
console.log("Error " + err);
});
/**
 * Callback placeholder that ignores its arguments and returns undefined.
 * Passed to redis commands whose reply is not needed.
 */
function no_op() {}
/**
 * Append `url` to the scraper queue unless it has been enqueued before.
 *
 * The `push_check` hash holds one field per URL ever pushed. HSETNX creates
 * the field only when it is absent and replies 1 in that case (0 when the
 * field already existed), so the "seen before?" test and the "remember it"
 * write happen in one atomic command. A separate HGET followed by HSET lets
 * two back-to-back calls for the same URL both observe "missing" and both
 * push.
 *
 * No "error" listener is attached here: the module registers one on the
 * shared client at load time, and adding another per call would accumulate
 * listeners on that client.
 *
 * @param {string} url - URL to enqueue.
 * @returns {undefined} Nothing; the work completes asynchronously.
 */
var addOnce = function(url) {
  client.hsetnx(push_check, url, "1", function(err, created) {
    if ( err ) {
      // Report the failure instead of silently dropping the URL.
      console.log("Error " + err);
      return;
    }
    // created === 1 means this call is the first one to register the URL.
    if ( created === 1 ) {
      client.rpush(queue, url, no_op);
    }
  });
};
/**
 * Enqueue one URL or a list of URLs.
 *
 * A bare string is treated as a one-element list. Each entry is handed to
 * addOnce in order, together with `do_quit`, which is forwarded untouched.
 *
 * @param {string|string[]} urls - A single URL or an indexable list of URLs.
 * @param {*} [do_quit] - Passed through as the second argument of addOnce.
 * @returns {undefined}
 */
var add = function(urls, do_quit) {
  var list = ( typeof urls === "string" ) ? [ urls ] : urls;
  var idx = 0;
  while ( idx < list.length ) {
    addOnce(list[idx], do_quit);
    idx += 1;
  }
};
/**
 * Take one random URL out of the head of the queue and hand it to `cb`.
 *
 * Reads the entries at list indexes 0 through 1000 (LRANGE bounds are
 * inclusive), picks one with lodash's `_.sample`, removes it from the list
 * and passes it to `cb`. LREM is called with count 0, which removes every
 * occurrence of that URL from the list, not just one.
 *
 * `cb` receives `undefined` when the queue is empty or the read failed. In
 * those cases no LREM is issued, because there is no value to remove.
 *
 * @param {function(string=)} cb - Called exactly once with the chosen URL,
 *     or with undefined when nothing could be fetched.
 * @returns {undefined}
 */
var get = function(cb) {
  client.lrange(queue, 0, 1000, function(err, replies) {
    if ( err ) {
      console.log("Error " + err);
      cb(undefined);
      return;
    }
    var url = _.sample(replies);
    // An empty list yields no sample; skip LREM rather than send it undefined.
    if ( url === undefined ) {
      cb(undefined);
      return;
    }
    client.lrem(queue, 0, url);
    cb(url);
  });
};
/**
 * Debugging aid: requests the entries at indexes 0 through 1000 of the queue
 * list and discards the reply. The logging that would print the entries is
 * currently disabled, so this only issues the LRANGE command.
 *
 * @returns {undefined}
 */
var peek = function() {
  var discardReply = function(error, entries) {
    // intentionally empty: output is switched off
  };
  client.lrange(queue, 0, 1000, discardReply);
};
/**
 * Record `u` as handled by storing the string "1" under that key in redis.
 * runOnce checks the same key to decide whether its callback should run.
 *
 * @param {string} u - Key to flag (callers pass the scraped URL).
 * @returns {undefined}
 */
function mark(u) {
  var flag = "1";
  client.set(u, flag);
}
// When enabled, runOnce skips its redis lookup and always invokes its callback.
var _alwaysRun = false;

/**
 * Switch the "always run" override on or off.
 *
 * @param {*} val - Stored as-is; runOnce compares it loosely against true.
 * @returns {undefined}
 */
function alwaysRun(val) {
  _alwaysRun = val;
}
/**
 * Run `cb` only if the key `u` has not been flagged in redis yet.
 *
 * When the always-run override is enabled, `cb` is invoked immediately and
 * nothing is read from or written to redis. Otherwise the key is looked up:
 * if it is missing, `cb` is invoked and the key is then set to "1"; if it is
 * present, a notice is logged and `cb` is skipped.
 *
 * A failed lookup is logged as an error and `cb` is skipped. It is not
 * reported as "already scraped", because nothing is known about the key.
 *
 * @param {string} u - Key to check (callers pass the URL being scraped).
 * @param {function()} cb - Work to perform; called with no arguments.
 * @returns {boolean|undefined} true when the override short-circuits the
 *     check, otherwise undefined (the check completes asynchronously).
 */
var runOnce = function(u, cb) {
  // Loose comparison kept on purpose: alwaysRun() stores its argument
  // unvalidated, so a value such as 1 also enables the override.
  if ( _alwaysRun == true ) {
    console.log("queue is set to always run!");
    cb();
    return true;
  }
  client.get(u, function(err, reply) {
    if ( err ) {
      console.log("Error " + err);
      return;
    }
    // reply is null when the key is missing
    if ( reply === null ) {
      cb();
      client.set(u, "1");
    }
    else {
      console.log("looks like " + u + " was already scraped");
    }
  });
};
/**
 * Ask the shared redis client to close its connection by calling its
 * quit() method. The client's return value is not propagated.
 *
 * @returns {undefined}
 */
function quit() {
  client.quit();
}
// Public interface of the queue module. The raw redis client is exported as
// well, so callers can issue commands this module does not wrap.
Object.assign(exports, {
  add: add,
  get: get,
  mark: mark,
  alwaysRun: alwaysRun,
  runOnce: runOnce,
  peek: peek,
  quit: quit,
  client: client
});