forked from mikejarema/purgatory
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpurgatory.rb
executable file
·278 lines (231 loc) · 9.26 KB
/
purgatory.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#!/usr/bin/ruby
PURGATORY_VERSION = "0.1.4"
require 'rubygems'
require 'optparse'
require 'open-uri'
require 'csv'
require 'yaml'
require 'tempfile'
# Known words: each key is a word, each value is true. Filled by load_dictionary.
$dictionary = {}
# Memo of phrase => minimum number of dictionary words, written by min_num_words_from.
$lookup_cache = {}
# Positive floating-point infinity; used both as "no upper limit" and as
# "no decomposition found" by the word-counting helpers.
Infinity = 1.0 / 0
# Decides whether one row of the expiring-domain list passes every active filter.
#
# row     - Array for one CSV line; row[0] is the full domain name
#           (e.g. "example.com"). No other column is consulted here.
# options - Hash of filter settings as built by main. Keys read here:
#           :extension, :nums, :hyphens, :min_len, :max_len, :keyword, :format,
#           :start, :end, :word_lengths, :has_variable_word_length,
#           :required_length, :min_words, :max_words.
#
# Returns true when the row satisfies all filters, false otherwise. Rows with
# no usable domain in the first column (blank lines, empty names) never match.
def match?(row, options)
  full_domain = row[0]
  # A blank line in the list parses to a row without a first column.
  return false if full_domain.nil?

  # Only the first two dot-separated labels are used: the name and its extension.
  domain, ext = full_domain.split(/\./)
  return false if domain.nil? || domain.empty?

  return false if !options[:extension].empty? && !options[:extension].include?(ext)
  return false if !options[:nums] && domain =~ /[0-9]/
  return false if !options[:hyphens] && domain =~ /\-/
  return false if options[:min_len] && domain.length < options[:min_len]
  return false if options[:max_len] && domain.length > options[:max_len]
  return false if options[:keyword] && !domain.include?(options[:keyword])
  return false if options[:format] && domain !~ options[:format]
  # :start / :end are interpolated as regular-expression source, not escaped.
  return false if options[:start] && domain !~ /^#{options[:start]}/
  return false if options[:end] && domain !~ /#{options[:end]}$/

  # Dictionary-backed checks come last because they are the most expensive.
  if !options[:word_lengths].empty? &&
     !has_word_lengths?(domain, options[:word_lengths], options[:has_variable_word_length], options[:required_length])
    return false
  end
  if options[:min_words] && options[:max_words] &&
     !has_words?(domain, options[:min_words], options[:max_words])
    return false
  end

  true
end
# Checks whether a domain can be split into between min_words and max_words
# dictionary words (both bounds inclusive).
#
# domain    - String domain name without extension; it is downcased before lookup.
# min_words - lowest acceptable word count.
# max_words - highest acceptable word count; may be infinite for "no upper limit".
#
# Returns true or false.
def has_words?(domain, min_words, max_words)
  num_words = min_num_words_from(domain.downcase, max_words)

  # An infinite count means "no decomposition found". Without this check an
  # unbounded max_words would accept it, because Infinity <= Infinity holds.
  return false unless num_words < Float::INFINITY

  num_words >= min_words && num_words <= max_words
end
# Finds the smallest number of dictionary words that concatenate to phrase.
#
# phrase          - String to decompose; looked up as-is in $dictionary.
# max_words       - bound on how deep the splitting may recurse.
# recursion_depth - current depth; callers outside this method use the default.
#
# Returns 1 for a dictionary word, a previously cached count when one exists,
# otherwise the best count found by trying every two-way split. Returns
# Infinity when nothing is found within the depth bound. Only finite results
# are stored in $lookup_cache.
def min_num_words_from(phrase, max_words = Infinity, recursion_depth = 1)
  return 1 if $dictionary.key?(phrase)
  return $lookup_cache[phrase] if $lookup_cache.key?(phrase)
  # A single character cannot be split, and deeper splits are disallowed
  # once the depth bound is reached.
  return Infinity if phrase.length <= 1
  return Infinity unless recursion_depth < max_words

  next_depth = recursion_depth + 1
  best = Infinity

  # Try split points from the longest head down to the shortest.
  (phrase.length - 2).downto(0) do |split_at|
    head = phrase[0..split_at]
    tail = phrase[(split_at + 1)..-1]
    candidate = min_num_words_from(head, max_words, next_depth) +
                min_num_words_from(tail, max_words, next_depth)
    best = candidate if candidate < best
  end

  $lookup_cache[phrase] = best if best < Infinity
  best
end
# Populates $dictionary from a word-list file plus optional extra words.
#
# file             - path of the dictionary file. Each line is stripped of
#                    "->" markers and "[...]" groups, then split on spaces,
#                    tabs and commas; every non-empty token becomes a word.
# additional_words - nil, a single String, or an Array of Strings; each is
#                    downcased before being added.
#
# Progress is reported on STDERR. Returns nil.
def load_dictionary(file, additional_words = nil)
  STDERR.print "Loading dictionary for word count restrictions..."

  # File.foreach closes the file once the last line has been read, so no
  # handle is left open.
  File.foreach(file) do |line|
    tokens = line.gsub("->", "").gsub(/\[.*?\]/, "").split(/[ \t,]/)
    tokens.each do |token|
      word = token.chomp
      $dictionary[word] = true unless word.empty?
    end
  end

  # Add additional words from the command line
  [additional_words].flatten.compact.each do |word|
    $dictionary[word.downcase.chomp] = true
  end

  STDERR.puts " done."
end
# Parses a "min,max" option value as used by -l and -w.
#
# values - Array of Strings from OptionParser. The first run of digits in each
#          element is used; an element without digits (e.g. "*") means no limit.
#
# Returns [min, max]: min defaults to 1, max defaults to Infinity, and a
# single-element value means an exact count (max == min).
def parse_min_max(values)
  min_match = /[0-9]+/.match(values[0])
  max_match = /[0-9]+/.match(values[1])
  min = min_match ? min_match[0].to_i : 1
  max = max_match ? max_match[0].to_i : Infinity
  max = min if values.size == 1
  [min, max]
end

# Command-line entry point: parses ARGV, optionally refreshes the domain list
# and loads the dictionary, then prints every matching domain.
#
# Domain lists are read from every file in this script's directory whose name
# matches /pool.*\.txt/i. Each is parsed as CSV with the domain in column 0
# and the drop date in column 1.
def main
  options = {
    # Digits, hyphens and non-.com extensions are excluded unless requested.
    :nums => false,
    :hyphens => false,
    :extension => ["com"],
    :min_len => nil,
    :max_len => nil,
    :keyword => nil,
    :dictionary => "2+2lemma.txt",
    :min_words => nil,
    :max_words => nil,
    :refresh => should_refresh_list?,
    :date => false,
    :word_lengths => [],
    :has_variable_word_length => false,
    :required_length => 0,
    :additional_words => [],
  }
  # Set by any option that needs word lookups, so the dictionary is only
  # loaded when it will actually be used.
  dictionary_required = false

  parser = OptionParser.new do |opts|
    opts.banner = "Usage: purgatory.rb [options]"

    opts.on("-x", "--ext com,net,org", Array, "Desired extensions") do |o|
      options[:extension] = o
    end

    opts.on("-n", "--[no-]nums", "Include/exclude nums") do |o|
      options[:nums] = o
    end

    opts.on("-h", "--[no-]hyphens", "Include/exclude hyphens") do |o|
      options[:hyphens] = o
    end

    opts.on("-l 2,8", Array, "Min, max length of domain, * for no upper/lower limit") do |o|
      options[:min_len], options[:max_len] = parse_min_max(o)
    end

    opts.on("-i", "--include keyword", "Ensure keyword appears in domain") do |o|
      options[:keyword] = o
    end

    opts.on("-s", "--start keyword", "Starts with keyword") do |o|
      options[:start] = o
    end

    opts.on("-e", "--end keyword", "Ends with keyword") do |o|
      options[:end] = o
    end

    opts.on("-f lncv-", "Specify a format with [l]etter, [n]umbers, [c]onsonants, [v]owels, and hyphens") do |o|
      options[:format] = /^#{o.gsub(/l/, "[a-z]").gsub(/n/, "[0-9]").gsub(/v/, "[aeiou]").gsub(/c/, "[bcdfghjklmnpqrstvwxyz]")}$/i
    end

    # The FILE placeholder makes this switch take an argument; without it
    # OptionParser passes `true` to the block instead of the path.
    opts.on("-d", "--dictionary FILE", "Dictionary for word matching") do |o|
      options[:dictionary] = o
    end

    opts.on("-w 1,3", Array, "Min, max number of dictionary words in domain, * for no upper/lower limit") do |o|
      options[:min_words], options[:max_words] = parse_min_max(o)
      dictionary_required = true
    end

    opts.on("--[no-]fetch", "Forces the script to refresh the working list of expiring domains") do |o|
      options[:refresh] = o
    end

    opts.on("-v", "--version", "Shows version number") do
      STDERR.puts "Purgatory v#{PURGATORY_VERSION} - Copyright (c) 2012 Mike Jarema"
      exit
    end

    opts.on("--[no-]date", "Shows the drop date of matching domains") do |o|
      options[:date] = o
    end

    opts.on("--add-words example,words", Array, "Treats the supplied words as dictionary words for the purposes of the current lookup") do |o|
      options[:additional_words] = o
    end

    opts.on("--word-lengths 3,3", Array, "Word lengths to match, eg. 3,3 would only match 6 char domains consisting of two 3-letter words. Permits one optional * denoting a variable length word.") do |o|
      lengths = o.map { |i| i == "*" ? :variable : i.to_i }
      options[:word_lengths] = lengths
      options[:has_variable_word_length] = lengths.include?(:variable)
      dictionary_required = true

      # Exact (or, with a variable word, minimum) total domain length implied
      # by the specification; the variable-length marker contributes nothing.
      options[:required_length] =
        lengths.select { |i| i.is_a?(Numeric) }.inject(0) { |total, i| total + i }

      if lengths.count(:variable) > 1
        STDERR.puts "More than one variable word length specifiers used on --word-lengths"
        exit
      end
    end

    opts.on("--prefix my", "Searches for two word domains with the specified prefix, overrides --word-lengths, -w and -s parameters") do |o|
      options[:word_lengths] = [o.length, :variable]
      options[:has_variable_word_length] = true
      options[:required_length] = o.length
      options[:start] = o
      options[:min_words] = options[:max_words] = nil
      # The prefix itself must count as a word for the lookup to succeed.
      options[:additional_words] << o
      dictionary_required = true
    end

    opts.on("--suffix now", "Searches for two word domains with the specified suffix, overrides --word-lengths, -w and -e parameters") do |o|
      options[:word_lengths] = [:variable, o.length]
      options[:has_variable_word_length] = true
      options[:required_length] = o.length
      options[:end] = o
      options[:min_words] = options[:max_words] = nil
      # The suffix itself must count as a word for the lookup to succeed.
      options[:additional_words] << o
      dictionary_required = true
    end
  end
  parser.parse!

  refresh_list if options[:refresh]
  load_dictionary(options[:dictionary], options[:additional_words]) if dictionary_required

  dir = File.dirname(__FILE__)
  # Dir.foreach releases the directory handle when iteration finishes.
  Dir.foreach(dir) do |file|
    next unless file =~ /pool.*\.txt/i

    CSV.foreach(File.join(dir, file)) do |row|
      next unless match?(row, options)
      puts row[0] + (options[:date] ? "\t(#{row[1]})" : "")
    end
  end
end
# Fetches the current expiring-domain archive from pool.com and unpacks it
# into the working directory, shelling out to curl, unzip, rm and touch.
#
# The archive is expected to contain PoolDeletingDomainsList.txt. That file is
# touched afterwards, so its modification time is the time of this run.
# Progress is reported on STDERR.
def refresh_list
  STDERR.print "Downloading expiring domain list from pool.com..."

  # Timestamp plus a random number keeps the scratch archive name from
  # colliding with other files.
  archive = "#{Time.now.to_i}_#{rand(Time.now.to_i)}.zip"

  %x(curl "http://www.pool.com/Downloads/PoolDeletingDomainsList.zip" > #{archive} 2> /dev/null)
  %x(unzip -o #{archive})
  %x(rm #{archive})
  %x(touch PoolDeletingDomainsList.txt)

  STDERR.puts " done."
end
# Reports whether the local domain list needs to be downloaded again.
#
# Returns true when PoolDeletingDomainsList.txt is missing from the current
# working directory or was last modified more than an hour ago.
def should_refresh_list?
  list_file = "PoolDeletingDomainsList.txt"
  return true unless File.exist?(list_file)

  File.mtime(list_file) < Time.now - 3600 # older than 1 hour
end
# Checks whether a domain is a concatenation of dictionary words whose lengths
# follow the given specification, in order.
#
# domain                   - String domain name without extension. It is
#                            downcased before lookup, as in has_words?.
# word_lengths             - Array of Integer lengths, with at most one
#                            non-numeric entry (e.g. :variable) standing for a
#                            word that takes up the remaining characters.
# has_variable_word_length - true when word_lengths has the variable entry.
# required_length          - sum of the numeric entries in word_lengths.
#
# Returns true when every segment is a key of $dictionary and the segments
# cover the whole domain, false otherwise.
def has_word_lengths?(domain, word_lengths, has_variable_word_length, required_length)
  domain = domain.downcase

  # Exit early if the length specification is unmet: a fixed specification
  # needs an exact length, a variable one needs at least one extra character.
  if has_variable_word_length
    return false if required_length >= domain.length
  elsif required_length != domain.length
    return false
  end

  variable_length = domain.length - required_length
  offset = 0

  word_lengths.each do |spec|
    segment_length = spec.is_a?(Numeric) ? spec : variable_length
    # A zero or negative length yields "" or nil here, neither of which is a
    # dictionary word, so malformed specifications simply fail to match.
    return false unless $dictionary[domain[offset, segment_length]]
    offset += segment_length
  end

  offset == domain.length
end
# Run the command-line tool; this executes whenever the file is loaded.
main