-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscraper.py
36 lines (32 loc) · 1.38 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import urllib2
from sys import argv, exc_info
from BeautifulSoup import BeautifulSoup
def get_args():
    """Return the collection and the query string to search the API for.

    The values are taken from the command line when exactly two arguments
    follow the script name.  Otherwise a usage message is printed and both
    values are read interactively.  An unrecognised collection, whether it
    came from the command line or from a prompt, produces an error message
    followed by a new prompt until one of the accepted spellings is entered.

    Returns:
        A ``(collection, query)`` tuple.  ``collection`` is one of ``i``,
        ``item``, ``items``, ``u``, ``user`` or ``users`` exactly as it was
        entered; it is not normalised here.
    """
    valid_collections = ('i', 'item', 'items', 'u', 'user', 'users')

    # Read the command line exactly once.  Re-reading argv on every retry
    # can never change the outcome, so an invalid collection given on the
    # command line would otherwise loop forever.
    try:
        _script, collection, query = argv
    except ValueError:
        # Wrong number of command-line arguments: fall back to prompting.
        print("Please enter the collection you would like to search followed by your query.")
        collection = query = None

    while collection not in valid_collections:
        # None means nothing has been entered yet, so there is nothing to
        # complain about on the first pass.
        if collection is not None:
            print("Please enter a valid value for collection.")
        collection = raw_input("Collection (items|users) >>> ")
        query = raw_input("Query >>> ")

    # Echo the values that will be used (and the query's type) to stdout.
    print(collection)
    print("%s %s" % (query, type(query)))
    return collection, query
def search(collection, query):
    """Send a search request to the API and return the parsed response.

    Args:
        collection: Collection to search.  The short forms ``u``/``user`` and
            ``i``/``item`` are expanded to ``users`` and ``items``; any other
            value is placed in the URL unchanged.
        query: Search text.  It is URL-encoded before being placed in the
            query string, so spaces and reserved characters such as ``&``
            or ``#`` are sent as part of the search term.

    Returns:
        A ``BeautifulSoup`` object built from the response body.

    Errors raised by ``urllib2.urlopen`` are not caught here.
    """
    # Function-local import: keeps this block self-contained without
    # touching the file's import section.
    import urllib

    aliases = {'u': 'users', 'user': 'users', 'i': 'items', 'item': 'items'}
    collection = aliases.get(collection, collection)

    address = ("http://api.thriftdb.com/api.hnsearch.com/" + collection
               + "/_search?q=" + urllib.quote_plus(query)
               + "&pretty_print=true")
    print(address + "\n")

    response = urllib2.urlopen(address)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    return BeautifulSoup(html)
# Script body: obtain the search parameters, run the search, and print
# the parsed response to stdout.
collection, query = get_args()
data = search(collection=collection, query=query)
print(data)