diff --git a/clone.py b/clone.py
index c46adc6a..03afca63 100644
--- a/clone.py
+++ b/clone.py
@@ -14,13 +14,15 @@
 GNU General Public License for more details.
 """
 
-import re
+import argparse
+import asyncio
+import hashlib
+import json
 import os
+import re
 import sys
-
-import asyncio
 from asyncio import Queue
-import argparse
+
 import aiohttp
 import cssutils
 import yarl
@@ -28,9 +30,11 @@
 
 
 class Cloner(object):
-    def __init__(self, root):
+    def __init__(self, root, max_depth):
         self.visited_urls = []
         self.root = self.add_scheme(root)
+        self.max_depth = max_depth
+        self.moved_root = None
         if len(self.root.host) < 4:
             sys.exit('invalid taget {}'.format(self.root.host))
         self.target_path = '/opt/snare/pages/{}'.format(self.root.host)
@@ -39,6 +43,7 @@ def __init__(self, root):
             os.mkdir(self.target_path)
 
         self.new_urls = Queue()
+        self.meta = {}
 
     @staticmethod
     def add_scheme(url):
@@ -48,89 +53,115 @@ def add_scheme(url):
             new_url = yarl.URL('http://' + url)
         return new_url
 
-    @asyncio.coroutine
-    def process_link(self, url, check_host=False):
-        url = yarl.URL(url)
+    async def process_link(self, url, level, check_host=False):
+        try:
+            url = yarl.URL(url)
+        except UnicodeError:
+            return None
+        if url.scheme == ("data" or "javascript" or "file"):
+            return url.human_repr()
+        if not url.is_absolute():
+            if self.moved_root is None:
+                url = self.root.join(url)
+            else:
+                url = self.moved_root.join(url)
+
+        host = url.host
+
         if check_host:
-            if (url.host != self.root.host or url.fragment
-                    or url in self.visited_urls):
+            if (host != self.root.host and self.moved_root is None) or \
+                    url.fragment or \
+                    (self.moved_root is not None and host != self.moved_root.host):
                 return None
-        if not url.is_absolute():
-            url = self.root.join(url)
-        yield from self.new_urls.put(url)
-        return url.relative().human_repr()
+        if url.human_repr() not in self.visited_urls and (level + 1) <= self.max_depth:
+            await self.new_urls.put((url, level + 1))
+
+        res = None
+        try:
+            res = url.relative().human_repr()
+        except ValueError:
+            print(url)
+        return res
 
-    @asyncio.coroutine
-    def replace_links(self, data):
+    async def replace_links(self, data, level):
         soup = BeautifulSoup(data, 'html.parser')
 
         # find all relative links
         for link in soup.findAll(href=True):
-            res = yield from self.process_link(link['href'], check_host=True)
+            res = await self.process_link(link['href'], level, check_host=True)
             if res is not None:
                 link['href'] = res
 
         # find all images and scripts
         for elem in soup.findAll(src=True):
-            res = yield from self.process_link(elem['src'])
+            res = await self.process_link(elem['src'], level)
             if res is not None:
                 elem['src'] = res
 
         # find all action elements
         for act_link in soup.findAll(action=True):
-            res = yield from self.process_link(act_link['action'])
+            res = await self.process_link(act_link['action'], level)
             if res is not None:
                 act_link['action'] = res
 
         # prevent redirects
         for redir in soup.findAll(True, attrs={'name': re.compile('redirect.*')}):
-            redir['value'] = yarl.URL(redir['value']).relative().human_repr()
+            if redir['value'] != "":
+                redir['value'] = yarl.URL(redir['value']).relative().human_repr()
 
         return soup
 
-    @asyncio.coroutine
-    def get_body(self):
-        while not self.new_urls.empty():
-            current_url = yield from self.new_urls.get()
-            if current_url in self.visited_urls:
-                continue
-            self.visited_urls.append(current_url)
-            if current_url.name:
-                file_name = current_url.name
-            elif current_url.raw_path != '/':
-                file_name = current_url.path.rsplit('/')[1]
-            else:
-                file_name = 'index.html'
-            file_path = os.path.dirname(current_url.path)
-            if file_path == '/':
-                file_path = self.target_path
-            else:
-                file_path = os.path.join(self.target_path, file_path[1:])
+    def _make_filename(self, url):
+        host = url.host
+        if url.is_absolute():
+            file_name = url.relative().human_repr()
+        else:
+            file_name = url.human_repr()
+        if not file_name.startswith('/'):
+            file_name = "/" + file_name
 
-            print('path: ', file_path, 'name: ', file_name)
+        if file_name == '/' or file_name == "":
+            if host == self.root.host or (self.moved_root is not None and self.moved_root.host == host):
+                file_name = '/index.html'
+            else:
+                file_name = host
+        m = hashlib.md5()
+        m.update(file_name.encode('utf-8'))
+        hash_name = m.hexdigest()
+        return file_name, hash_name
 
-            if file_path and not os.path.exists(file_path):
-                os.makedirs(file_path)
+    async def get_body(self, session):
+        while not self.new_urls.empty():
+            current_url, level = await self.new_urls.get()
+            if current_url.human_repr() in self.visited_urls:
+                continue
+            self.visited_urls.append(current_url.human_repr())
+            file_name, hash_name = self._make_filename(current_url)
+            print('name: ', file_name)
+            self.meta[file_name] = {}
 
             data = None
+            content_type = None
             try:
                 with aiohttp.Timeout(10.0):
-                    with aiohttp.ClientSession() as session:
-                        response = yield from session.get(current_url)
-                        data = yield from response.read()
-            except aiohttp.ClientError as client_error:
+                    response = await session.get(current_url)
+                    content_type = response.content_type
+                    data = await response.read()
+
+            except (aiohttp.ClientError, asyncio.TimeoutError) as client_error:
                 print(client_error)
             else:
-                response.release()
-                session.close()
+                await response.release()
             if data is not None:
-                if re.match(re.compile('.*\.(html|php)'), file_name):
-                    soup = yield from self.replace_links(data)
+                self.meta[file_name]['hash'] = hash_name
+                self.meta[file_name]['content_type'] = content_type
+                if content_type == 'text/html':
+                    soup = await self.replace_links(data, level)
                     data = str(soup).encode()
-                with open(os.path.join(file_path, file_name), 'wb') as index_fh:
+                with open(os.path.join(self.target_path, hash_name), 'wb') as index_fh:
                     index_fh.write(data)
-                if '.css' in file_name:
+                if content_type == 'text/css':
                     css = cssutils.parseString(data)
                     for carved_url in cssutils.getUrls(css):
                         if carved_url.startswith('data'):
@@ -138,13 +169,31 @@ def get_body(self):
                         carved_url = yarl.URL(carved_url)
                         if not carved_url.is_absolute():
                             carved_url = self.root.join(carved_url)
-                        if carved_url not in self.visited_urls:
-                            yield from self.new_urls.put(carved_url)
-
-    @asyncio.coroutine
-    def run(self):
-        yield from self.new_urls.put(self.root)
-        return (yield from self.get_body())
+                        if carved_url.human_repr() not in self.visited_urls:
+                            await self.new_urls.put((carved_url,level+1))
+
+    async def get_root_host(self):
+        try:
+            with aiohttp.ClientSession() as session:
+                resp = await session.get(self.root)
+                if resp._url_obj.host != self.root.host:
+                    self.moved_root = resp._url_obj
+                resp.close()
+        except aiohttp.errors.ClientError as err:
+            print("Can\'t connect to target host.")
+            exit(-1)
+
+    async def run(self):
+        session = aiohttp.ClientSession()
+        try:
+            await self.new_urls.put((self.root, 0))
+            await self.get_body(session)
+        except KeyboardInterrupt:
+            raise
+        finally:
+            with open(os.path.join(self.target_path, 'meta.json'), 'w') as mj:
+                json.dump(self.meta, mj)
+            await session.close()
 
 
 def main():
@@ -158,9 +207,14 @@
     loop = asyncio.get_event_loop()
     parser = argparse.ArgumentParser()
     parser.add_argument("--target", help="domain of the page to be cloned", required=True)
+    parser.add_argument("--max-depth", help="max depth of the cloning", required=False, default=sys.maxsize)
     args = parser.parse_args()
-    cloner = Cloner(args.target)
-    loop.run_until_complete(cloner.run())
+    try:
+        cloner = Cloner(args.target, int(args.max_depth))
+        loop.run_until_complete(cloner.get_root_host())
+        loop.run_until_complete(cloner.run())
+    except KeyboardInterrupt:
+        pass
 
 
 if __name__ == '__main__':
diff --git a/converter.py b/converter.py
new file mode 100644
index 00000000..0b9080bf
--- /dev/null
+++ b/converter.py
@@ -0,0 +1,31 @@
+import os
+import hashlib
+from os import walk
+import mimetypes
+import json
+import shutil
+
+
+class Converter:
+    def __init__(self):
+        self.meta = {}
+
+    def convert(self, path):
+        files_to_convert = []
+
+        for (dirpath, dirnames, filenames) in walk(path):
+            for fn in filenames:
+                files_to_convert.append(os.path.join(dirpath, fn))
+
+        for fn in files_to_convert:
+            path_len = len(path)
+            file_name = fn[path_len:]
+            m = hashlib.md5()
+            m.update(fn.encode('utf-8'))
+            hash_name = m.hexdigest()
+            self.meta[file_name] = {'hash': hash_name, 'content_type': mimetypes.guess_type(file_name)[0]}
+            shutil.copyfile(fn, os.path.join(path, hash_name))
+            os.remove(fn)
+
+        with open(os.path.join(path, 'meta.json'), 'w') as mj:
+            json.dump(self.meta, mj)
diff --git a/snare.py b/snare.py
index fdc5fe5a..bc107997 100644
--- a/snare.py
+++ b/snare.py
@@ -18,6 +18,7 @@
 import configparser
 import grp
 import json
+import mimetypes
 import multiprocessing
 import os
 import pwd
@@ -29,7 +30,6 @@
 from versions_manager import VersionManager
 import aiohttp
 import git
-import mimetypes
 import pip
 from aiohttp import MultiDict
 
@@ -41,29 +41,33 @@
 from bs4 import BeautifulSoup
 import cssutils
 import netifaces as ni
-
+from converter import Converter
 
 class HttpRequestHandler(aiohttp.server.ServerHttpProtocol):
-    def __init__(self, run_args, debug=False, keep_alive=75, **kwargs):
+    def __init__(self, meta, run_args, debug=False, keep_alive=75, **kwargs):
         self.dorks = []
+        self.run_args = run_args
+        self.dir = '/opt/snare/pages/{}'.format(run_args.page_dir)
+
+        self.meta = meta
+
         self.sroute = StaticRoute(
             name=None,
             prefix='/',
-            directory='/opt/snare/pages/{}'.format(run_args.page_dir)
+            directory=self.dir
         )
         super().__init__(debug=debug, keep_alive=keep_alive, access_log=None, **kwargs)
 
-    @asyncio.coroutine
-    def get_dorks(self):
+    async def get_dorks(self):
         dorks = None
         try:
             with aiohttp.Timeout(10.0):
                 with aiohttp.ClientSession() as session:
-                    r = yield from session.get(
+                    r = await session.get(
                         'http://{0}:8090/dorks'.format(self.run_args.tanner)
                     )
                     try:
-                        dorks = yield from r.json()
+                        dorks = await r.json()
                     except json.decoder.JSONDecodeError as e:
                         print(e)
                     finally:
@@ -72,12 +76,11 @@ def get_dorks(self):
             print('Dorks timeout')
         return dorks['response']['dorks'] if dorks else []
 
-    @asyncio.coroutine
-    def submit_slurp(self, data):
+    async def submit_slurp(self, data):
         try:
             with aiohttp.Timeout(10.0):
                 with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
-                    r = yield from session.post(
+                    r = await session.post(
                         'https://{0}:8080/api?auth={1}&chan=snare_test&msg={2}'.format(
                             self.run_args.slurp_host, self.run_args.slurp_auth, data
                         ), data=json.dumps(data)
@@ -111,17 +114,16 @@ def create_data(self, request, response_status):
             data['cookies'] = {cookie.split('=')[0]: cookie.split('=')[1] for cookie in header['Cookie'].split('; ')}
         return data
 
-    @asyncio.coroutine
-    def submit_data(self, data):
+    async def submit_data(self, data):
         event_result = None
         try:
             with aiohttp.Timeout(10.0):
                 with aiohttp.ClientSession() as session:
-                    r = yield from session.post(
+                    r = await session.post(
                         'http://{0}:8090/event'.format(self.run_args.tanner), data=json.dumps(data)
                     )
                     try:
-                        event_result = yield from r.json()
+                        event_result = await r.json()
                     except json.decoder.JSONDecodeError as e:
                         print(e, data)
                     finally:
@@ -130,8 +132,7 @@ def submit_data(self, data):
             raise e
         return event_result
 
-    @asyncio.coroutine
-    def handle_html_content(self, content):
+    async def handle_html_content(self, content):
         soup = BeautifulSoup(content, 'html.parser')
         for p_elem in soup.find_all('p'):
             if p_elem.findChildren():
@@ -144,7 +145,7 @@ def handle_html_content(self, content):
             for idx, word in enumerate(text_list):
                 # Fetch dorks if required
                 if len(self.dorks) <= 0:
-                    self.dorks = yield from self.get_dorks()
+                    self.dorks = await self.get_dorks()
                 word += ' '
                 if idx % 5 == 0:
                     a_tag = soup.new_tag(
@@ -162,12 +163,11 @@ def handle_html_content(self, content):
         content = soup.encode('utf-8')
         return content
 
-    @asyncio.coroutine
-    def handle_request(self, request, payload):
+    async def handle_request(self, request, payload):
         print('Request path: {0}'.format(request.path))
         data = self.create_data(request, 200)
         if request.method == 'POST':
-            post_data = yield from payload.read()
+            post_data = await payload.read()
             post_data = MultiDict(parse_qsl(post_data.decode('utf-8')))
             print('POST data:')
             for key, val in post_data.items():
@@ -175,63 +175,19 @@ def handle_request(self, request, payload):
             data['post_data'] = dict(post_data)
 
         # Submit the event to the TANNER service
-        event_result = yield from self.submit_data(data)
+        event_result = await self.submit_data(data)
 
         # Log the event to slurp service if enabled
         if self.run_args.slurp_enabled:
-            yield from self.submit_slurp(request.path)
+            await self.submit_slurp(request.path)
+
+        content, content_type, headers, status_code = await self.parse_tanner_response(request.path, event_result['response']['message']['detection'])
         response = aiohttp.Response(
-            self.writer, status=200, http_version=request.version
+            self.writer, status=status_code, http_version=request.version
         )
-        content_type = None
-        mimetypes.add_type('text/html','.php')
-        mimetypes.add_type('text/html', '.aspx')
-        base_path = os.path.join('/opt/snare/pages', self.run_args.page_dir)
-        if event_result is not None and ('payload' in event_result['response']['message']['detection'] and event_result['response']['message']['detection']['payload'] is not None):
-            payload_content = event_result['response']['message']['detection']['payload']
-            if type(payload_content) == dict:
-                if payload_content['page'].startswith('/'):
-                    payload_content['page'] = payload_content['page'][1:]
-                page_path = os.path.join(base_path, payload_content['page'])
-                content = ''
-                if os.path.exists(page_path):
-                    content_type = mimetypes.guess_type(page_path)[0]
-                    with open(page_path, encoding='utf-8') as p:
-                        content = p.read()
-                soup = BeautifulSoup(content, 'html.parser')
-                script_tag = soup.new_tag('div')
-                script_tag.append(BeautifulSoup(payload_content['value'], 'html.parser'))
-                soup.body.append(script_tag)
-                content = str(soup).encode()
-
-            else:
-                content_type = mimetypes.guess_type(payload_content)[0]
-                content = payload_content.encode('utf-8')
-        else:
-            query = None
-            if request.path == '/':
-                parsed_url = self.run_args.index_page
-            else:
-                parsed_url = urlparse(unquote(request.path))
-                if parsed_url.query:
-                    query = '?' + parsed_url.query
-                parsed_url = parsed_url.path
-            if parsed_url.startswith('/'):
-                parsed_url = parsed_url[1:]
-            path = os.path.normpath(os.path.join(base_path, parsed_url))
-            if os.path.isfile(path) and path.startswith(base_path):
-                content_type = mimetypes.guess_type(path)[0]
-                with open(path, 'rb') as fh:
-                    content = fh.read()
-                if content_type:
-                    if 'text/html' in content_type:
-                        content = yield from self.handle_html_content(content)
-            else:
-                content_type = None
-                content = None
-                response = aiohttp.Response(
-                    self.writer, status=404, http_version=request.version
-                )
+        for name, val in headers.items():
+            response.add_header(name, val)
+
         response.add_header('Server', self.run_args.server_header)
 
         if 'cookies' in data and 'sess_uuid' in data['cookies']:
@@ -253,7 +209,60 @@
         response.send_headers()
         if content:
             response.write(content)
-        yield from response.write_eof()
+        await response.write_eof()
+
+    async def parse_tanner_response(self, requested_name, detection):
+        content_type = None
+        content = None
+        status_code = 200
+        headers = {}
+
+        if detection['type'] == 1:
+            if requested_name == '/':
+                requested_name = self.run_args.index_page
+            try:
+                requested_name = unquote(requested_name)
+                file_name = self.meta[requested_name]['hash']
+                content_type = self.meta[requested_name]['content_type']
+            except KeyError:
+                status_code = 404
+            else:
+                path = os.path.join(self.dir, file_name)
+                if os.path.isfile(path):
+                    with open(path, 'rb') as fh:
+                        content = fh.read()
+                if content_type:
+                    if 'text/html' in content_type:
+                        content = await self.handle_html_content(content)
+
+        elif detection['type'] == 2:
+            payload_content = detection['payload']
+            if payload_content['page']:
+                try:
+                    file_name = self.meta[payload_content['page']]['hash']
+                    content_type = self.meta[payload_content['page']]['content_type']
+                    page_path = os.path.join(self.dir, file_name)
+                    with open(page_path, encoding='utf-8') as p:
+                        content = p.read()
+                except KeyError:
+                    content = ''
+                    content_type = 'text\html'
+
+                soup = BeautifulSoup(content, 'html.parser')
+                script_tag = soup.new_tag('div')
+                script_tag.append(BeautifulSoup(payload_content['value'], 'html.parser'))
+                soup.body.append(script_tag)
+                content = str(soup).encode()
+            else:
+                content_type = mimetypes.guess_type(payload_content['value'])[0]
+                content = payload_content['value'].encode('utf-8')
+
+            if 'headers' in payload_content:
+                headers = payload_content['headers']
+        else:
+            status_code = payload_content['status_code']
+
+        return (content, content_type, headers, status_code)
 
     def handle_error(self, status=500, message=None,
                      payload=None, exc=None, headers=None, reason=None):
@@ -383,21 +392,20 @@ def parse_timeout(timeout):
     return result
 
 
-@asyncio.coroutine
-def check_tanner():
+async def check_tanner():
     vm = VersionManager()
     with aiohttp.ClientSession() as client:
         req_url = 'http://{}:8090/version'.format(args.tanner)
         try:
-            resp = yield from client.get(req_url)
-            result = yield from resp.json()
+            resp = await client.get(req_url)
+            result = await resp.json()
             version = result["version"]
             vm.check_compatibility(version)
        except aiohttp.errors.ClientOSError:
             print("Can't connect to tanner host {}".format(req_url))
             exit(1)
         else:
-            yield from resp.release()
+            await resp.release()
 
 
 if __name__ == '__main__':
@@ -429,21 +437,33 @@ def check_tanner():
     parser.add_argument("--update-timeout", help="update snare every timeout ", default='24H')
     parser.add_argument("--server-header", help="set server-header", default='nginx')
     args = parser.parse_args()
-
+    base_path = '/opt/snare/'
+    base_page_path = '/opt/snare/pages/'
     config = configparser.ConfigParser()
-    config.read('/opt/snare/' + args.config)
+    config.read(os.path.join(base_path,args.config))
+
     if args.list_pages:
         print('Available pages:\n')
-        for page in os.listdir('/opt/snare/pages/'):
+        for page in os.listdir(base_page_path):
             print('\t- {}'.format(page))
         print('\nuse with --page-dir {page_name}\n\n')
         exit()
-    if not os.path.exists('/opt/snare/pages/' + args.page_dir):
+    full_page_path = os.path.join(base_page_path, args.page_dir)
+    if not os.path.exists(full_page_path):
         print("--page-dir: {0} does not exist".format(args.page_dir))
         exit()
 
-    if not os.path.exists('/opt/snare/pages/' + args.page_dir + "/" + args.index_page):
-        print('can\'t crate meta tag')
+    args.index_page = os.path.join("/", args.index_page)
+
+    if not os.path.exists(os.path.join(full_page_path, 'meta.json')):
+        conv = Converter()
+        conv.convert(full_page_path)
+        print("pages was converted. Try to clone again for the better result.")
+
+    with open(os.path.join(full_page_path, 'meta.json')) as meta:
+        meta_info = json.load(meta)
+    if not os.path.exists(os.path.join(base_page_path,args.page_dir,os.path.join(meta_info[args.index_page]['hash']))):
+        print('can\'t create meta tag')
     else:
         add_meta_tag(args.page_dir, args.index_page)
     loop = asyncio.get_event_loop()
@@ -460,8 +480,8 @@ def check_tanner():
     else:
         host_ip = args.host_ip
     future = loop.create_server(
-        lambda: HttpRequestHandler(args, debug=args.debug, keep_alive=75),
-        args.interface, int(args.port))
+        lambda: HttpRequestHandler(meta_info, args, debug=args.debug, keep_alive=75),
+        args.host_ip, int(args.port))
     srv = loop.run_until_complete(future)
 
     drop_privileges()
diff --git a/versions_manager.py b/versions_manager.py
index e61cc8e5..5e7aba56 100644
--- a/versions_manager.py
+++ b/versions_manager.py
@@ -3,13 +3,15 @@
 
 class VersionManager:
     def __init__(self):
-        self.version = "0.1.0"
+        self.version = "0.2.0"
         self.version_mapper = {
-            "0.1.0": "0.4.0"
+            "0.1.0": ["0.1.0","0.4.0"],
+            "0.2.0" : ["0.5.0", "0.5.0"]
         }
 
     def check_compatibility(self, tanner_version):
-        max_version = self.version_mapper[self.version]
-        if not StrictVersion(tanner_version) <= StrictVersion(max_version):
-            print("Wrong tanner version: {}. Need version: {} or less".format(tanner_version, max_version))
+        min_version = self.version_mapper[self.version][0]
+        max_version = self.version_mapper[self.version][1]
+        if not (StrictVersion(min_version) <= StrictVersion(tanner_version) <= StrictVersion(max_version)):
+            print("Wrong tanner version: {}. Compatible versions are {} - {}".format(tanner_version, min_version, max_version))
             exit(1)