From 4832744fd62dbb6bbc2f0fe01e08782aa3d2c53d Mon Sep 17 00:00:00 2001 From: ayeowch Date: Tue, 8 Jan 2013 23:50:26 +0800 Subject: [PATCH] initial revision --- README.md | 124 +++++++++++++++++++++++++++ traceroute.py | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 352 insertions(+) create mode 100644 README.md create mode 100644 traceroute.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..6389dac --- /dev/null +++ b/README.md @@ -0,0 +1,124 @@ +# traceroute +Traceroute is a Python script that allows you to get traceroute results with associated geolocation information for each hop for a specified host from geographically distant source(s). + +## Installation + +Save traceroute.py into a directory with its path stored in your PYTHONPATH environment variable. + +## Usage + +Try the following from your Python interpreter: + + >>> from traceroute import Traceroute + >>> traceroute = Traceroute(ip_address='8.8.8.8') + >>> hops = traceroute.traceroute() + >>> hops + [{'latitude': 40.3756, 'rtt': '0.862 ms', 'ip_address': '128.112.128.114', 'longitude': -74.6597, 'hop_num': 1}, {'latitude': 40.3756, 'rtt': '0.462 ms', 'ip_address': '128.112.12.22', 'longitude': -74.6597, 'hop_num': 2}, {'latitude': 40.3265, 'rtt': '8.374 ms', 'ip_address': '24.104.128.89', 'longitude': -75.3697, 'hop_num': 3}, {'latitude': 38.0, 'rtt': '21.010 ms', 'ip_address': '68.86.84.177', 'longitude': -97.0, 'hop_num': 4}, {'latitude': 38.0, 'rtt': '12.218 ms', 'ip_address': '68.86.86.70', 'longitude': -97.0, 'hop_num': 5}, {'latitude': 38.0, 'rtt': '31.103 ms', 'ip_address': '75.149.231.62', 'longitude': -97.0, 'hop_num': 6}, {'latitude': 37.4192, 'rtt': '14.752 ms', 'ip_address': '209.85.252.80', 'longitude': -122.0574, 'hop_num': 7}, {'latitude': 37.4192, 'rtt': '18.437 ms', 'ip_address': '72.14.238.18', 'longitude': -122.0574, 'hop_num': 9}, {'latitude': 37.4192008972168, 'rtt': '18.610 ms', 'ip_address': '72.14.238.70', 'longitude': -122.05740356445312, 'hop_num': 9}, {'latitude': 37.4192, 'rtt': '16.696 ms', 'ip_address': '72.14.238.18', 'longitude': -122.0574, 'hop_num': 9}, {'latitude': 37.4192, 'rtt': '24.441 ms', 'ip_address': '72.14.232.21', 'longitude': -122.0574, 'hop_num': 10}, {'latitude': 37.4192, 'rtt': '13.925 ms', 'ip_address': '216.239.49.149', 'longitude': -122.0574, 'hop_num': 10}, {'latitude': 37.4192, 'rtt': '19.800 ms', 'ip_address': '72.14.232.21', 'longitude': -122.0574, 'hop_num': 10}, {'latitude': 37.4192, 'rtt': '14.144 ms', 'ip_address': '8.8.8.8', 'longitude': -122.0574, 'hop_num': 11}] + >>> import json + >>> json.dumps(hops, indent=2) + '[\n {\n "latitude": 40.3756, \n "rtt": "0.862 ms", \n "ip_address": "128.112.128.114", \n "longitude": -74.6597, \n "hop_num": 1\n }, \n {\n "latitude": 40.3756, \n "rtt": "0.462 ms", \n "ip_address": "128.112.12.22", \n "longitude": -74.6597, \n "hop_num": 2\n }, \n {\n "latitude": 40.3265, \n "rtt": "8.374 ms", \n "ip_address": "24.104.128.89", \n "longitude": -75.3697, \n "hop_num": 3\n }, \n {\n "latitude": 38.0, \n "rtt": "21.010 ms", \n "ip_address": "68.86.84.177", \n "longitude": -97.0, \n "hop_num": 4\n }, \n {\n "latitude": 38.0, \n "rtt": "12.218 ms", \n "ip_address": "68.86.86.70", \n "longitude": -97.0, \n "hop_num": 5\n }, \n {\n "latitude": 38.0, \n "rtt": "31.103 ms", \n "ip_address": "75.149.231.62", \n "longitude": -97.0, \n "hop_num": 6\n }, \n {\n "latitude": 37.4192, \n "rtt": "14.752 ms", \n "ip_address": "209.85.252.80", \n "longitude": -122.0574, \n "hop_num": 7\n }, \n {\n "latitude": 37.4192, \n "rtt": "18.437 ms", \n "ip_address": "72.14.238.18", \n "longitude": -122.0574, \n "hop_num": 9\n }, \n {\n "latitude": 37.4192008972168, \n "rtt": "18.610 ms", \n "ip_address": "72.14.238.70", \n "longitude": -122.05740356445312, \n "hop_num": 9\n }, \n {\n "latitude": 37.4192, \n "rtt": "16.696 ms", \n "ip_address": "72.14.238.18", \n "longitude": -122.0574, \n "hop_num": 9\n }, \n {\n "latitude": 37.4192, \n "rtt": "24.441 ms", \n "ip_address": "72.14.232.21", \n "longitude": -122.0574, \n "hop_num": 10\n }, \n {\n "latitude": 37.4192, \n "rtt": "13.925 ms", \n "ip_address": "216.239.49.149", \n "longitude": -122.0574, \n "hop_num": 10\n }, \n {\n "latitude": 37.4192, \n "rtt": "19.800 ms", \n "ip_address": "72.14.232.21", \n "longitude": -122.0574, \n "hop_num": 10\n }, \n {\n "latitude": 37.4192, \n "rtt": "14.144 ms", \n "ip_address": "8.8.8.8", \n "longitude": -122.0574, \n "hop_num": 11\n }\n]' + >>> + +You can also run the script directly by passing in the --ip_address option: + + $ python traceroute.py --ip_address=8.8.8.8 + [ + { + "latitude": 40.3756, + "rtt": "0.862 ms", + "ip_address": "128.112.128.114", + "longitude": -74.6597, + "hop_num": 1 + }, + { + "latitude": 40.3756, + "rtt": "0.462 ms", + "ip_address": "128.112.12.22", + "longitude": -74.6597, + "hop_num": 2 + }, + { + "latitude": 40.3265, + "rtt": "8.374 ms", + "ip_address": "24.104.128.89", + "longitude": -75.3697, + "hop_num": 3 + }, + { + "latitude": 38.0, + "rtt": "21.010 ms", + "ip_address": "68.86.84.177", + "longitude": -97.0, + "hop_num": 4 + }, + { + "latitude": 38.0, + "rtt": "12.218 ms", + "ip_address": "68.86.86.70", + "longitude": -97.0, + "hop_num": 5 + }, + { + "latitude": 38.0, + "rtt": "31.103 ms", + "ip_address": "75.149.231.62", + "longitude": -97.0, + "hop_num": 6 + }, + { + "latitude": 37.4192, + "rtt": "14.752 ms", + "ip_address": "209.85.252.80", + "longitude": -122.0574, + "hop_num": 7 + }, + { + "latitude": 37.4192, + "rtt": "18.437 ms", + "ip_address": "72.14.238.18", + "longitude": -122.0574, + "hop_num": 9 + }, + { + "latitude": 37.4192008972168, + "rtt": "18.610 ms", + "ip_address": "72.14.238.70", + "longitude": -122.05740356445312, + "hop_num": 9 + }, + { + "latitude": 37.4192, + "rtt": "16.696 ms", + "ip_address": "72.14.238.18", + "longitude": -122.0574, + "hop_num": 9 + }, + { + "latitude": 37.4192, + "rtt": "24.441 ms", + "ip_address": "72.14.232.21", + "longitude": -122.0574, + "hop_num": 10 + }, + { + "latitude": 37.4192, + "rtt": "13.925 ms", + "ip_address": "216.239.49.149", + "longitude": -122.0574, + "hop_num": 10 + }, + { + "latitude": 37.4192, + "rtt": "19.800 ms", + "ip_address": "72.14.232.21", + "longitude": -122.0574, + "hop_num": 10 + }, + { + "latitude": 37.4192, + "rtt": "14.144 ms", + "ip_address": "8.8.8.8", + "longitude": -122.0574, + "hop_num": 11 + } + ] diff --git a/traceroute.py b/traceroute.py new file mode 100644 index 0000000..43440b0 --- /dev/null +++ b/traceroute.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +traceroute - Get traceroute results with associated geolocation information +for each hop for a specified host from geographically distant source(s). +""" + +__author__ = 'Dazzlepod (info@dazzlepod.com)' +__copyright__ = 'Copyright (c) 2013 Dazzlepod' +__version__ = '$Revision: #7 $' + +import datetime +import json +import optparse +import os +import re +import sys +import urllib +import urllib2 +from multiprocessing import Process +from subprocess import Popen, PIPE + + +class Traceroute(object): + """Traceroute instance.""" + def __init__(self, ip_address='8.8.8.8', debug=False): + super(Traceroute, self).__init__() + self.ip_address = ip_address + self.debug = debug + # cache geocoded IP addresses during the lifetime of this instance + self.locations = {} + + def traceroute(self): + """Instead of running the actual traceroute command, we will fetch + standard traceroute results from several publicly available webpages + that are listed at traceroute.org. + + For each hop, we will then attach geolocation information to it.""" + self.print_debug("ip_address = %s" % self.ip_address) + if not os.path.exists('%s.txt' % self.ip_address): + (status_code, traceroute) = self.get_traceroute_output() + f = open('%s.txt' % self.ip_address, 'w') + f.write(traceroute) + f.close() + + traceroute = open('%s.txt' % self.ip_address, 'r').read() + + # hops = dicts with keys: hop_num, hosts + hops = self.get_hops(traceroute) + + # hops = dicts with keys: hop_num, hostname, ip_address, rtt + hops = self.get_formatted_hops(hops) + + # hops = dicts with keys: hop_num, hostname, ip_address, rtt, location + hops = self.get_geocoded_hops(hops) + + # hops = dicts with keys: hop_num, ip_address, latitude, longitude, rtt + hops = self.get_stripped_hops(hops) + + return hops + + def get_traceroute_output(self): + """Fetch traceroute output from a webpage.""" + + # Example from traceroute.org + url = "http://www.net.princeton.edu/cgi-bin/traceroute.pl" + (status_code, response) = self.urlopen(url, context = {'target': self.ip_address}) + + pattern = re.compile(r'(?P.*?)', re.DOTALL|re.IGNORECASE) + traceroute = re.findall(pattern, response)[0].strip() + + return (status_code, traceroute) + + def get_hops(self, traceroute): + """Get hops from a traceroute output and return the hops in an array + of dicts each representing hop number and the associated hosts data.""" + hops = [] + + lines = traceroute.split('\n') + for line in lines: + line = line.strip() + hop = {} + if not line: continue + try: + hop = re.match(r'^(?P\d+)(?P.*?)$', line).groupdict() + except AttributeError: + continue + self.print_debug(hop) + hops.append(hop) + + return hops + + def get_formatted_hops(self, hops): + """hosts data from get_hops() is represented in a single string. + We use this function to better represent the hosts data in a dict.""" + formatted_hops = [] + + for hop in hops: + hop_num = int(hop['hop_num'].strip()) + hosts = hop['hosts'].replace(' ', ' ').strip() + + # Using re.findall(), first we split the hosts, then for each host we store a tuple containing hostname, IP address and the first round-trip time + # [('', '', ' ms'), ('', '', ' ms')] + hosts = re.findall(r'(?P[\w.-]+) \((?P[\d.]+)\) (?P\d{1,4}.\d{1,4} ms)', hosts) + + for host in hosts: + hop_context = { + 'hop_num': hop_num, + 'hostname': host[0], + 'ip_address': host[1], + 'rtt': host[2], + } + self.print_debug(hop_context) + formatted_hops.append(hop_context) + + return formatted_hops + + def get_geocoded_hops(self, hops): + """Return hops from get_formatted_hops() with geolocation information + for each hop.""" + for hop in hops: + ip_address = hop['ip_address'] + location = None + if self.locations.has_key(ip_address): + location = self.locations[ip_address] + else: + location = self.get_location(ip_address) + self.locations[ip_address] = location + hop['location'] = location + + return hops + + def get_stripped_hops(self, hops): + """ Return hops from get_geocoded_hops() with each dict containing + data ready for plotting with e.g. Google Maps JavaScript API.""" + stripped_hops = [] + + for hop in hops: + if hop['location']: + lat = hop['location']['latitude'] + lon = hop['location']['longitude'] + stripped_hops.append({ + 'hop_num': hop['hop_num'], + 'ip_address': hop['ip_address'], + 'latitude': lat, + 'longitude': lon, + 'rtt': hop['rtt'], + }) + + return stripped_hops + + def get_location(self, ip_address): + """Return geolocation information for the specified IP address, e.g.: + {"ip": "75.126.24.77", + "hostname": "web365.webfaction.com", + "isp": "SoftLayer Technologies", + "organization": "Client Intellect", + "country": "United States", + "region": "Texas", + "city": "Dallas", + "latitude": 32.9299, + "longitude": -96.8353} + """ + location = None + url = "https://dazzlepod.com/ip/%s.json" % ip_address + (status_code, json_data) = self.urlopen(url) + if status_code == 200 and json_data: + tmp_location = json.loads(json_data) + if tmp_location.has_key('latitude') and tmp_location.has_key('longitude'): + location = tmp_location + return location + + def urlopen(self, url, context = None): + """Perform HTTP GET/POST on the specified URL and return the resultant + status code and response. + """ + status_code = 200 + + request = urllib2.Request(url = url) + + if context: + data = urllib.urlencode(context) + request.add_data(data) + + response = '' + try: + response = urllib2.urlopen(request).read() + except urllib2.HTTPError, e: + status_code = e.code + except urllib2.URLError: + pass + + try: + self.urlopen_count += 1 + except AttributeError: + self.urlopen_count = 1 + self.print_debug("[%d] url = %s, status_code = %d" % (self.urlopen_count, url, status_code)) + + return (status_code, response) + + def print_debug(self, msg): + """Print debug message to standard output.""" + if self.debug: + print "[DEBUG %s] %s" % (datetime.datetime.now(), msg) + + +def main(): + usage = """%prog --ip_address=IP_ADDRESS""" + cmdparser = optparse.OptionParser(usage, version=("traceroute " + __version__)) + cmdparser.add_option("-i", "--ip_address", type="string", default="", help="IP address of destination host") + cmdparser.add_option("-d", "--debug", action="store_true", default=False, help="Show debug output") + + (options, args) = cmdparser.parse_args() + + if options.ip_address: + traceroute = Traceroute(ip_address=options.ip_address, debug=options.debug) + hops = traceroute.traceroute() + hops = json.dumps(hops, indent=2) + print hops + else: + cmdparser.print_usage() + + return 0 + + +if __name__ == '__main__': + sys.exit(main())