initial revision

This commit is contained in:
ayeowch 2013-01-08 23:50:26 +08:00
commit 4832744fd6
2 changed files with 352 additions and 0 deletions

124
README.md Normal file
View file

@ -0,0 +1,124 @@
# traceroute
Traceroute is a Python script that returns traceroute results for a specified host, as seen from geographically distant source(s), with geolocation information attached to each hop.
## Installation
Save traceroute.py into a directory that is listed in your PYTHONPATH environment variable.
## Usage
Try the following from your Python interpreter:
>>> from traceroute import Traceroute
>>> traceroute = Traceroute(ip_address='8.8.8.8')
>>> hops = traceroute.traceroute()
>>> hops
[{'latitude': 40.3756, 'rtt': '0.862 ms', 'ip_address': '128.112.128.114', 'longitude': -74.6597, 'hop_num': 1}, {'latitude': 40.3756, 'rtt': '0.462 ms', 'ip_address': '128.112.12.22', 'longitude': -74.6597, 'hop_num': 2}, {'latitude': 40.3265, 'rtt': '8.374 ms', 'ip_address': '24.104.128.89', 'longitude': -75.3697, 'hop_num': 3}, {'latitude': 38.0, 'rtt': '21.010 ms', 'ip_address': '68.86.84.177', 'longitude': -97.0, 'hop_num': 4}, {'latitude': 38.0, 'rtt': '12.218 ms', 'ip_address': '68.86.86.70', 'longitude': -97.0, 'hop_num': 5}, {'latitude': 38.0, 'rtt': '31.103 ms', 'ip_address': '75.149.231.62', 'longitude': -97.0, 'hop_num': 6}, {'latitude': 37.4192, 'rtt': '14.752 ms', 'ip_address': '209.85.252.80', 'longitude': -122.0574, 'hop_num': 7}, {'latitude': 37.4192, 'rtt': '18.437 ms', 'ip_address': '72.14.238.18', 'longitude': -122.0574, 'hop_num': 9}, {'latitude': 37.4192008972168, 'rtt': '18.610 ms', 'ip_address': '72.14.238.70', 'longitude': -122.05740356445312, 'hop_num': 9}, {'latitude': 37.4192, 'rtt': '16.696 ms', 'ip_address': '72.14.238.18', 'longitude': -122.0574, 'hop_num': 9}, {'latitude': 37.4192, 'rtt': '24.441 ms', 'ip_address': '72.14.232.21', 'longitude': -122.0574, 'hop_num': 10}, {'latitude': 37.4192, 'rtt': '13.925 ms', 'ip_address': '216.239.49.149', 'longitude': -122.0574, 'hop_num': 10}, {'latitude': 37.4192, 'rtt': '19.800 ms', 'ip_address': '72.14.232.21', 'longitude': -122.0574, 'hop_num': 10}, {'latitude': 37.4192, 'rtt': '14.144 ms', 'ip_address': '8.8.8.8', 'longitude': -122.0574, 'hop_num': 11}]
>>> import json
>>> json.dumps(hops, indent=2)
'[\n {\n "latitude": 40.3756, \n "rtt": "0.862 ms", \n "ip_address": "128.112.128.114", \n "longitude": -74.6597, \n "hop_num": 1\n }, \n {\n "latitude": 40.3756, \n "rtt": "0.462 ms", \n "ip_address": "128.112.12.22", \n "longitude": -74.6597, \n "hop_num": 2\n }, \n {\n "latitude": 40.3265, \n "rtt": "8.374 ms", \n "ip_address": "24.104.128.89", \n "longitude": -75.3697, \n "hop_num": 3\n }, \n {\n "latitude": 38.0, \n "rtt": "21.010 ms", \n "ip_address": "68.86.84.177", \n "longitude": -97.0, \n "hop_num": 4\n }, \n {\n "latitude": 38.0, \n "rtt": "12.218 ms", \n "ip_address": "68.86.86.70", \n "longitude": -97.0, \n "hop_num": 5\n }, \n {\n "latitude": 38.0, \n "rtt": "31.103 ms", \n "ip_address": "75.149.231.62", \n "longitude": -97.0, \n "hop_num": 6\n }, \n {\n "latitude": 37.4192, \n "rtt": "14.752 ms", \n "ip_address": "209.85.252.80", \n "longitude": -122.0574, \n "hop_num": 7\n }, \n {\n "latitude": 37.4192, \n "rtt": "18.437 ms", \n "ip_address": "72.14.238.18", \n "longitude": -122.0574, \n "hop_num": 9\n }, \n {\n "latitude": 37.4192008972168, \n "rtt": "18.610 ms", \n "ip_address": "72.14.238.70", \n "longitude": -122.05740356445312, \n "hop_num": 9\n }, \n {\n "latitude": 37.4192, \n "rtt": "16.696 ms", \n "ip_address": "72.14.238.18", \n "longitude": -122.0574, \n "hop_num": 9\n }, \n {\n "latitude": 37.4192, \n "rtt": "24.441 ms", \n "ip_address": "72.14.232.21", \n "longitude": -122.0574, \n "hop_num": 10\n }, \n {\n "latitude": 37.4192, \n "rtt": "13.925 ms", \n "ip_address": "216.239.49.149", \n "longitude": -122.0574, \n "hop_num": 10\n }, \n {\n "latitude": 37.4192, \n "rtt": "19.800 ms", \n "ip_address": "72.14.232.21", \n "longitude": -122.0574, \n "hop_num": 10\n }, \n {\n "latitude": 37.4192, \n "rtt": "14.144 ms", \n "ip_address": "8.8.8.8", \n "longitude": -122.0574, \n "hop_num": 11\n }\n]'
>>>
You can also run the script directly by passing in the --ip_address option:
$ python traceroute.py --ip_address=8.8.8.8
[
{
"latitude": 40.3756,
"rtt": "0.862 ms",
"ip_address": "128.112.128.114",
"longitude": -74.6597,
"hop_num": 1
},
{
"latitude": 40.3756,
"rtt": "0.462 ms",
"ip_address": "128.112.12.22",
"longitude": -74.6597,
"hop_num": 2
},
{
"latitude": 40.3265,
"rtt": "8.374 ms",
"ip_address": "24.104.128.89",
"longitude": -75.3697,
"hop_num": 3
},
{
"latitude": 38.0,
"rtt": "21.010 ms",
"ip_address": "68.86.84.177",
"longitude": -97.0,
"hop_num": 4
},
{
"latitude": 38.0,
"rtt": "12.218 ms",
"ip_address": "68.86.86.70",
"longitude": -97.0,
"hop_num": 5
},
{
"latitude": 38.0,
"rtt": "31.103 ms",
"ip_address": "75.149.231.62",
"longitude": -97.0,
"hop_num": 6
},
{
"latitude": 37.4192,
"rtt": "14.752 ms",
"ip_address": "209.85.252.80",
"longitude": -122.0574,
"hop_num": 7
},
{
"latitude": 37.4192,
"rtt": "18.437 ms",
"ip_address": "72.14.238.18",
"longitude": -122.0574,
"hop_num": 9
},
{
"latitude": 37.4192008972168,
"rtt": "18.610 ms",
"ip_address": "72.14.238.70",
"longitude": -122.05740356445312,
"hop_num": 9
},
{
"latitude": 37.4192,
"rtt": "16.696 ms",
"ip_address": "72.14.238.18",
"longitude": -122.0574,
"hop_num": 9
},
{
"latitude": 37.4192,
"rtt": "24.441 ms",
"ip_address": "72.14.232.21",
"longitude": -122.0574,
"hop_num": 10
},
{
"latitude": 37.4192,
"rtt": "13.925 ms",
"ip_address": "216.239.49.149",
"longitude": -122.0574,
"hop_num": 10
},
{
"latitude": 37.4192,
"rtt": "19.800 ms",
"ip_address": "72.14.232.21",
"longitude": -122.0574,
"hop_num": 10
},
{
"latitude": 37.4192,
"rtt": "14.144 ms",
"ip_address": "8.8.8.8",
"longitude": -122.0574,
"hop_num": 11
}
]

228
traceroute.py Normal file
View file

@ -0,0 +1,228 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
traceroute - Get traceroute results with associated geolocation information
for each hop for a specified host from geographically distant source(s).
"""
__author__ = 'Dazzlepod (info@dazzlepod.com)'
__copyright__ = 'Copyright (c) 2013 Dazzlepod'
__version__ = '$Revision: #7 $'
import datetime
import json
import optparse
import os
import re
import sys
import urllib
import urllib2
from multiprocessing import Process
from subprocess import Popen, PIPE
class Traceroute(object):
    """Fetch a traceroute for a host and attach geolocation data to each hop.

    The traceroute is not run locally: its text output is scraped from a
    public traceroute web page, cached in ``<ip_address>.txt`` (relative to
    the current working directory), parsed into hops and then geocoded one
    IP address at a time.
    """

    # First <pre>...</pre> block of the traceroute web page.
    _PRE_BLOCK = re.compile(
        r'<pre.*?>(?P<traceroute>.*?)</pre>', re.DOTALL | re.IGNORECASE)

    # A hop line: leading hop number followed by the raw hosts data.
    _HOP_LINE = re.compile(r'^(?P<hop_num>\d+)(?P<hosts>.*?)$')

    # One "<hostname> (<ip_address>) <first rtt> ms" entry inside the hosts
    # data; fields are expected to be separated by single spaces.
    _HOST_ENTRY = re.compile(
        r'(?P<hostname>[\w.-]+) \((?P<ip_address>[\d.]+)\) '
        r'(?P<rtt>\d{1,4}\.\d{1,4} ms)')

    def __init__(self, ip_address='8.8.8.8', debug=False):
        """Create a tracer for `ip_address`; `debug` enables debug output."""
        super(Traceroute, self).__init__()
        self.ip_address = ip_address
        self.debug = debug
        # cache geocoded IP addresses during the lifetime of this instance
        self.locations = {}
        # number of HTTP requests issued so far (shown in debug output)
        self.urlopen_count = 0

    def traceroute(self):
        """Return the geolocated hops towards self.ip_address.

        Instead of running the actual traceroute command, we fetch standard
        traceroute results from a publicly available webpage listed at
        traceroute.org and cache them in '<ip_address>.txt'. Geolocation
        information is then attached to each hop.

        Returns a list of dicts with keys: hop_num, ip_address, latitude,
        longitude, rtt. The list is empty when no traceroute output could
        be fetched.
        """
        self.print_debug("ip_address = %s" % self.ip_address)

        # NOTE: the cache file name is built directly from ip_address.
        cache_path = '%s.txt' % self.ip_address
        if not os.path.exists(cache_path):
            (_status_code, output) = self.get_traceroute_output()
            if not output:
                # Nothing usable was fetched; do not write an empty cache
                # file that every later call would silently reuse.
                return []
            with open(cache_path, 'w') as cache_file:
                cache_file.write(output)

        with open(cache_path, 'r') as cache_file:
            output = cache_file.read()

        # hops = dicts with keys: hop_num, hosts
        hops = self.get_hops(output)

        # hops = dicts with keys: hop_num, hostname, ip_address, rtt
        hops = self.get_formatted_hops(hops)

        # hops = dicts with keys: hop_num, hostname, ip_address, rtt, location
        hops = self.get_geocoded_hops(hops)

        # hops = dicts with keys: hop_num, ip_address, latitude, longitude, rtt
        hops = self.get_stripped_hops(hops)

        return hops

    def get_traceroute_output(self):
        """Fetch traceroute output from a webpage.

        Returns a (status_code, traceroute) tuple where traceroute is the
        stripped content of the first <pre> block of the response, or an
        empty string when the response contains no such block.
        """
        # Example from traceroute.org
        url = "http://www.net.princeton.edu/cgi-bin/traceroute.pl"
        (status_code, response) = self.urlopen(
            url, context={'target': self.ip_address})
        match = self._PRE_BLOCK.search(response)
        output = match.group('traceroute').strip() if match else ''
        return (status_code, output)

    def get_hops(self, traceroute):
        """Get hops from a traceroute output and return the hops in an array
        of dicts each representing hop number and the associated hosts data.

        Lines that do not start with a number (after stripping whitespace)
        are skipped. Both values in each dict are unparsed strings.
        """
        hops = []
        for line in traceroute.split('\n'):
            match = self._HOP_LINE.match(line.strip())
            if match is None:
                continue
            hop = match.groupdict()
            self.print_debug(hop)
            hops.append(hop)
        return hops

    def get_formatted_hops(self, hops):
        """hosts data from get_hops() is represented in a single string.
        We use this function to better represent the hosts data in a dict.

        Returns one dict per host found, with keys: hop_num (int), hostname,
        ip_address and rtt (the first round-trip time, e.g. '0.862 ms').
        A hop with several hosts yields several dicts sharing one hop_num.
        """
        formatted_hops = []
        for hop in hops:
            hop_num = int(hop['hop_num'].strip())
            # Collapse every run of whitespace to a single space so that the
            # single-space host pattern matches column-aligned output.
            hosts = re.sub(r'\s+', ' ', hop['hosts']).strip()
            # findall() yields one (hostname, ip_address, rtt) tuple per host
            for (hostname, ip_address, rtt) in self._HOST_ENTRY.findall(hosts):
                hop_context = {
                    'hop_num': hop_num,
                    'hostname': hostname,
                    'ip_address': ip_address,
                    'rtt': rtt,
                }
                self.print_debug(hop_context)
                formatted_hops.append(hop_context)
        return formatted_hops

    def get_geocoded_hops(self, hops):
        """Return hops from get_formatted_hops() with geolocation information
        for each hop.

        Each hop dict is updated in place with a 'location' key. Lookups,
        including failed ones (None), are cached in self.locations so each
        IP address is requested at most once per instance.
        """
        for hop in hops:
            ip_address = hop['ip_address']
            if ip_address not in self.locations:
                self.locations[ip_address] = self.get_location(ip_address)
            hop['location'] = self.locations[ip_address]
        return hops

    def get_stripped_hops(self, hops):
        """Return hops from get_geocoded_hops() with each dict containing
        data ready for plotting with e.g. Google Maps JavaScript API.

        Hops without a location are dropped.
        """
        stripped_hops = []
        for hop in hops:
            location = hop['location']
            if not location:
                continue
            stripped_hops.append({
                'hop_num': hop['hop_num'],
                'ip_address': hop['ip_address'],
                'latitude': location['latitude'],
                'longitude': location['longitude'],
                'rtt': hop['rtt'],
            })
        return stripped_hops

    def get_location(self, ip_address):
        """Return geolocation information for the specified IP address, e.g.:
        {"ip": "75.126.24.77",
         "hostname": "web365.webfaction.com",
         "isp": "SoftLayer Technologies",
         "organization": "Client Intellect",
         "country": "United States",
         "region": "Texas",
         "city": "Dallas",
         "latitude": 32.9299,
         "longitude": -96.8353}

        Returns None when the request fails, the response is not a JSON
        object, or it lacks a 'latitude' or 'longitude' key.
        """
        url = "https://dazzlepod.com/ip/%s.json" % ip_address
        (status_code, json_data) = self.urlopen(url)
        if status_code != 200 or not json_data:
            return None
        try:
            candidate = json.loads(json_data)
        except ValueError:
            # Malformed payload: treat this address as having no location
            # rather than aborting the whole traceroute.
            return None
        if not isinstance(candidate, dict):
            return None
        if 'latitude' in candidate and 'longitude' in candidate:
            return candidate
        return None

    def urlopen(self, url, context=None):
        """Perform HTTP GET/POST on the specified URL and return the resultant
        status code and response.

        A POST is sent when `context` (a dict of form fields) is non-empty,
        otherwise a GET. Returns a (status_code, response) tuple. On an HTTP
        error the status code is the error code and the response is ''. On
        any other URL error the response is '' and the status code is left
        at 200, so callers must also check that the response is non-empty.
        """
        status_code = 200
        data = urllib.urlencode(context) if context else None
        request = urllib2.Request(url=url, data=data)
        response = ''
        try:
            handle = urllib2.urlopen(request)
            try:
                response = handle.read()
            finally:
                # Release the connection promptly instead of relying on GC.
                handle.close()
        except urllib2.HTTPError as e:
            status_code = e.code
        except urllib2.URLError as e:
            # Best effort: report the failure in debug mode and carry on
            # with an empty response.
            self.print_debug("url = %s, error = %s" % (url, e))

        # getattr() keeps the counter working for instances that were
        # created without running __init__.
        self.urlopen_count = getattr(self, 'urlopen_count', 0) + 1
        self.print_debug("[%d] url = %s, status_code = %d" % (
            self.urlopen_count, url, status_code))
        return (status_code, response)

    def print_debug(self, msg):
        """Print debug message to standard output."""
        if self.debug:
            print("[DEBUG %s] %s" % (datetime.datetime.now(), msg))
def main():
    """Command-line entry point.

    Parses --ip_address and --debug from the command line. When an IP
    address is given, runs a Traceroute for it and prints the resulting
    hops as indented JSON; otherwise prints the usage message.

    Returns 0, which the caller passes on as the process exit status.
    """
    parser = optparse.OptionParser(
        """%prog --ip_address=IP_ADDRESS""",
        version=("traceroute " + __version__))
    parser.add_option(
        "-i", "--ip_address", type="string", default="",
        help="IP address of destination host")
    parser.add_option(
        "-d", "--debug", action="store_true", default=False,
        help="Show debug output")
    (options, _positional) = parser.parse_args()

    # Without a destination there is nothing to trace: show usage and stop.
    if not options.ip_address:
        parser.print_usage()
        return 0

    tracer = Traceroute(ip_address=options.ip_address, debug=options.debug)
    print(json.dumps(tracer.traceroute(), indent=2))
    return 0
if __name__ == '__main__':
    # Run the command-line interface only when executed as a script, and
    # hand main()'s return value to the shell as the exit status.
    exit_status = main()
    sys.exit(exit_status)