#!/usr/bin/python

import urllib2 as urllib
from os import path, stat
from datetime import datetime
from time import time

# Pages fetched by get(). The commented-out URLs are alternative endpoints
# that are currently disabled (presumably weekend-only views -- TODO confirm).
#URL = 'http://www.tfl.gov.uk/tfl/livetravelnews/realtime/by-date.aspx?offset=weekend'
TUBE_URL = 'http://www.tfl.gov.uk/tfl/livetravelnews/realtime/tube/default.html'
#DLR_URL = 'http://www.tfl.gov.uk/tfl/livetravelnews/realtime/dlr/dlr-all-weekend.html'
DLR_URL = 'http://www.tfl.gov.uk/tfl/livetravelnews/realtime/dlr/default.html'

# Default path of the file written by get().
OUTFILE = 'Closures.txt'

# Status texts (as returned by get_info()) that get() compares against while
# it is reading the Tube lines section.
# NOTE(review): TUBE_GOOD_SERVICE is not referenced by any code in this file.
TUBE_GOOD_SERVICE = ['Good service']
TUBE_PART_CLOSURE = ['Part closure', 'Part suspended']
TUBE_DELAYS = ['Severe delays']
TUBE_PLANNED_CLOSURE = ['Planned closure']
# Prefix of a (stripped) page line that carries a Tube line name.
TUBE_LINE_ID = '<dt class="'
# Prefix of a (stripped) page line that carries a closed station name.
TUBE_STATION_ID = '<h3>'
# Stripped page line that ends the closed-stations section.
TUBE_STATION_END_ID = '<dt>Station maintenance</dt>'
# Heading texts that switch get() into line mode / station mode.
TUBE_LINES = 'Tube lines'
TUBE_STATIONS = 'Closed stations'

# Route name used for DLR entries, and the prefix of the page line that
# precedes a DLR status message.
DLR_NAME = 'DLR'
DLR_MESSAGE_ID = '<div class="message">'

# Line names searched for in a closed station's detail line; a match makes
# get() record an interchange closure instead of a full station closure.
LINES = ['Bakerloo', 'Central', 'Circle', 'District', 'Hammersmith & City', 'Jubilee', 'Metropolitan', 'Northern', 'Piccadilly', 'Victoria', 'Waterloo & City']

# [old, new] text substitutions:
#   LINE_REPLACEMENTS    -- applied by get_info() to extracted text.
#   STATION_REPLACEMENTS -- applied to station names in parse_routeinfo() and get().
#   INFO_REPLACEMENTS    -- applied by parse_routeinfo() to the raw message so
#                           that "between X and Y" reads "from X to Y".
LINE_REPLACEMENTS = [['H\'smith', 'Hammersmith']]
STATION_REPLACEMENTS = [['Bank','Bank & Monument'], ['Monument','Bank & Monument'], ['Station', ''], ['Earls Court', 'Earl\'s Court']]
INFO_REPLACEMENTS = [['</p><P>', ''], [' between ', ' from '], [' and ', ' to ']]

def _clean_route_station(name):
	"""Return ``name`` with the STATION_REPLACEMENTS rules applied.

	A rule is skipped when its replacement text already occurs in the name,
	so that 'Bank' -> 'Bank & Monument' is not expanded a second time by the
	following 'Monument' rule (which used to give 'Bank & Bank & Monument').
	"""
	for old_text, new_text in STATION_REPLACEMENTS:
		if new_text and new_text in name:
			continue
		name = name.replace(old_text, new_text)
	# Dropping a word such as 'Station' can leave stray outer whitespace.
	return name.strip()


def parse_routeinfo(msg):
	"""Extract the affected route sections from a service message.

	The message is first normalised with INFO_REPLACEMENTS so that
	"between X and Y" reads "from X to Y". Everything from the first
	occurrence of 'Service' onwards is ignored. Each "from <origin> to
	<destination>" pair is then collected; the destination is the run of
	capitalised words that follows 'to '.

	Args:
		msg: the message text to scan.

	Returns:
		A list of "origin,destination" strings, or
		"origin,destination,bus" strings when the message contains ' bus '.
		The list is empty when no complete from/to pair is found.
	"""
	routes = []
	for repl in INFO_REPLACEMENTS:
		msg = msg.replace(repl[0], repl[1])
	# The trailing ' .' guarantees a non-capitalised token that terminates
	# the destination scan below.
	msg = msg.split('Service')[0] + ' .'
	bus = ' bus ' in msg
	from_keyword = 'from '
	to_keyword = 'to '
	while msg:
		from_at = msg.find(from_keyword)
		if from_at == -1:
			# No further route; never slice with find()'s -1 result.
			break
		rest = msg[from_at + len(from_keyword):].strip()
		to_at = rest.find(to_keyword)
		if to_at == -1:
			break
		origin = rest[:to_at].strip()
		words = rest[to_at + len(to_keyword):].split()
		# Reset per iteration so a previous route's destination is not reused.
		destination = ""
		msg = ""
		for i, word in enumerate(words):
			if not word[0].isupper():
				destination = ' '.join(words[:i])
				# Keep scanning the remainder for further routes.
				msg = ' '.join(words[i:])
				break
		if origin == "" or destination == "":
			break
		# Drop anything after a full stop that got attached to the name.
		origin = _clean_route_station(origin.split('.')[0])
		destination = _clean_route_station(destination.split('.')[0])
		if bus:
			routes.append("%s,%s,bus" % (origin, destination))
		else:
			routes.append("%s,%s" % (origin, destination))
	return routes

def outdated(fname):
	"""Tell whether ``fname`` was last modified on a day other than today.

	Args:
		fname: path of the file to check.

	Returns:
		True when the file cannot be stat'ed (e.g. it does not exist) or when
		the local calendar date of its modification time differs from today's
		local date; False otherwise.
	"""
	try:
		modified = stat(fname).st_mtime
	except OSError:
		# Missing or unreadable file: treat it as outdated.
		return True
	# Compare real dates. Concatenating year/month/day with "%d%d%d" is
	# ambiguous: 1 Dec... e.g. month 1/day 12 and month 11/day 2 both give "112".
	return datetime.fromtimestamp(modified).date() != datetime.now().date()

def old(fname):
	"""Tell whether ``fname`` is missing or older than half an hour.

	Returns True when the path does not exist or when more than 1800 seconds
	have passed since its last modification; False otherwise.
	"""
	if path.exists(fname):
		age_in_seconds = time() - stat(fname).st_mtime
		return age_in_seconds > 1800
	return True

def get_info(msg):
	"""Return the text that follows the first '>' in ``msg``, up to the next '<'.

	'&amp;' is decoded to '&', the LINE_REPLACEMENTS substitutions are
	applied, and surrounding whitespace is removed. An empty string is
	returned when ``msg`` has no '>' or when no '<' occurs between the first
	'>' and the following '>' (or the end of the string).
	"""
	# Only the piece between the first and second '>' matters.
	pieces = msg.split('>', 2)
	if len(pieces) < 2:
		return ""
	inner = pieces[1]
	if '<' not in inner:
		return ""
	text = inner[:inner.index('<')].replace('&amp;', '&')
	for old_text, new_text in LINE_REPLACEMENTS:
		text = text.replace(old_text, new_text)
	return text.strip()

def parse_affected(routename, portions):
	"""Prefix every entry of ``portions`` with ``routename`` and a comma.

	Returns a list of "routename,portion" strings, or an empty list when
	``routename`` is the empty string.
	"""
	if routename == "":
		return []
	return ["%s,%s" % (routename, segment) for segment in portions]

def _fetch_lines(url):
	"""Download ``url`` and return the response body as a list of lines."""
	response = urllib.urlopen(url, None)
	try:
		return response.readlines()
	finally:
		# Release the connection even when reading fails part-way.
		response.close()


def _normalise_closed_station(name):
	"""Return a closed-station name with all STATION_REPLACEMENTS applied.

	A rule is skipped when its replacement text is already in the name, so
	'Bank' -> 'Bank & Monument' is not expanded again by the 'Monument' rule.
	"""
	for old_text, new_text in STATION_REPLACEMENTS:
		if new_text and new_text in name:
			continue
		name = name.replace(old_text, new_text)
	return name.strip()


def _parse_tube_page(tube_lines):
	"""Collect closures and delays from the lines of the Tube status page.

	Returns a 5-tuple of strings (planned closures, part closures, delays,
	station closures, interchange closures); every entry inside a string is
	terminated by ';'.
	"""
	mode = ""
	affected = ""
	planned = []
	part = []
	delayed = []
	stations = []
	interchanges = []
	line_count = len(tube_lines)
	for i, raw in enumerate(tube_lines):
		status = get_info(raw)
		if status == "":
			continue
		if status == TUBE_LINES:
			mode = "T"
			print("  (1/3) Tube lines.")
			continue
		if status == TUBE_STATIONS:
			mode = "S"
			print("  (2/3) Tube stations.")
			continue
		line = raw.strip()
		# The detail text sits two lines below the current one; use an empty
		# string instead of raising IndexError on a truncated page.
		if i + 2 < line_count:
			detail = tube_lines[i + 2]
		else:
			detail = ""
		if mode == "T":
			if line.startswith(TUBE_LINE_ID):
				# Remember the line name for the status rows that follow.
				affected = get_info(line)
			else:
				if status in TUBE_PART_CLOSURE:
					part.append(';'.join(parse_affected(affected, parse_routeinfo(detail))) + ';')
				if status in TUBE_PLANNED_CLOSURE:
					planned.append(affected + ';')
				if status in TUBE_DELAYS:
					delayed.append(affected + ';')
		elif mode == "S":
			if line == TUBE_STATION_END_ID:
				mode = ""
			if line.startswith(TUBE_STATION_ID):
				affected = _normalise_closed_station(get_info(line))
				served = [tube_line for tube_line in LINES if tube_line in detail]
				if served:
					# Only the named lines are affected at this station.
					for tube_line in served:
						interchanges.append("%s,%s,*;" % (affected, tube_line))
				else:
					stations.append(affected + ";")
	return (''.join(planned), ''.join(part), ''.join(delayed),
		''.join(stations), ''.join(interchanges))


def _parse_dlr_page(dlr_lines):
	"""Return the ';'-terminated DLR part closures found in the DLR page lines."""
	entries = []
	for i, raw in enumerate(dlr_lines):
		if not raw.startswith(DLR_MESSAGE_ID) or i + 1 >= len(dlr_lines):
			continue
		# The message text is on the line after the marker; read it before
		# testing it for a suspension.
		message = get_info(dlr_lines[i + 1])
		if 'suspended' not in message:
			continue
		affected = ';'.join(parse_affected(DLR_NAME, parse_routeinfo(message)))
		if affected:
			entries.append(affected + ';')
	return ''.join(entries)


def get(fname=OUTFILE):
	"""Refresh the closures file ``fname`` from the TfL status pages.

	When the file is missing, older than old() allows, or outdated()
	according to its date, the Tube and DLR pages are downloaded and parsed.
	The file is then rewritten with four lines: line closures, delays,
	station closures and interchange closures. Each line holds
	';'-separated entries followed by a '# ...' label; a section without
	entries is written as a single ';'.

	If the download fails, a message is printed and the file is rewritten
	(with empty sections) only when it is missing or outdated.

	Args:
		fname: path of the file to refresh; defaults to OUTFILE.

	Returns:
		None. Progress and error messages are printed to standard output.
	"""
	planned_closures = ""
	part_closures = ""
	delays = ""
	station_closures = ""
	interchange_closures = ""

	bOld = old(fname)
	if bOld or outdated(fname):
		try:
			print("Fetching data from TfL website...")
			tube_lines = _fetch_lines(TUBE_URL)
			dlr_lines = _fetch_lines(DLR_URL)
		except IOError:
			# A failed download must not by itself force a rewrite.
			bOld = False
			print("Could not fetch data.")
		else:
			print("Parsing data...")
			(planned_closures, part_closures, delays,
				station_closures, interchange_closures) = _parse_tube_page(tube_lines)
			print("  (3/3) DLR.")
			part_closures += _parse_dlr_page(dlr_lines)

	# Empty sections are written as a lone separator.
	if part_closures == "":
		part_closures = ";"
	if station_closures == "":
		station_closures = ";"
	if interchange_closures == "":
		interchange_closures = ";"
	if delays == "":
		delays = ";"

	# outdated() is also True when the file does not exist.
	if bOld or outdated(fname):
		try:
			print("Writing file...")
			with open(fname, 'w') as f:
				f.write(planned_closures + part_closures + '# Line closures\n')
				f.write(delays + '# Delays\n')
				f.write(station_closures + '# Station closures\n')
				f.write(interchange_closures + '# Interchange closures')
			print("Done.")
		except IOError:
			print("Could not write file.")

# When run as a script, refresh the default closures file (OUTFILE).
if __name__ == "__main__":
	get()
