# DE_BERLIN_1 data was recorded with local time rather than UTC.
# This script was used to fix the input files.  It processes *all*
# csv.gz files and assumes the time stamps are local time in Berlin,
# including DST.
#
# It writes out new files with -fixed appended to the original file name.
# Rename these back to the original names (i.e., drop the -fixed suffix)
# when satisfied.

import datetime
import glob
import gzip
import os
import shutil
import sys

import pytz

SRC_TIMEZONE = pytz.timezone("Europe/Berlin")
DATE_FORMAT = "%d.%m.%Y"
TIME_FORMAT = "%H:%M:%S"


def doConversion(src, dest):
  """Copies the lines of src to dest, rewriting leading time stamps as UTC.

  src is iterated line by line; dest only needs a write method.

  A line is expected to look like <date>;<time>;<rest>, with the date
  in DATE_FORMAT and the time in TIME_FORMAT.  The first two fields are
  interpreted as local time in SRC_TIMEZONE and replaced by the
  corresponding UTC values in the same formats; <rest> is passed through
  untouched.

  Lines with fewer than three ;-separated fields, or with leading fields
  that do not parse as date and time, are copied verbatim.
  """
  # No real csv parser here on purpose: we want to preserve as much of the
  # input formatting as possible, and the format is not fancy csv anyway.
  stampFormat = DATE_FORMAT+";"+TIME_FORMAT
  for line in src:
    try:
      datePart, timePart, remainder = line.split(";", 2)
      naiveStamp = datetime.datetime.strptime(
        datePart+";"+timePart, stampFormat)
      # NOTE(review): localize() is called with its default is_dst, so
      # wall-clock times in the repeated hour at the end of DST are
      # presumably all taken as standard time -- confirm against pytz docs.
      asUTC = SRC_TIMEZONE.localize(naiveStamp).astimezone(pytz.utc)
      dest.write(";".join([
        asUTC.strftime(DATE_FORMAT),
        asUTC.strftime(TIME_FORMAT),
        remainder]))
    except ValueError:
      # No usable date/time at the start (probably a comment line).
      dest.write(line)


def convertOne(fName):
  """Writes a time-stamp-corrected copy of the gzipped file fName.

  The input is read with gzip, run through doConversion, and written
  gzip-compressed to fName+"-fixed".  On success, the permission bits of
  fName are copied to the new file and a progress message is printed.

  Any exception raised while opening the files or during the conversion
  propagates to the caller; both files are closed in that case, but a
  partially written -fixed file may be left behind.
  """
  destName = fName+"-fixed"
  srcFile = gzip.open(fName)
  try:
    destFile = gzip.open(destName, "w")
    try:
      doConversion(srcFile, destFile)
    finally:
      destFile.close()
  finally:
    # Close the source even if opening the destination or the conversion
    # itself failed; previously it was only closed on success.
    srcFile.close()
  shutil.copymode(fName, destName)
  # Parenthesised single-argument form: prints the same text whether
  # print is a statement or a function.
  print("Done %s"%fName)


if __name__=="__main__":
  # Convert every file matching *.csv.gz in the current working directory.
  inputNames = glob.glob("*.csv.gz")
  for inputName in inputNames:
    convertOne(inputName)

# vim:et:sw=2:sta
