"""
This script was used to mirror the original data from the CALIFA upstream.

The URLs in here will no longer work when you read this.

Basically, it's ftpmirror.py from the python distribution tailored for the task
at hand.
"""

import os
import sys
import time
import getopt
import ftplib
import netrc
import random
from fnmatch import fnmatch

verbose = 2 # 0 for -q, 2 for -v
rmok = False
skippats = ['.', '..', '.mirrorinfo']

host = "ftp.caha.es"
port = 21

def getConn():
    f = ftplib.FTP()
    f.connect(host, port)
    f.login()
    return f

def mirrorDir(remotedir, localdir):
    f = getConn()
    f.cwd(remotedir)
    mirrorsubdir(f, localdir)

# Core logic: mirror one subdirectory (recursively)
def mirrorsubdir(f, localdir):
    pwd = f.pwd()
    if localdir and not os.path.isdir(localdir):
        try:
            makedir(localdir)
        except os.error, msg:
            print "Failed to establish local directory", repr(localdir)
            return
    infofilename = os.path.join(localdir, '.mirrorinfo')
    try:
        text = open(infofilename, 'r').read()
    except IOError, msg:
        text = '{}'
    try:
        info = eval(text)
    except (SyntaxError, NameError):
        print 'Bad mirror info in', repr(infofilename)
        info = {}
    subdirs = []
    listing = []
    f.retrlines('LIST', listing.append)
    filesfound = []
    for line in listing:
        # Parse, assuming a UNIX listing
        words = line.split(None, 8)
        if len(words) < 6:
            continue
        filename = words[-1].lstrip()
        i = filename.find(" -> ")
        if i >= 0:
            # words[0] had better start with 'l'...
            print 'Found symbolic link %r' % (filename,)
            linkto = filename[i+4:]
            filename = filename[:i]
        infostuff = words[-5:-1]
        mode = words[0]
        skip = 0
        for pat in skippats:
            if fnmatch(filename, pat):
                skip = 1
                break
        if skip:
            continue
        if mode[0] == 'd':
            subdirs.append(filename)
            continue
        filesfound.append(filename)
        if info.has_key(filename) and info[filename] == infostuff:
            continue
        fullname = os.path.join(localdir, filename)
        if os.path.exists(fullname):
                print "Skipping %s, already exists"%fullname
                continue
        tempname = os.path.join(localdir, '@'+filename)
        try:
            os.unlink(tempname)
        except os.error:
            pass
        if mode[0] == 'l':
            try:
                os.symlink(linkto, tempname)
            except IOError, msg:
                print "Can't create %r: %s" % (tempname, msg)
                continue
        else:
            try:
                fp = open(tempname, 'wb')
            except IOError, msg:
                print "Can't create %r: %s" % (tempname, msg)
                continue
            fp1 = fp
            t0 = time.time()
            print "Downloading %s"%fullname
            try:
                f.retrbinary('RETR ' + filename,
                             fp1.write, 8*1024)
            except ftplib.error_perm, msg:
                print msg
            t1 = time.time()
            bytes = fp.tell()
            fp.close()
            if fp1 != fp:
                fp1.close()
        try:
            os.unlink(fullname)
        except os.error:
            pass            # Ignore the error
        try:
            os.rename(tempname, fullname)
        except os.error, msg:
            print "Can't rename %r to %r: %s" % (tempname, fullname, msg)
            continue
        info[filename] = infostuff
        writedict(info, infofilename)
        time.sleep(10+random.randint(5, 50))
    # Remove files from info that are no longer remote
    deletions = 0
    for filename in info.keys():
        if filename not in filesfound:
            if verbose:
                print "Removing obsolete info entry for",
                print repr(filename), "in", repr(localdir or ".")
            del info[filename]
            deletions = deletions + 1
    if deletions:
        writedict(info, infofilename)
    #
    # Remove local files that are no longer in the remote directory
    try:
        if not localdir: names = os.listdir(os.curdir)
        else: names = os.listdir(localdir)
    except os.error:
        names = []
    for name in names:
        if name[0] == '.' or info.has_key(name) or name in subdirs:
            continue
        skip = 0
        for pat in skippats:
            if fnmatch(name, pat):
                skip = 1
                break
        if skip:
            continue
        fullname = os.path.join(localdir, name)
        if not rmok:
            if verbose:
                print 'Local file', repr(fullname),
                print 'is no longer pertinent'
            continue
        if verbose: print 'Removing local file/dir', repr(fullname)
        remove(fullname)
    #
    # Recursively mirror subdirectories
    for subdir in subdirs:
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        try:
            f.cwd(subdir)
        except ftplib.error_perm, msg:
            print "Can't chdir to", repr(subdir), ":", repr(msg)
        else:
            mirrorsubdir(f, localsubdir)
            f.cwd('..')
        newpwd = f.pwd()
        if newpwd != pwd:
            print 'Ended up in wrong directory after cd + cd ..'
            print 'Giving up now.'
            break

# Helper to remove a file or directory tree
def remove(fullname):
    if os.path.isdir(fullname) and not os.path.islink(fullname):
        try:
            names = os.listdir(fullname)
        except os.error:
            names = []
        ok = 1
        for name in names:
            if not remove(os.path.join(fullname, name)):
                ok = 0
        if not ok:
            return 0
        try:
            os.rmdir(fullname)
        except os.error, msg:
            print "Can't remove local directory %r: %s" % (fullname, msg)
            return 0
    else:
        try:
            os.unlink(fullname)
        except os.error, msg:
            print "Can't remove local file %r: %s" % (fullname, msg)
            return 0
    return 1


# Create a directory if it doesn't exist.  Recursively create the
# parent directory as well if needed.
def makedir(pathname):
    if os.path.isdir(pathname):
        return
    dirname = os.path.dirname(pathname)
    if dirname: makedir(dirname)
    os.mkdir(pathname, 0777)

# Write a dictionary to a file in a way that can be read back using
# rval() but is still somewhat readable (i.e. not a single long line).
# Also creates a backup file.
def writedict(dict, filename):
    dir, fname = os.path.split(filename)
    tempname = os.path.join(dir, '@' + fname)
    backup = os.path.join(dir, fname + '~')
    try:
        os.unlink(backup)
    except os.error:
        pass
    fp = open(tempname, 'w')
    fp.write('{\n')
    for key, value in dict.items():
        fp.write('%r: %r,\n' % (key, value))
    fp.write('}\n')
    fp.close()
    try:
        os.rename(filename, backup)
    except os.error:
        pass
    os.rename(tempname, filename)


if __name__ == '__main__':
    mirrorDir("CALIFA/reduced",
    	"/GavoInput/califa/data/images/data")
