#!/usr/bin/python

#Copyright (c) 2015 A. Mennucci
#License: GNU GPL v2

# see the discussion in http://git-annex.branchable.com/bugs/migrate_and_move_duplicates_data/

import sys, os, subprocess, getopt, json, stat

# TODO : it may be easily extended to other hashes pairs e.g. SHA512E or SHA512

# Usage text; it is written to stderr when the -h option is given.
# "-DD" is not a separate switch: getopt reports two -D options and the
# option loop below counts them, so the drop logic sees a count above 1.
__doc__ = """Usage: git-annex_de-re-link_hash-E { -L | -R | -D } [ -v ] [ -k ] dir [dir...]
Suppose that a file has key XXX (either SHA256E or SHA256); call YYY the
other type of key for the same file  (either SHA256 or  SHA256E)
This programs check if XXX and/or YYY are present in the local
annex repository, then takes some actions.
  -L  if both keys exist, hardlink them
  -R  if the file key does not exist, but the other type exist,
      use the YYY to build the file key XXX
  -D  if the file key XXX exists and also the other type YYY exist,
      drop the YYY key
  -DD as above, but also applies when XXX does not exist in this annex
      but there are enough copies of it around
Note when -D is specified, then -L is not performed.
Other options:
  -k  no-act
  -v  verbose
"""

# Verbosity level; every -v on the command line raises it by one.
verbose = 0
# When False (-k), actions are announced but not carried out.
act = True

# When True, scan() adds `--include *` to `git annex find`
# (presumably so that files whose content is not present locally are
# listed too -- confirm against the git-annex documentation).
# NOTE: a '-r' switch for this flag appears to have been planned but is
# not wired up; the value is always True.
remote = True

# -L : hardlink the two keys when both are present
actionL = False
# -R : rebuild the file's key from the other key
actionR = False

# Number of -D options given: 0 = never drop, 1 = drop the other key when
# the file's own key is present, 2 or more = also consider dropping when
# the file's own key is not present here.
actionD = 0


try:
    opts, args = getopt.getopt(sys.argv[1:], 'vkhLRD')
except getopt.GetoptError as e:
    # An unknown option used to end in a traceback; report it and show the
    # usage text instead.
    sys.stderr.write('%s\n' % (e,))
    sys.stderr.write(__doc__)
    sys.exit(2)

for o, v in opts:
    if o == '-v':
        verbose += 1
    elif o == '-k':
        act = False
    elif o == '-L':
        actionL = True
    elif o == '-D':
        actionD += 1
    elif o == '-R':
        actionR = True
    elif o == '-h':
        sys.stderr.write(__doc__)
        sys.exit(0)

if not (verbose or actionL or actionR or actionD):
    sys.stderr.write('Running with no options is not really useful. Try -h.\n')
    sys.exit(1)

# All object paths below are built relative to the current directory, so
# the script has to be started from the top of the repository.
if not os.path.isdir(os.path.join(os.getcwd(), ".git")):
    sys.stderr.write('Must be ran from root GIT directory.\n')
    sys.exit(1)

# Prefix, relative to the repository root, of the paths where annexed
# objects are looked up.
GAO = '.git/annex/objects/'

if not args:
    sys.stderr.write('Please specify directories.\n')
    # This is an error condition, so do not report success to the caller.
    sys.exit(1)

def aek(key):
    """Run `git annex examinekey --json KEY` and return its parsed output.

    key -- the git-annex key to examine.

    Returns the object decoded from the JSON that the command wrote to
    its standard output.

    If the command exits with a non-zero status a '** failed examinekey'
    line is printed.  The output is parsed in any case, so a ValueError is
    raised when it is not valid JSON (for instance when it is empty).
    """
    cmd = ['git', 'annex', 'examinekey', '--json', key]
    spo = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    out = spo.communicate()[0]
    # Check the exit status *before* parsing: otherwise a failed command
    # with empty output raises from json.loads and the diagnostic is lost.
    if spo.returncode != 0:
        print('** failed examinekey %s' % (key,))
    return json.loads(out.decode('utf-8'))

class verbohelp(object):
    """Callable that prints its message at most once.

    msg       -- the text to print.
    verbosity -- the message is printed only if the global `verbose`
                 level is at least this value; the comparison is done
                 once, when the object is created.

    Calling the object prints the message the first time (if enabled) and
    does nothing on every later call.
    """
    def __init__(self, msg, verbosity=1):
        self.msg = msg
        self.doprint = verbose >= verbosity

    def __call__(self):
        if self.doprint:
            # print() with one argument behaves the same on Python 2 and 3.
            print(self.msg)
        self.doprint = False
def _rebuild_first_key(fil, fc, fo, so):
    "Recreate the missing object `fo` as a hardlink to `so`, then run a fast fsck on `fil`."
    assert os.path.islink(fil) and not os.path.exists(fil)
    print('Rebuilding key %r for %r' % (fc, fil))
    if not act:
        return
    objdir = os.path.dirname(fo)
    try:
        if not os.path.isdir(objdir):
            os.makedirs(objdir)
        os.link(so, fo)
        # this trickery has git-annex recognizing the new key
        ar = ['git', 'annex', 'fsck', '--fast', fil]
        if subprocess.call(ar) != 0:
            print('** failed %s' % (ar,))
    except Exception as e:
        print('** %s' % (fil,))
        print('** %s' % (e,))
    # Postcondition kept from the original: the symlink must now resolve.
    assert os.path.islink(fil) and os.path.exists(fil)


def _link_duplicate_keys(fil, fo, so):
    "Replace the object `fo` with a hardlink to `so`, unless they already share an inode."
    sos = os.stat(so)
    fos = os.stat(fo)
    assert fos.st_size == sos.st_size   # paranoia
    assert fos.st_dev == sos.st_dev     # paranoia
    if fos.st_ino == sos.st_ino:
        if verbose:
            print('This file is already linked ' + repr(fil))
        return
    print('Linking duplicate keys for ' + repr(fil))
    if not act:
        return
    objdir = os.path.dirname(fo)
    backup = fo + "~~"
    try:
        om = stat.S_IMODE(os.stat(objdir).st_mode)
        # Only touch directories that carry one of the expected read-only
        # modes.  An explicit check survives `python -O` and gives a
        # readable message in the report below.
        if om not in (0o555, 0o1555):
            raise RuntimeError('unexpected mode %o on %s' % (om, objdir))
        os.chmod(objdir, 0o755)
        try:
            os.rename(fo, backup)
            try:
                os.link(so, fo)
                os.unlink(backup)
            except Exception as e:
                print('** %s' % (fil,))
                print('** %s' % (e,))
                # put the original object back in place
                os.rename(backup, fo)
        finally:
            # Restore the directory mode on every path; previously a failed
            # rename left the directory writable.
            os.chmod(objdir, om)
    except Exception as e:
        print('** %s' % (fil,))
        print('** %s' % (fo,))
        print('** %s' % (so,))
        print('** %s' % (e,))


def _drop_second_key(fil, sk):
    "Drop key `sk` with `git annex dropkey --force` when that is judged safe."
    safe = os.path.exists(fil)
    assert os.path.islink(fil)
    if actionD > 1 and not safe:
        # if it is not here, check there are enough copies...
        ar = ['git', 'annex', 'fsck', '--fast', fil]
        safe = subprocess.call(ar) == 0
        if not safe:
            print('** not enough copies of  %s' % (fil,))
    if not safe:
        return
    print(' Dropping redundant key for file ' + repr(fil) + ' : ' + repr(sk))
    if act:
        ar = ['git', 'annex', 'dropkey', '--force', sk]
        if subprocess.call(ar) != 0:
            print('** failed %s' % (ar,))


def act_on_pair(fil, fk,fo,fc,  sk,so,sc):
    """Report on, and possibly act on, the two keys of one annexed file.

    fil -- name of the file; it must be a symlink whose target ends with fo
    fk  -- first key, i.e. XXX, the key the file currently has
    fo  -- path of the first key's object in the annex
    fc  -- hash used by the first key, 'SHA256E' or 'SHA256'
    sk  -- second key, i.e. YYY, the key of the other type
    so  -- path of the second key's object in the annex
    sc  -- hash used by the second key; must differ from fc

    What is done depends on the global switches:
      actionR -- if only the second object exists, hardlink it to the place
                 of the first one and run `git annex fsck --fast` on fil
      actionL -- (only without actionD) if both objects exist and are not
                 already the same inode, replace the first object with a
                 hardlink to the second
      actionD -- if the second object exists, drop the second key; with
                 actionD > 1 this is also attempted when fil does not
                 resolve, provided `git annex fsck --fast` succeeds
    When the global `act` is false the actions are only announced.
    Progress and '**'-prefixed error lines are printed to stdout.
    The preconditions above are checked with assert statements.
    """
    assert fc in ('SHA256E', 'SHA256')
    assert sc in ('SHA256E', 'SHA256')
    assert fc != sc
    assert fk[:len(fc)+1] == fc+'-'
    assert sk[:len(sc)+1] == sc+'-'
    assert os.path.islink(fil)
    assert os.readlink(fil)[-len(fo):] == fo
    vc = verbohelp(('This file has key %r : %r' % (fc, fil)))
    vs = verbohelp((' In this annex there is the  key %r as %r' % (sc, so)))
    first_present = os.path.isfile(fo)
    second_present = os.path.isfile(so)
    # report
    vc()
    if first_present:
        if verbose >= 2:
            print(' And it is present in this annex')
    else:
        print(' But is not in this annex')
    if second_present:
        vs()
    elif verbose >= 2:
        print(' There is no key of type %s for this file in this annex' % sc)
    # action engine
    if actionR and second_present and not first_present:
        _rebuild_first_key(fil, fc, fo, so)
    elif (actionL and not actionD) and second_present and first_present:
        _link_duplicate_keys(fil, fo, so)
    # The second object is looked up again here, after the actions above.
    if actionD and os.path.isfile(so):
        _drop_second_key(fil, sk)
def scan(arg):
    """Examine every annexed file that `git annex find` lists under `arg`.

    arg -- a path handed to `git annex find`.

    Each line of the command's JSON output describes one file.  For files
    whose backend is SHA256E or SHA256 the key of the other backend is
    derived, its object path is obtained through aek(), and both are passed
    to act_on_pair().  Files with any other backend are only reported with
    an '**other_backend' line.  Lines that cannot be parsed are printed
    with a '**' prefix and skipped.  A non-zero exit status of the find
    command is reported with a '** failed' line.
    """
    findcommand = ['git', 'annex', 'find']
    if remote:
        findcommand += ['--include', '*']
    findcommand += ['--json', arg]
    sp = subprocess.Popen(args=findcommand, stdout=subprocess.PIPE)
    try:
        for line in sp.stdout:
            try:
                j = json.loads(line.decode('utf-8'))
            except ValueError as e:
                # covers both undecodable bytes and malformed JSON
                print('** %s' % repr(line))
                print('** %s' % (e,))
                continue
            fil = j['file']
            assert os.path.islink(fil)
            backend = j["backend"]
            if backend == "SHA256E":
                other_backend = 'SHA256'
                # keep only the 64 hex digits, dropping the extension
                other_name = j['keyname'][:64]
            elif backend == "SHA256":
                other_backend = 'SHA256E'
                # NOTE(review): the extension is taken from the file name
                # with os.path.splitext; git-annex's own rule for the
                # E-backend extension may differ (e.g. multi-part
                # extensions) -- confirm.
                other_name = j['keyname'] + os.path.splitext(j['file'])[1]
            else:
                sys.stdout.write('**other_backend\t'+fil+'\n')
                continue
            # first object
            first_key = j['key']
            fo = GAO + j['hashdirmixed'] + first_key + '/' + first_key
            # second object
            second_key = '%s-s%s--%s' % (other_backend, j['bytesize'], other_name)
            z = aek(second_key)
            so = GAO + z['hashdirmixed'] + second_key + '/' + second_key
            act_on_pair(fil, first_key, fo, backend,
                        second_key, so, other_backend)
    finally:
        # Always reap the child, also when an exception is propagating.
        sp.stdout.close()
        status = sp.wait()
    if status != 0:
        print('** failed %s' % (findcommand,))

# The usage text promises "dir [dir...]": process every directory given on
# the command line, not only the first one.
for arg in args:
    scan(arg)
