# python/goffid.py
"""
goffid.py: Google Files Folders Ids utilities
goFfId.py [-h] [-n name] [-p parent] cert fun plus*
cert: the google certificate to use, hint currently inuse: a=admin@spWallisellen.ch f=fiwiko@wlkl.ch w=wa@wlkl.ch
fun: the function to perform
Migration: replace foreign files by copies of root owner. uses 3 files
* id: files in drive, including current googleDriveId and originId (if replaced): goffid-<cert>.csv in driveRoot
* act: contains changes since last id, plus proposed migration actions : goffid-<cert>-act.csv in local directory
* mig: files in migration process with current/future and origin ids: goffid-<cert>-mig.csv in driveRoot
The different steps
* migAct: analyse drive list, id, mig and produce act, with migration proposals
* manually: check/modify act
* migMig: for the files in act with migration action,
create copies, for folders empty folders in zFremdeOrigiTemp
(if not already created and registered in mig)
and append them to mig, after removing entries in mig, that are already in id
* migSwap: for each file/folder in mig
move the new copy from zFremdeOrigiTemp to destination folder
for directories, move the contained elements to new folder
move the origin to zFremdeOriginale
* migDoc update the links in all documents (origin -> id from id and mig (if swapped)
* migId: analyse drive list, id, mig and upload a new version of id, with
updated list of origins
changes since old version of id
* mig2 steps migMig to migId in sequence
chown user: change the owner to user
of files owned by cert and residing in folders owned by user, if possible
l
list query: google q= is joined from plus. attention for shell quoting, use e.g.
./goffid.py w list 'mimeType="application/vnd.google-apps.folder"' and "'root'" in parents
up filename mimetype modifiedTime?; upload filename to drive
with name -n (default filename) in folder -p (default root) with the given modifiedTime
updDoc docid+: update the links in the docs with the given ids, using id and mig for the mapping origin -> new
8. 4.23 moved script to pc/bin, moved authorizations to /wkData/wk/extr2
"""
# from __future__ import print_function
import pickle
import os.path
import csv
from datetime import datetime
import time
from io import BytesIO
from io import StringIO
def err(*m):
    """ print an error message and abort by raising an exception
    m the message parts, printed space separated after a marker line
    raises ZeroDivisionError (kept for compatibility with the old `1 / 0` crash)
    """
    print('\n*****\nerror in', __file__ + ':', *m)
    # explicit raise instead of the former `x = 1 / 0` crash hack; the
    # exception type is unchanged so existing behavior is preserved
    raise ZeroDivisionError('goffid error, see message above')
def extendReverse(l, r):
    """ append the elements of r to list l in reverse order """
    l.extend(reversed(r))
def csvWrite(fn, li, flds):
    """ write a list of dictionaries to a csv file
    fn   filename to write to
    li   the list of dictionaries to write
    flds one string with the fieldnames to write, separated by ' ' or ','
    """
    fieldNames = flds.replace(',', ' ').split()
    with open(fn, 'w', newline='') as out:
        writer = csv.DictWriter(out, fieldnames=fieldNames)
        writer.writeheader()
        writer.writerows(li)
    print('written', len(li), 'rows to', fn)
def csvReadFi(fi, close=False):
    """ read the open csv file object fi into a list of dicts
    close: also close fi after reading
    """
    rows = [row for row in csv.DictReader(fi)]
    if close:
        fi.close()
    return rows
def csvRead(fn):
    """ read the csv file named fn into a list of dicts and log the row count """
    with open(fn, newline='') as f:
        rows = list(csv.DictReader(f))
        print('csvRead', fn, len(rows))
    return rows
#####go: google tiny helpers
gNow = datetime.now().timestamp() # seconds since epoch at script start, used to compute file ages
gDay = 86400 # seconds in a day
def goTst(s): # google date time string to timestamp
    """ convert a google RFC 3339 UTC timestamp string (ending in 'Z') to seconds since epoch
    the trailing 'Z' is replaced by an explicit '+00:00' offset: fromisoformat()
    on the bare string would yield a naive datetime, which timestamp() then
    interprets in LOCAL time and returns an epoch offset wrong by the timezone
    err() if s does not end in 'Z'
    """
    if s[-1] == 'Z':
        return datetime.fromisoformat(s[0:-1] + '+00:00').timestamp()
    else:
        err(f'goTst {s} bad timestamp format')
    # return datetime.fromisoformat('2021-07-11T12:16:47.774+00:00').timestamp()
#####go: google credentials and discovery
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
def goCredentials(crt):
    """ load the credentials for certificate prefix crt from google OAuth2
    cached in token-<crt>.pickle; refreshes an expired token or runs the
    browser login flow and re-saves the cache as needed
    see Google API Console https://console.developers.google.com/?authuser=0&project=quickstart-1611556606696
    (this docstring was previously placed after the first statement, where it
    was a dead string literal and goCredentials.__doc__ stayed None)
    """
    if type(crt) != str or len(crt) < 1:
        err(f"bad certification name: {crt} {type(crt)}")
    # If modifying these scopes, delete the file token.pickle.
    SCOPES =[ 'https://www.googleapis.com/auth/drive.metadata.readonly' # drive readonly
        , 'https://www.googleapis.com/auth/drive' # drive file update
        , 'https://www.googleapis.com/auth/documents.readonly' # docs readonly
        #, 'https://www.googleapis.com/auth/documents' # docs readWrite
        ]
    tokPiFi = f'token-{crt}.pickle' # the file to cache authorization
    credJson = '/wkData/pc/googleAPIcredentials.json' # the credentials file generated by google API
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists(tokPiFi):
        with open(tokPiFi, 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(credJson, SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open(tokPiFi, 'wb') as token:
            pickle.dump(creds, token)
    return creds
#####dr: google drive
# our standard fields for file (dicts); the suffix denotes the usage:
drFlds1 = "name, id, mimeType, createdTime, modifiedTime, trashed" # basic file attributes
drFldsAPI = drFlds1 + ", parents, owners(emailAddress)" # fields as requested from the drive API
drFldsL = drFlds1 + ", parent owner" # after drFiAtts flattened parents/owners
drFldsO = drFldsL + ", lv path" # plus tree level and path added by drWalk
drFldsP = drFldsO + ", origin change act" # plus the migration bookkeeping fields
drMimeFolder = 'application/vnd.google-apps.folder' # mimeType marking a folder
def pathSort(l):
    """ sort the list of file dicts in place by their 'path' field and return the list """
    l.sort(key=lambda fi: fi['path'])
    return l
def driveBuild(cert):
    """ build the global google drive v3 service driveSrv for the certificate prefix cert """
    global driveSrv
    driveSrv = build('drive', 'v3', credentials=goCredentials(cert))
def drInfo():
    """ print the drive user and storage info
    sets the globals drUser (email address of the drive user)
    and drRoot (file dict of the drive root folder)
    """
    global drUser, drRoot
    # Call the Drive v3 API
    ab = driveSrv.about().get(fields='user(displayName, emailAddress, kind), storageQuota').execute()
    # print(ab)
    drUser = ab['user']['emailAddress']
    drRoot = drGet('root')
    #print('root', drRoot)
    print(ab['user']['kind'], ab['user']['displayName'], drUser
        , '; storage (MB) usage', round(int(ab['storageQuota']['usage'])/1048576), ', trash', round(int(ab['storageQuota']['usageInDriveTrash'])/1048576)
        , ', limit', round(int(ab['storageQuota']['limit'])/1048576), 'root=', drRoot['name'], drRoot['id'])
def drFiAtts(f):
    """ massage the attributes of file dict f in place and return it:
    flatten the API list field 'parents' (exactly one id) to 'parent'
    and 'owners' (exactly one dict) to 'owner' (the email address);
    err() if there is not exactly one parent/owner
    """
    if 'parents' in f:
        if len(f['parents']) != 1:
            err('parents in', f)
        f['parent'] = f.pop('parents')[0]
    if 'owners' in f:
        ow = f['owners']
        if len(ow) != 1 or 'emailAddress' not in ow[0]:
            err('owners in', f)
        f['owner'] = f.pop('owners')[0]['emailAddress']
    return f
def drGet(fid, fFlds = drFldsAPI):
    """ get the file dict (after drFiAtts) for the given file id fid, requesting fields fFlds """
    return drFiAtts(driveSrv.files().get(fileId=fid,fields=fFlds).execute())
def drList(foId, fFlds = drFldsAPI, orderBy=None):
    """ return a list of files and folders directly in the google drive folder foId
    each file or folder as a dict with the given fields (after drFiAtts)
    foId    id of the parent folder, or 'q=<query>' for a raw drive query, or '' for no query
    fFlds   the file fields requested from the API
    orderBy optional drive API ordering, e.g. 'name'
    follows nextPageToken until all pages are collected
    """
    ff = []
    q = foId[2:] if foId[0:2] == 'q=' else '' if foId == '' else f"'{foId}' in parents"
    pageToken = None # renamed from `next`, which shadowed the builtin
    while True:
        res = driveSrv.files().list(
            pageSize=500, pageToken=pageToken, fields=f"nextPageToken, files({fFlds})"
            , q=q, orderBy=orderBy
            ).execute()
        # print(f'list found {len(res)}: {res}');
        ac = res.get('files', [])
        for a in ac:
            drFiAtts(a)
        ff += ac
        pageToken = res.get('nextPageToken')
        if pageToken is None:
            return ff
def drListOne(pa, nm, retNone=False):
    """ return the file dict for filename nm in parent folder pa
    retNone: return None instead of err() when the file does not exist
    err() if several files match or the single match is trashed
    """
    res = drList(f"q=name='{nm}' and '{pa}' in parents")
    if len(res) == 0:
        return None if retNone else err(f'drListOne: no file {pa}/{nm}')
    if len(res) == 1 and not res[0]['trashed']:
        return drFiAtts(res[0])
    err(f'drListOne: file {pa}/{nm} multiple or thrashed', res)
def drWalk(af,fFlds = drFldsAPI, stop=None):
    """ iterate over all files from one or several folders in a google drive recursively, depth first
    af can be
        the id of a single folder
        a list of folder ids
        a dictionary of folderId => path root name
    yields what drWalkLi yields
    """
    at = type(af)
    if at == dict:
        roots = af
    elif at == str:
        roots = {af: ''}
    else:
        roots = {i: '' for i in af}
    start = []
    for fid, rootPath in roots.items():
        fi = drGet(fid, fFlds)
        fi['path'] = rootPath
        fi['lv'] = 0
        start.append(fi)
    yield from drWalkLi(start, fFlds, stop)
def drWalkLi(ff, fFlds = drFldsAPI, stop=None):
    """ iterate over all files in list ff recursively, depth first.
    the elements of ff must be dicts with fields fFlds after drFiAtts plus 'path' and 'lv'
    stop: optional set of ids; ids already in it are neither yielded nor descended
          into, and every visited id is added, so it doubles as the visited set
    yields a dict of the fieldnames given by fFlds (after drFiAtts), plus
        'lv' the level within the filetree, starting with 0 in the folders in ff
        'path' the slash separated path from the walk root
    """
    doneOrQueued = set() if stop==None else stop
    print('drWalkLi(', [f['name'] for f in ff], fFlds, stop, ')')
    stck = []
    extendReverse(stck, ff) # push the start list to the work stack
    # for lists only append and pop from the end are efficient
    cF = cT = 0 # counters: folders descended / files yielded
    while len(stck) > 0:
        f = stck.pop() # pop next item from stck
        if f['id'] in doneOrQueued:
            print(f"--drWalkLi already doneOrQueued {f['path']} {f['lv']} {f['mimeType']} {f['id']}")
        else:
            doneOrQueued.add(f['id'])
            yield f
            cT += 1
            if f['mimeType'] == drMimeFolder:
                cF += 1
                ch = drList(f['id'], fFlds, 'name') # get contents of this folder
                lv = f['lv'] + 1
                pp = f['path'] + '/' if f['path'] != '' else ''
                for c in ch: # give the children their level and path before stacking them
                    c['lv'] = lv
                    c['path'] = pp + c['name']
                extendReverse(stck, ch) # push the children to the work stack
                print(f'--drWalkLi {cF}/{cT} stack {len(stck)} level {f["lv"]} beginning {f["path"]}')
    print(f'--drWalkLi {cF}/{cT} end of iterator')
def drCopy(id, pa, fn):
    """ copy the google drive file with the given id into folder pa with new name fn
    return the file dict of the new copy (after drFiAtts)
    """
    # gen = driveSrv.files().generateIds(count=2).execute() !!! do not use fails with: Generated IDs are not supported for Docs Editors formats.
    res = driveSrv.files().copy(fileId=id, fields=drFldsAPI, body={'name':fn, 'parents': [pa]}).execute()
    print('copy result', fn, res)
    return drFiAtts(res)
def drCreate(pa, nm, mime=drMimeFolder):
    """ create a file in folder pa, with the given name and mimeType, without data
    return the file dict of the new file (after drFiAtts)
    """
    return drFiAtts(driveSrv.files().create(body={'name': nm, 'parents': [pa], 'mimeType': mime}, fields=drFldsAPI).execute())
def drUploadVers(fn, pa, mime, dt=None, nm=None):
    """ upload a new version of a file to googleDrive, or create the file if it does not exist yet
    fn   local filename to upload
    pa   id of the destination folder on drive
    mime mimeType of the uploaded content
    dt   optional modifiedTime; a bare date or a missing trailing 'Z' is completed
    nm   name on drive, defaults to fn
    returns the file dict (after drFiAtts) of the uploaded file
    set keepRevisionForever=True, otherwise the revision will soon disappear, which is not what we need here
    setting the modifiedTime, will set this as the upload time of the version and order the revisions by this upload time
    """
    if nm == None:
        nm = fn
    if dt != None: #format RFC 3339: 2020-03-17T09:35:22.771Z
        if len(dt) == 10: # date only: complete to midnight
            dt += 'T00:00:00'
        if dt[-1] != 'Z':
            dt += 'Z'
    old = drListOne(pa, nm, True) # the existing drive file of this name, or None
    media = MediaFileUpload(fn, mimetype=mime)
    if old == None: # no such file yet: create it
        body = {'name': nm, 'parents': [pa]}
        if dt != None:
            body['modifiedTime'] = dt
        new = driveSrv.files().create(keepRevisionForever=True, body=body, media_body=media, fields=drFldsAPI).execute()
    else: # upload the content as a new version of the existing file
        new = driveSrv.files().update(fileId=old['id'], keepRevisionForever=True, media_body=media
            , body= {} if dt == None else {'modifiedTime': dt}, fields=drFldsAPI).execute()
    drFiAtts(new)
    print('drUploadVers uploaded', 'file' if old == None else 'version', new)
    return new
def drMove(id, oldPa, newPa):
    """ move the drive file id from parent oldPa to parent newPa
    returns the updated file dict (after drFiAtts); on API failure the
    exception is printed together with the current file state and reraised
    """
    # print('drMove', id, 'in', oldPa, 'to', newPa)
    try:
        res = driveSrv.files().update(fileId=id, removeParents=oldPa, addParents=newPa, fields=drFldsAPI).execute()
    except Exception as e:
        print(f"***\n*** error drMove({id}, {oldPa}, {newPa})\n*** exception:", e)
        print('*** drget', id, drGet(id))
        raise e
    drFiAtts(res)
    if res['parent'] != newPa: # the move did not take effect: report but continue
        print('drMove not in new parent', oldPa, 'to', newPa, 'res', res)
    print('drMove', oldPa, 'to', newPa, 'res', res)
    return res
def drDownload(sid, fn=None):
    """ download a google drive file with id sid
    if fn == None then return a StringIO (buffer in memory, decoded as utf-8)
    else write it to the given filename fn and return fn
    """
    fh = BytesIO() if fn == None else open(fn, 'wb')
    req = driveSrv.files().get_media(fileId=sid)
    downloader = MediaIoBaseDownload(fh, req)
    done = False
    while done is False: # download chunk by chunk until complete
        status, done = downloader.next_chunk()
        # print("Download %d%%." % int(status.progress() * 100))
    # print('drDownload', status)
    if fn == None:
        return StringIO(str(fh.getbuffer(), encoding='utf-8', errors='strict'))
    else:
        fh.close()
        return fn
def drChown(fi, us):
    """ change the owner of file with fileDict fi to user us
    updates the existing permission of us, or creates a new owner permission,
    then rereads the file and err()s if the ownership did not actually change
    """
    print('drChown changing', fi['path'], 'owner to', us)
    prms = driveSrv.permissions().list(fileId=fi['id'], fields='*').execute()['permissions']
    #print('prms', prms)
    pN = [p for p in prms if 'emailAddress' in p and p['emailAddress'] == us] # the permissions of the target user
    # print('pN ***', len(pN), pN)
    try:
        if len(pN) >= 1: # user already has a permission: promote it to owner
            res = driveSrv.permissions().update(fileId=fi['id'], permissionId=pN[0]['id'], transferOwnership=True, body={'role': 'owner'}).execute()
        else:
            res = driveSrv.permissions().create(fileId=fi['id'], transferOwnership=True, body={'role': 'owner', 'type': 'user', 'emailAddress': us}).execute()
    except BaseException as e:
        err('drChown for', fi['path'], 'update/create except', e, '\nfile', fi)
    # print(res)
    nn = drGet(fi['id']) # reread the file to verify the ownership change
    # print('after', nn)
    if fi['name'] != nn['name'] or fi['id'] != nn['id'] :
        err('drChown mismatch fi', fi, '<==>', nn)
    elif us == nn['owner']:
        pass # print(fi['path'], fi['id'], 'owner from', fi['owner'], 'changedTo', nn['owner'], nn)
    else:
        err('could not chown fi', fi, 'to nn', nn, 'permissions', prms)
###doc: google docs
def docsBuild(cert):
    """ build the global google docs v1 service docsSrv for the certificate prefix cert """
    global docsSrv
    docsSrv = build('docs', 'v1', credentials=goCredentials(cert))
""" metadata for docLinks for document structure, a list of m describing a document ele e ( a dict) as follows
[ key: if key in e, then use this m to describe e
, eKeys: set of allowed keys in e
, sKeys: if a set the go to subelement s = e[key] and the allowed keys in s are sKeys elif sKey=None s=e
, sty: process the style s[sty]
, chi: a list of subelements to process as document ele: for ck in chi: for c in s[ck]: recursively process docElement c
]
"""
docMeta = [ ['paragraph', {'paragraph', 'startIndex', 'endIndex'}, {'paragraphStyle', 'suggestedParagraphStyleChanges', 'elements', 'bullet', 'suggestedBulletChanges', 'positionedObjectIds', 'suggestedPositionedObjectIds'}, 'paragraphStyle', ['elements']]
, ['textRun', {'textRun', 'startIndex', 'endIndex'}, {'content', 'textStyle', 'suggestedInsertionIds', 'suggestedDeletionIds', 'suggestedTextStyleChanges'}, 'textStyle', []]
, ['pageBreak', {'pageBreak', 'startIndex', 'endIndex'}, {'textStyle'}, 'textStyle', []]
, ['sectionBreak', {'sectionBreak', 'startIndex', 'endIndex'}, {'sectionStyle'}, 'sectionStyle', []]
, ['tableOfContents', {'startIndex', 'endIndex', 'tableOfContents'}, {'content'}]
, ['footnoteReference', {'startIndex', 'endIndex', 'footnoteReference'}, None]
, ['inlineObjectElement', {'startIndex', 'endIndex', 'inlineObjectElement'}, {'inlineObjectId', 'textStyle'}, 'textStyle', []]
, ['table', {'startIndex', 'endIndex', 'table'}, {'rows', 'columns', 'tableRows', 'tableStyle'}, 'tableStyle', ['tableRows']]
, ['tableCells', {'startIndex', 'endIndex', 'tableCells', 'tableRowStyle'}, None, 'tableRowStyle', ['tableCells']]
, ['content', {'startIndex', 'endIndex', 'content', 'tableCellStyle'}, None, 'tableCellStyle', ['content']]
, ['horizontalRule', {'startIndex', 'endIndex', 'horizontalRule'}, {'textStyle'}, 'textStyle', []]
]
def docLinks(f):
    """ iterate over the links in the google doc given by the drive file dict f
    yields (element, url): the doc element {startIndex: , endIndex: , ...} and its link url
    err() on document structures not covered by docMeta
    """
    doc = docsSrv.documents().get(documentId=f['id']).execute()
    # print('document title', doc.get('title'), 'path', f['path'])
    cont = doc.get('body').get('content')
    # print('---\n content', cont, '\n---')
    stck = [] # work stack for the depth first traversal of the element tree
    extendReverse(stck, cont)
    while len(stck) > 0:
        c = stck.pop()
        for m in docMeta: # find the metadata entry describing element c
            if m[0] in c:
                break
        else:
            err('unknown content', c.keys(), c)
        if not set(c.keys()) <= m[1]: # validate the element keys against the metadata
            err('unknown keys in c', m, c.keys(), c)
        if m[2] == None:
            s = c
        else:
            s = c[m[0]] # go to nested subelement
            if not set(s.keys()) <= m[2]:
                err('bad keys in', m[0], m, '\n s.keys', s.keys(), '\n content', c)
        if len(m) < 4: # short metadata entry: element carries no style/children, skip it
            print('+++ ignoring', m[0])
            continue
        # print('*', m[0].ljust(15), c['startIndex'] if 'startIndex' in c else '-', c['endIndex'])
        if 'link' in s[m[3]]: # the style contains a link: yield its url
            if 'url' in s[m[3]]['link']:
                yield c, s[m[3]]['link']['url']
            elif 'headingId' not in s[m[3]]['link'] and 'bookmarkId' not in s[m[3]]['link']:
                err('no url in link', c, s)
        for e in m[4]: # descend into the child element lists
            extendReverse(stck, s[e])
    return
def goUrlId(u):
    """ locate the google file/folder/document id inside url u
    return (sx, ex) such that u[sx:ex] is the id
    or (None, None) if the url contains no id to migrate (non-google or known non-drive google urls)
    err() for google urls that look migratable but are not understood
    """
    uL = u.lower()
    # find the start of the google id in url
    sx = -1
    if uL.find('google.com/') < 0 :
        return None, None
    for p in ['google.com/document/d/', 'docs.google.com/document/u/0/d/', 'docs.google.com/document/u/1/d/', 'docs.google.com/document/u/2/d/', 'google.com/spreadsheets/d/', 'google.com/file/d/', 'google.com/drive/u/0/folders/', 'google.com/drive/u/1/folders/', 'google.com/drive/u/2/folders/', 'google.com/drive/folders/'
            , 'google.com/presentation/d/', '?id=', '&id=']:
        sx = uL.find(p)
        if sx >= 0:
            sx += len(p)
            break
    if sx < 0:
        if uL in {'https://drive.google.com', 'https://drive.google.com/drive', 'https://drive.google.com/drive/shared-with-me'}:
            return None, None
        for p in ['accounts.google.com', 'calendar.google.com/', 'groups.google.com/', 'meet.google.com/', 'takeout.google.com/', '@googlegroups']:
            if uL.find(p) >= 0:
                return None, None
        err('cannot migrate url', u)
    # find the end of the google id in url: it ends at the EARLIEST terminator.
    # the old code took the first terminator found in scan order '/?#&', which
    # overshot for urls like ...?id=X&a=b/c (the later '/' was used, so the id
    # wrongly included '&a=b')
    ex = len(uL)
    for t in '/?#&':
        tx = uL.find(t, sx)
        if 0 <= tx < ex:
            ex = tx
    return sx, ex
def docUpdLinks(f, o2n):
    """ update the links in google doc f according to the origin => newId dict o2n
    collects one updateTextStyle request per changed link url, plus insert/delete
    requests where the visible link text itself contains the old id, and sends
    them in a single batchUpdate; returns the number of requests sent
    """
    req = [] # the batchUpdate requests
    txt = [] # (startIndex, endIndex, newId) of link texts containing the old id
    cntL = 0 # number of links seen
    print('docUpdlinks', f['path'], f['id'], 'begin')
    for e, u in docLinks(f):
        cntL += 1
        sx, ex = goUrlId(u) # find google id in url
        if sx == None:
            print(' ignoring url', u, 'at', e['startIndex'], e['endIndex'])
        else:
            old = u[sx: ex]
            if old not in o2n:
                print(' keeping url', u, 'id=', old, 'at', e['startIndex'], e['endIndex'])
            else:
                new = o2n[old]
                ln = u[0: sx] + new + u[ex:]
                if 'textRun' in e and 'content' in e['textRun']:
                    ix = e['textRun']['content'].find(old)
                    if ix >= 0: # the visible text shows the old id: schedule a text replacement too
                        t = (e['startIndex']+ix, e['startIndex']+ix+len(old), new)
                        if t[0] < e['startIndex'] or t[1] > e['endIndex']:
                            err('boundary in text', old, new, t, e)
                        txt.append(t)
                print(' updating url', u, 'id=', old, '==>', new, 'at', e['startIndex'], e['endIndex'])
                req.append({'updateTextStyle': {'textStyle': {'link': {'url': ln}}, 'fields': 'link.url', 'range': {'startIndex': e['startIndex'], 'endIndex': e['endIndex']}}})
    print('docUpdlinks', f['path'], len(req), 'updates', 'of', cntL, 'links and', len(txt), 'texts')
    dlt = 0 # warning startIndex etc. change with text modifications!
    for t in txt: # insert the new id, then delete the old text range, shifting later indices by dlt
        req.append({'insertText': {'text': t[2], 'location': {'index': t[1]+dlt}}})
        req.append({'deleteContentRange': {'range': {'startIndex': t[0] + dlt, 'endIndex': t[1]+dlt}}})
        dlt += len(t[2]) - t[1] + t[0]
    if len(req) > 0:
        # print('req', len(req), req)
        rs = docsSrv.documents().batchUpdate(documentId=f['id'], body={'requests' : req}).execute()
        # print('migrated', len(req), "req's, response", rs)
    return len(req)
###mig: Migrate foreign files and keep track of id's, see module __doc__
def migRead(goRo, fn, stop=None):
    """ read one of the migration file lists and index it
    goRo id of the drive root folder; '' reads fn from the local disk instead
    fn   csv file name; ignored when stop is given (then the live drive is walked)
    stop optional id set passed to drWalk (to exclude the work folders)
    returns (l, i, o): the list of file dicts, a dict id => file, a dict origin => file
    err() on duplicate ids or origins
    """
    if stop != None:
        l = list(drWalk(goRo, stop=stop)) # read the current state directly from the drive
    elif goRo == '':
        l = csvRead(fn) # read the local csv file
    else:
        l = csvReadFi(drDownload(drListOne(goRo, fn)['id'])) # read the csv file stored on drive
    i = {}
    o = {}
    for e in l:
        i[e['id']] = e if e['id'] not in i else err(fn, 'duplicate id', e)
        if 'origin' not in e: # normalize: every entry gets an origin field
            e['origin'] = ''
        if len(e['origin']) >= 1:
            o[e['origin']] = e if e['origin'] not in o else err(fn, 'duplicate origin', e)
        if 'kind' in e: # drop fields not in our field lists
            del e['kind']
        if 'chFrom' in e:
            del e['chFrom']
        e['trashed'] = str(e['trashed']) # after csv boolean is migrated to str - and then compares do not work!
    print(fn, 'len', len(l), len(i), len(o))
    # pathSort(l)
    return l, i, o
def migMerge(root, user, stop, idF, migF):
    """ merge the current drive list with the old id and mig lists; used for migration phases Act and Id
    root the drive root folder dict, user the email of the drive user
    stop set of work folder ids to exclude from the drive walk
    idF, migF the csv filenames of the id and mig lists stored on drive
    returns the list of all current files, with 'origin' transferred from id/mig,
    'change' describing the differences to the old id list, and 'act' a proposed action
    """
    print(f'\nmigmerge root={root}\n, user={user}\n, stop={stop}\n, idF, migF, actF')
    aL, aI, aO = migRead(root['id'], '*drive*', stop) # the current state of the drive
    idL, idI, idO = migRead(root['id'], idF) # the last uploaded id list
    migL, migI, migO = migRead(root['id'], migF) # the files in migration
    for o,i in idO.items(): # transfer origins from old id
        if o in aI:
            err('old id origin in act', i)
        elif i['id'] not in aI:
            print('???idO id not in aI', i)
        else:
            a = aI[i['id']]
            if a['origin'] != '':
                err('origin already set, old id', i, 'in act', a)
            a['origin'] = o
            aO[o] = a
            print('origin added to act from old id', i)
    for o,m in migO.items(): # transfer origins from mig
        if o in aI:
            print('mig origin', 'and id' if m['id'] in aI else '', ' in act', m)
        elif m['id'] not in aI:
            err ('mig: neither id nor origin in act', m)
        else:
            a = aI[m['id']]
            if a['origin'] != '':
                if o != a['origin']:
                    err('mig origin mismatch, mig=', m, 'act=', a)
                else:
                    print('mig same origin', m)
            else:
                a['origin'] = o
                aO[o] = a
                print('origin added to act from mig', m)
    for a in aL:
        ch = '' # compute change
        cp = None # the old id entry to compare a against
        if a['id'] in aO:
            err('act id in act origin', a)
        elif a['id'] not in idI:
            if a['origin'] in idI: # swapped since the old id list
                ch += ' o=-'
                cp = idI[a['origin']]
                if cp['origin'] != '':
                    err('swapped but origin already swapped\n act', a, '\n origin', cp)
            else: # a file new since the old id list
                ch += ' +=-'
        else:
            cp = idI[a['id']]
        if cp != None: # record the differences to the old id entry
            if a['name'] != cp['name']:
                ch += ' n=' + cp['name'].replace('=', '?')
            if a['origin'] != cp['origin'] and ( len(ch) < 4 or ch[0:4] != ' o=-' ) :
                err('origin changed change', ch, '\n act', a, '\n old id', cp)
            aP = a['path'].rsplit('/', 1)
            aP = aP[0] if len(aP) == 2 else ''
            oP = cp['path'].rsplit('/', 1)
            oP = oP[0] if len(oP) == 2 else ''
            if aP != oP: # parent path changed
                ch += ' p=' + oP.replace('=', '?')
            if a['trashed'] != cp['trashed']:
                ch += ' t=' + str(cp['trashed'])
            if a['createdTime'] != cp['createdTime']:
                ch += ' c=' + cp['createdTime']
            if a['modifiedTime'] != cp['modifiedTime']:
                if a['modifiedTime'] < cp['modifiedTime']:
                    err(f'cmodifiedTime older act={a} old={cp}')
                ch += ' m=' + cp['modifiedTime']
        a['change'] = ch.strip()
        m = goTst(a['modifiedTime']) # compute act: m = age of last modification, c = created-to-modified span
        c = m - goTst(a['createdTime'])
        m = gNow - m
        a['act'] = ('-=o' if a['owner'] == user else '-=t' if a['trashed'] != 'False' else 'm=f' if a['mimeType'] == drMimeFolder \
                else 'm=m' if m > 430 * gDay else 'm=c' if abs(c) < 10 else '-=e') \
            + ' ' + (str(round(m)) + 's' if m < gDay else str(round(m/gDay)) + 'd') \
            + ' ' + (str(round(c)) + 's' if c < gDay else str(round(c/gDay)) + 'd')
    for i in idL: # add entries deleted since old id
        if i['id'] not in aI and i['id'] not in aO:
            if i['trashed'] == 'del': # already recorded as deleted
                i['act'] = i['change'] = ''
            else: # newly disappeared: mark as deleted
                i['trashed'] = 'del'
                i['act'] = ''
                sP = i['path'].rsplit('/', 1)
                sP = sP[0] if len(sP) == 2 else ''
                i['change'] = f"-=+ n={i['name']} p={sP}"
                # print('after change', i)
            aL.append(i)
            aI[i['id']] = i
            if i['origin'] != '':
                if i['origin'] in aI:
                    err('ori already in act id', i) # fixed: reported the stale loop variable a instead of i
                elif i['origin'] in aO:
                    err('ori already in act origin', i) # fixed: reported the stale loop variable a instead of i
                else:
                    aO[i['origin']] = i
    return aL
def migMig(goRo, idF, migF, actF, fT):
    """ phase Mig of migration
    create a copy of the files and folders to migrate according to act, in zFremdeOrigiTemp
    write new version of mig: remove entries already in id, and add entries for newly created files/folders
    goRo the drive root folder dict, fT the zFremdeOrigiTemp folder dict
    idF, migF the csv filenames on drive, actF the local act csv filename
    returns the new mig list
    """
    print(f'migMig({goRo}, {idF}, {migF}, {actF}, {fT}')
    idL, idI, idO = migRead(goRo['id'], idF)
    migL, migI, migO = migRead(goRo['id'], migF)
    actL, actI, actO = migRead('', actF) # act is read from the local directory
    nL = [] # the new mig list
    for m in migL: # add entries from old mig, that are not yet in old id
        if m['origin'] == '':
            err('empty origin in mig', m)
        if m['id'] in idI:
            if m['origin'] == idI[m['id']]['origin']:
                continue # already in old id, do not add it to new mig
        nL.append(m)
    cc = 0 # number of newly created copies
    for a in actL:
        if len(a['act']) < 1 or a['act'][0] != 'm': # do not migrate
            continue
        if a['id'] in migO: # already created
            continue
        if a['origin'] != '': # unexpected: entries to migrate must not yet carry an origin
            err('cannot migrate, origin in actF notEmpty', a)
        if a['id'] in idI:
            if idI[a['id']]['origin'] != '':
                err('cannot migrate, origin in idF notEmpty', a, idI[a['id']])
        if a['trashed'] != 'False':
            if a['trashed'] == 'del':
                continue
            err('act trashed, now what?', a)
        if a['mimeType'] == drMimeFolder: # folders get a fresh empty folder, files a real copy
            new = drCreate(fT['id'], a['name'])
        else:
            new = drCopy(a['id'], fT['id'], a['name'])
        print('migCreated', new, 'from', a)
        a['origin'] = a['id'] # remember the original; the new copy becomes the current id
        a['id'] = new['id']
        nL.append(a)
        cc += 1
    print('migMig create', cc, 'new files, from total', len(actL))
    return nL
def migSwap(goRo, migF, fO, fT):
    """ Swap phase of migration, for each entry in mig
    move the new copy from zFremdeOrigiTemp (fT) to the parent of the origin
    for folders, move the contained elements of the origin into the new folder
    move the origin to zFremdeOriginale (fO)
    returns the dict origin id => new id of the swapped entries
    """
    migL, migI, migO = migRead(goRo['id'], migF)
    pathSort(migL) # parents sort before children, so parent folders are swapped first
    cnt = 0
    o2n = {} # origin id => new id of the entries swapped so far
    for m in migL:
        old = m['origin']
        new = m['id']
        par = m['parent']
        if par in o2n: # the parent folder itself was swapped: use its new copy
            par = o2n[par]
        o2n[old] = new
        isFold = m['mimeType'] == drMimeFolder
        print('swapping', 'folder' if isFold else 'file', m['path'])
        drMove(new, fT['id'], par)
        if isFold: # move contents of folder
            co = drList(old)
            if len(co) > 0:
                for c in co:
                    drMove(c['id'], old, new)
                print('moved content', len(co), m['path'], datetime.now())
                for i in range(1, 12): # retry with growing delay until drive reports the folder empty
                    co = drList(old)
                    if len(co) == 0:
                        break
                    print('old contains', len(co), datetime.now())
                    time.sleep(i)
                if len(co) != 0:
                    err('after move contents', m['path'], 'old not empty', co)
        drMove(old, par, fO['id'])
    return o2n
def migO2n(*di):
    """ merge one or several origin => fileDict dicts (as built by migRead)
    into a single origin => newId dict
    err() on empty ids/origins, an origin mismatching its fileDict,
    or conflicting new ids for the same origin
    """
    o2n = {}
    for d in di:
        for o, nFi in d.items():
            n = nFi['id']
            if o == '' or n == '':
                err('bad empty origin', o, n)
            elif o != nFi['origin']:
                err('origin', o, 'mismatch for', nFi)
            elif o not in o2n:
                o2n[o] = n
            elif n != o2n[o]:
                # fixed: referenced the undefined names `new` and `o2c`,
                # which raised NameError before err could report the conflict
                err('old', o, 'new', n, 'mismatches o2n[o]', o2n[o])
    return o2n
def migDoc(root, idF, migF, wfO, wfT):
    """ update the links in all docs
    the link translation origin => newId is created from the current drive list, idF and migF
    root the drive root folder dict; wfO/wfT the zFremdeOriginale/zFremdeOrigiTemp folder dicts,
    which are excluded from the drive walk
    """
    aL, aI, aO = migRead(root['id'], '*drive*', {wfO['id'], wfT['id']})
    idL, idI, idO = migRead(root['id'], idF)
    migL, migI, migO = migRead(root['id'], migF)
    o2n = migO2n(idO, migO)
    for o, n in o2n.items(): # origins must no longer exist on the drive
        if o in aI:
            # fixed: used the undefined name `i`, raising NameError before the message
            err('origin in act', o, aI[o])
    print('after mig', len(o2n), 'in o2n')
    migDocW(aL, o2n)
def migDocW(aL, o2n):
    """ update the links in the google docs in file list aL according to the origin => newId dict o2n
    non-document files are only counted; prints per-mimeType statistics
    """
    cnt = {} # files seen per mimeType
    cntU = 0 # documents that actually got link updates
    docMime = 'application/vnd.google-apps.document'
    for a in aL:
        mt = a['mimeType']
        cnt[mt] = cnt.get(mt, 0) + 1
        if mt == docMime:
            u = docUpdLinks(a, o2n)
            if u > 0:
                cntU += 1
    # cnt.get: the old direct lookup raised KeyError when aL contained no documents at all
    print('links updated in', cntU, 'of', cnt.get(docMime, 0), 'documents, stats', cnt)
###chown: change owner
def chown(us):
    """ change the owner of the drive user's google files residing in folders owned by user us, to us
    walks all root folders owned by us (excluding the zFremdeOrigi* work folders)
    and calls drChown for every google-mime file currently owned by the drive user
    """
    print('chown from', drUser, 'to', us)
    cnt = {'chown': 0, 'mimeNotGoogle': 0} # statistics per owner / action
    wfldO, wfldT, wf, wfIds = workFolders()
    ff = drList("q='" + us + "' in owners and mimeType = 'application/vnd.google-apps.folder' and not name contains 'zFremdeOrigi'")
    print('chown found', len(ff), 'folders owned by', us, [f['name'] for f in ff])
    i2f = {f['id']: f for f in ff}
    gg = [f for f in ff if 'parent' not in f or f['parent'] not in i2f if f['id'] not in wfIds] # the top folders owned by us
    for g in gg: # prepare the top folders as walk roots
        g['lv'] = 0
        g['path'] = g['name']
    print('chown found', len(gg), 'root folders owned by', us, [g['path'] for g in gg])
    for f in drWalkLi(gg, stop= wfIds.copy()):
        if f['owner'] in cnt:
            cnt[f['owner']] += 1
        else:
            cnt[f['owner']] = 1
        if f['owner'] != drUser:
            pass # print('not my file', f['path'], f['owner'])
        elif not f['mimeType'].startswith('application/vnd.google-apps.'):
            cnt['mimeNotGoogle'] += 1
            # print('not google file', f['path'], f['mimeType'])
        else:
            cnt['chown'] += 1
            drChown(f, us)
    print('chown from', drUser, 'to', us, 'cnt', cnt)
###helpers
def workFolders():
    """ locate the two migration work folders in the drive root
    returns (nameOriginale, nameOrigiTemp, [folderDictOriginale, folderDictTemp], set of their ids)
    err() unless both are present exactly once (ordered by name: Originale before OrigiTemp)
    """
    wfldO = 'zFremdeOriginale'
    wfldT = 'zFremdeOrigiTemp'
    wf = drList(f"q=(name = '{wfldO}' or name = '{wfldT}') and '{drRoot['id']}' in parents and not trashed", orderBy='name') # 'root' in parents and
    print('wf', wf)
    if len(wf) != 2 or wf[0]['name'] != wfldO or wf[1]['name'] != wfldT :
        err('bad work folders wf:', wf)
    return wfldO, wfldT, wf, {w['id'] for w in wf}
###main:
import argparse
def main():
    """ parse the command line, build the drive service and dispatch to the function args.fun """
    parser = argparse.ArgumentParser(description="goffid: googleDriveFilesFoldersId's: ")
    parser.add_argument('cert', help='prefix for certificate. hint a=admin@spWallisellen.ch f=fiwiko@wlkl.ch w=wa@wlkl.ch')
    parser.add_argument('fun', default=None, help='function to perform (see below)')
    parser.epilog= "functions: " + __doc__.partition('fun:')[2] # reuse the module docstring as help text
    parser.formatter_class=argparse.RawDescriptionHelpFormatter
    parser.add_argument('plus', nargs='*', help='arguments for fun')
    parser.add_argument('-n', help='name of google file')
    parser.add_argument('-p', help='id of parent')
    parser.add_argument('-r', action='store_true', help='recurse (for cp otherwise always true)')
    # parser.print_help()
    args = parser.parse_args()
    print(__file__, f"{args.cert=} {args.fun=}", args)
    driveBuild(args.cert)
    drInfo()
    filePref = 'goffid-' + args.cert # the csv filenames depend on the certificate
    idF = filePref + '.csv'
    actF = filePref + '-act.csv'
    migF = filePref + '-mig.csv'
    f = args.fun
    print('fun', f)
    if f == 'chown': #------------- change owner of own files in folders of the given user
        if len(args.plus) != 1:
            err('user missing or multiple for chown user')
        chown(args.plus[0])
    elif f == 'cp':
        print('cp, recursive', args.r)
    elif f == 'l': #------------- list files recursively to csv and open it
        ll = list(drWalk('root' if len(args.plus) < 1 else args.plus))
        of = filePref + '-l.csv'
        csvWrite(of, ll, drFldsO)
        os.system(f'libreoffice --calc {of}')
    elif f == 'list': #------------- list files by drive query to csv and open it
        ll = drList('q=' + ' '.join(args.plus))
        if len(ll) <= 10:
            for l in ll:
                print(l['name'], l['mimeType'], l['owner'])
        of = filePref + '-list.csv'
        csvWrite(of, ll, drFldsL)
        os.system(f'libreoffice --calc {of}')
    elif f[0:3] == 'mig': #------------- the migration phases, see module __doc__
        print('migrate foreign owned filese and folders')
        wfldO, wfldT, wf, wfIds = workFolders()
        if f == 'migAct': # analyse and propose actions
            mm = migMerge(drRoot, drUser, wfIds, idF, migF)
            csvWrite(actF, pathSort(mm), drFldsP)
            os.system(f'libreoffice --calc {actF}')
        if f == 'migMig' or f == 'mig2': # create the copies and upload the new mig list
            mc = migMig(drRoot, idF, migF, actF, wf[1])
            # print('migMig', mc)
            csvWrite(migF, pathSort(mc), drFldsP)
            drUploadVers(migF, 'root', 'text/csv')
        if f == 'migSwap' or f == 'mig2': # swap copies and originals
            migSwap(drRoot, migF, *wf)
        if f == 'migDoc' or f == 'mig2': # update the links in all documents
            docsBuild(args.cert)
            migDoc(drRoot, idF, migF, *wf)
        if f == 'migId' or f == 'mig2': # produce and upload the new id list
            mm = migMerge(drRoot, drUser, {w['id'] for w in wf}, idF, migF)
            csvWrite(idF, pathSort(mm), drFldsP)
            drUploadVers(idF, 'root', 'text/csv')
            os.system(f'libreoffice --calc {idF}')
    elif f == 'up': #------------- uploadVersion: up<fn>,<datetime>
        if len(args.plus) == 2:
            args.plus.append(None)
        if len(args.plus) != 3:
            err('up arguments filename, mimetype required, modifiedTime optinal')
        if args.p == None:
            args.p = drRoot['id']
        print('uploadVersion from file', args.plus[0], ', mimeType', args.plus[1], 'to googlefile name', args.n, 'folder', args.p, 'with modifiedTime', args.plus[2])
        drUploadVers(args.plus[0], args.p, args.plus[1], dt=args.plus[2], nm=args.n)
    elif f == 'updDoc': #------------- update the links in the docs given by id
        if len(args.plus) < 1:
            err('updDoc missing docids')
        idL, idI, idO = migRead(drRoot['id'], idF)
        migL, migI, migO = migRead(drRoot['id'], migF)
        o2n = migO2n(idO, migO)
        ff = [drGet(p) for p in args.plus]
        for f in ff: # add path and lv as expected by migDocW
            g = migI[f['id']] if f['id'] in migI else idI[f['id']] if f['id'] in idI else {'path': '?/' + f['name'], 'lv': 0}
            f['path'] = g['path']
            f['lv'] = g['lv']
        docsBuild(args.cert)
        migDocW(ff, o2n)
    elif len(f) > 2 and f[0: 2] == 't=': #------------- tests t=<test>
        cCopyMigCsv = 'testCopyMig.csv'
        cCopyMigFrom = '1l1FqcBu6uQZEXagWRJiwl6_dJtrK7TdY' # folder spWalHeizen
        cCopyMigTo = '13BzFmCJUDl8lYtsyWKDpgYKCPBx6Xaoz' # folder testCopyMig
        t = f[2:]
        print('tst', t)
        if t[0] == 'l': #----------------- list files recursively from given folder
            for f in drWalk(t[1:] if len(t) > 1 else {'1l1FqcBu6uQZEXagWRJiwl6_dJtrK7TdY':':h', 'root':':w'}):
                print(((' ' * f['lv'] + f['name']).ljust(29) + ' ' + f['id']).ljust(70) + ' ' + f['path'])
        elif t[0:2] == 'co': #------------- test copyMig 1: copy files recursively to cCopyMigTo and write cCopyMigCsv
            with open(cCopyMigCsv, 'w', newline='') as csvfile:
                wri = csv.DictWriter(csvfile, fieldnames=['src', 'copy'])
                wri.writeheader()
                for f in drWalk(t[2:] if len(t) > 2 else cCopyMigFrom):
                    co = drCopy(f['id'], cCopyMigTo, 'testCopyMig-'+f['name'])
                    wri.writerow({'src': f['id'], 'copy': co})
        elif t[0:2] == 'do': #------------- download file
            to = ''
            to = 'test-goffid-unload.csv'
            res = drDownload('1A2tTsVz7AR-zg4UI2aVv7jva-BYumFvu', to)
            rd = csv.DictReader(res if to == '' else open(to, newline=''))
            print('dictReader', rd.fieldnames)
            for r in rd:
                print(r)
            return
        else:
            err('bad test', f)
    else:
        err('bad fun', f, 'see help (-h)\n\n' + parser.format_help())
    print('end of', __file__)
if __name__ == '__main__': # run only when executed as a script
    main()