# python/goffid.py
"""
goffid.py: Google Files Folders Ids utilities
goFfId.py [-h] [-n name] [-p parent] cert fun plus*
cert: the google certificate to use, hint currently inuse: a=admin@spWallisellen.ch f=fiwiko@wlkl.ch w=wa@wlkl.ch
fun: the function to perform
Migration: replace foreign files by copies of root owner. uses 3 files
* id: files in drive, including current googleDriveId and originId (if replaced): goffid-<cert>.csv in driveRoot
* act: contains changes since last id, plus proposed migration actions : goffid-<cert>-act.csv in local directory
* mig: files in migration process with current/future and origin ids: goffid-<cert>-mig.csv in driveRoot
The different steps
* migAct: analyse drive list, id, mig and produce act, with migration proposals
* manually: check/modify act
* migMig: for the files in act with migration action,
create copies, for folders empty folders in zFremdeOrigiTemp
(if not already created and registered in mig)
and append them to mig, after removing entries in mig, that are already in id
* migSwap: for each file/folder in mig
move the new copy from zFremdeOrigiTemp to destination folder
for directories, move the contained elements to new folder
move the origin to zFremdeOriginale
* migDoc update the links in all documents (origin -> id from id and mig (if swapped)
* migId: analyse drive list, id, mig and upload a new version of id, with
updated list of origins
changes since old version of id
* mig2 steps migMig to migId in sequence
chown user: change the owner to user
of files owned by cert and residing in folders owned by user, if possible
l
list query: google q= is joined from plus. attention for shell quoting, use e.g.
./goffid.py w list 'mimeType="application/vnd.google-apps.folder"' and "'root'" in parents
up filename mimetype modifiedTime?; upload filename to drive
with name -n (default filename) in folder -p (default root) with the given modifiedTime
updDoc docid+: update the links in the docs with the given ids, using id and mig for the mapping origin -> new
8. 4.23 moved script to pc/bin, moved authorizations to /wkData/wk/extr2
"""
# from __future__ import print_function
import pickle
import os.path
import csv
from datetime import datetime
import time
from io import BytesIO
from io import StringIO
def err(*m):
    """ print an error message and abort by raising an exception
    m the message parts, printed space separated after a marker line
    raises ZeroDivisionError (kept for compatibility with the old `1 / 0` crash)
    """
    print('\n*****\nerror in', __file__ + ':', *m)
    # explicit raise instead of the former `x = 1 / 0` crash hack; the
    # exception type is unchanged so existing behavior is preserved
    raise ZeroDivisionError('goffid error, see message above')
def extendReverse(l, r):
    """ append the elements of r to list l in reverse order """
    l.extend(reversed(r))
def csvWrite(fn, li, flds):
    """ write a list of dictionaries to a csv file
    fn   filename to write to
    li   the list of dictionaries to write
    flds one string with the fieldnames to write, separated by ' ' or ','
    """
    fieldNames = flds.replace(',', ' ').split()
    with open(fn, 'w', newline='') as out:
        writer = csv.DictWriter(out, fieldnames=fieldNames)
        writer.writeheader()
        writer.writerows(li)
    print('written', len(li), 'rows to', fn)
def csvReadFi(fi, close=False):
    """ read the open csv file object fi into a list of dicts
    close: also close fi after reading
    """
    rows = [row for row in csv.DictReader(fi)]
    if close:
        fi.close()
    return rows
def csvRead(fn):
    """ read the csv file named fn into a list of dicts and log the row count """
    with open(fn, newline='') as f:
        rows = list(csv.DictReader(f))
        print('csvRead', fn, len(rows))
    return rows
#####go: google tiny helpers
gNow = datetime.now().timestamp() # seconds since epoch at script start, used to compute file ages
gDay = 86400 # seconds in a day
def goTst(s): # google date time string to timestamp
    """ convert a google RFC 3339 UTC timestamp string (ending in 'Z') to seconds since epoch
    the trailing 'Z' is replaced by an explicit '+00:00' offset: fromisoformat()
    on the bare string would yield a naive datetime, which timestamp() then
    interprets in LOCAL time and returns an epoch offset wrong by the timezone
    err() if s does not end in 'Z'
    """
    if s[-1] == 'Z':
        return datetime.fromisoformat(s[0:-1] + '+00:00').timestamp()
    else:
        err(f'goTst {s} bad timestamp format')
    # return datetime.fromisoformat('2021-07-11T12:16:47.774+00:00').timestamp()
#####go: google credentials and discovery
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
def goCredentials(crt):
    """ load the credentials for certificate prefix crt from google OAuth2
    cached in token-<crt>.pickle; refreshes an expired token or runs the
    browser login flow and re-saves the cache as needed
    see Google API Console https://console.developers.google.com/?authuser=0&project=quickstart-1611556606696
    (this docstring was previously placed after the first statement, where it
    was a dead string literal and goCredentials.__doc__ stayed None)
    """
    if type(crt) != str or len(crt) < 1:
        err(f"bad certification name: {crt} {type(crt)}")
    # If modifying these scopes, delete the file token.pickle.
    SCOPES =[ 'https://www.googleapis.com/auth/drive.metadata.readonly' # drive readonly
        , 'https://www.googleapis.com/auth/drive' # drive file update
        , 'https://www.googleapis.com/auth/documents.readonly' # docs readonly
        #, 'https://www.googleapis.com/auth/documents' # docs readWrite
        ]
    tokPiFi = f'token-{crt}.pickle' # the file to cache authorization
    credJson = '/wkData/pc/googleAPIcredentials.json' # the credentials file generated by google API
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists(tokPiFi):
        with open(tokPiFi, 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(credJson, SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open(tokPiFi, 'wb') as token:
            pickle.dump(creds, token)
    return creds
#####dr: google drive
# our standard fields for file (dicts); the suffix denotes the usage:
drFlds1 = "name, id, mimeType, createdTime, modifiedTime, trashed" # basic file attributes
drFldsAPI = drFlds1 + ", parents, owners(emailAddress)" # fields as requested from the drive API
drFldsL = drFlds1 + ", parent owner" # after drFiAtts flattened parents/owners
drFldsO = drFldsL + ", lv path" # plus tree level and path added by drWalk
drFldsP = drFldsO + ", origin change act" # plus the migration bookkeeping fields
drMimeFolder = 'application/vnd.google-apps.folder' # mimeType marking a folder
def pathSort(l):
    """ sort the list of file dicts in place by their 'path' field and return the list """
    l.sort(key=lambda fi: fi['path'])
    return l
def driveBuild(cert):
    """ build the global google drive v3 service driveSrv for the certificate prefix cert """
    global driveSrv
    driveSrv = build('drive', 'v3', credentials=goCredentials(cert))
def drInfo():
    """ print the drive user and storage info
    sets the globals drUser (email address of the drive user)
    and drRoot (file dict of the drive root folder)
    """
    global drUser, drRoot
    # Call the Drive v3 API
    ab = driveSrv.about().get(fields='user(displayName, emailAddress, kind), storageQuota').execute()
    # print(ab)
    drUser = ab['user']['emailAddress']
    drRoot = drGet('root')
    #print('root', drRoot)
    print(ab['user']['kind'], ab['user']['displayName'], drUser
        , '; storage (MB) usage', round(int(ab['storageQuota']['usage'])/1048576), ', trash', round(int(ab['storageQuota']['usageInDriveTrash'])/1048576)
        , ', limit', round(int(ab['storageQuota']['limit'])/1048576), 'root=', drRoot['name'], drRoot['id'])
def drFiAtts(f):
    """ massage the attributes of file dict f in place and return it:
    flatten the API list field 'parents' (exactly one id) to 'parent'
    and 'owners' (exactly one dict) to 'owner' (the email address);
    err() if there is not exactly one parent/owner
    """
    if 'parents' in f:
        if len(f['parents']) != 1:
            err('parents in', f)
        f['parent'] = f.pop('parents')[0]
    if 'owners' in f:
        ow = f['owners']
        if len(ow) != 1 or 'emailAddress' not in ow[0]:
            err('owners in', f)
        f['owner'] = f.pop('owners')[0]['emailAddress']
    return f
def drGet(fid, fFlds = drFldsAPI):
    """ get the file dict (after drFiAtts) for the given file id fid, requesting fields fFlds """
    return drFiAtts(driveSrv.files().get(fileId=fid,fields=fFlds).execute())
def drList(foId, fFlds = drFldsAPI, orderBy=None):
    """ return a list of files and folders directly in the google drive folder foId
    each file or folder as a dict with the given fields (after drFiAtts)
    foId    id of the parent folder, or 'q=<query>' for a raw drive query, or '' for no query
    fFlds   the file fields requested from the API
    orderBy optional drive API ordering, e.g. 'name'
    follows nextPageToken until all pages are collected
    """
    ff = []
    q = foId[2:] if foId[0:2] == 'q=' else '' if foId == '' else f"'{foId}' in parents"
    pageToken = None # renamed from `next`, which shadowed the builtin
    while True:
        res = driveSrv.files().list(
            pageSize=500, pageToken=pageToken, fields=f"nextPageToken, files({fFlds})"
            , q=q, orderBy=orderBy
            ).execute()
        # print(f'list found {len(res)}: {res}');
        ac = res.get('files', [])
        for a in ac:
            drFiAtts(a)
        ff += ac
        pageToken = res.get('nextPageToken')
        if pageToken is None:
            return ff
def drListOne(pa, nm, retNone=False):
    """ return the file dict for filename nm in parent folder pa
    retNone: return None instead of err() when the file does not exist
    err() if several files match or the single match is trashed
    """
    res = drList(f"q=name='{nm}' and '{pa}' in parents")
    if len(res) == 0:
        return None if retNone else err(f'drListOne: no file {pa}/{nm}')
    if len(res) == 1 and not res[0]['trashed']:
        return drFiAtts(res[0])
    err(f'drListOne: file {pa}/{nm} multiple or thrashed', res)
def drWalk(af,fFlds = drFldsAPI, stop=None):
    """ iterate over all files from one or several folders in a google drive recursively, depth first
    af can be
        the id of a single folder
        a list of folder ids
        a dictionary of folderId => path root name
    yields what drWalkLi yields
    """
    at = type(af)
    if at == dict:
        roots = af
    elif at == str:
        roots = {af: ''}
    else:
        roots = {i: '' for i in af}
    start = []
    for fid, rootPath in roots.items():
        fi = drGet(fid, fFlds)
        fi['path'] = rootPath
        fi['lv'] = 0
        start.append(fi)
    yield from drWalkLi(start, fFlds, stop)
def drWalkLi(ff, fFlds = drFldsAPI, stop=None):
    """ iterate over all files in list ff recursively, depth first.
    the elements of ff must be dicts with fields fFlds after drFiAtts plus 'path' and 'lv'
    stop: optional set of ids; ids already in it are neither yielded nor descended
          into, and every visited id is added, so it doubles as the visited set
    yields a dict of the fieldnames given by fFlds (after drFiAtts), plus
        'lv' the level within the filetree, starting with 0 in the folders in ff
        'path' the slash separated path from the walk root
    """
    doneOrQueued = set() if stop==None else stop
    print('drWalkLi(', [f['name'] for f in ff], fFlds, stop, ')')
    stck = []
    extendReverse(stck, ff) # push the start list to the work stack
    # for lists only append and pop from the end are efficient
    cF = cT = 0 # counters: folders descended / files yielded
    while len(stck) > 0:
        f = stck.pop() # pop next item from stck
        if f['id'] in doneOrQueued:
            print(f"--drWalkLi already doneOrQueued {f['path']} {f['lv']} {f['mimeType']} {f['id']}")
        else:
            doneOrQueued.add(f['id'])
            yield f
            cT += 1
            if f['mimeType'] == drMimeFolder:
                cF += 1
                ch = drList(f['id'], fFlds, 'name') # get contents of this folder
                lv = f['lv'] + 1
                pp = f['path'] + '/' if f['path'] != '' else ''
                for c in ch: # give the children their level and path before stacking them
                    c['lv'] = lv
                    c['path'] = pp + c['name']
                extendReverse(stck, ch) # push the children to the work stack
                print(f'--drWalkLi {cF}/{cT} stack {len(stck)} level {f["lv"]} beginning {f["path"]}')
    print(f'--drWalkLi {cF}/{cT} end of iterator')
def drCopy(id, pa, fn):
    """ copy the google drive file with the given id into folder pa with new name fn
    return the file dict of the new copy (after drFiAtts)
    """
    # gen = driveSrv.files().generateIds(count=2).execute() !!! do not use fails with: Generated IDs are not supported for Docs Editors formats.
    res = driveSrv.files().copy(fileId=id, fields=drFldsAPI, body={'name':fn, 'parents': [pa]}).execute()
    print('copy result', fn, res)
    return drFiAtts(res)
def drCreate(pa, nm, mime=drMimeFolder):
    """ create a file in folder pa, with the given name and mimeType, without data
    return the file dict of the new file (after drFiAtts)
    """
    return drFiAtts(driveSrv.files().create(body={'name': nm, 'parents': [pa], 'mimeType': mime}, fields=drFldsAPI).execute())
def drUploadVers(fn, pa, mime, dt=None, nm=None):
    """ upload a new version of a file to googleDrive, or create the file if it does not exist yet
    fn   local filename to upload
    pa   id of the destination folder on drive
    mime mimeType of the uploaded content
    dt   optional modifiedTime; a bare date or a missing trailing 'Z' is completed
    nm   name on drive, defaults to fn
    returns the file dict (after drFiAtts) of the uploaded file
    set keepRevisionForever=True, otherwise the revision will soon disappear, which is not what we need here
    setting the modifiedTime, will set this as the upload time of the version and order the revisions by this upload time
    """
    if nm == None:
        nm = fn
    if dt != None: #format RFC 3339: 2020-03-17T09:35:22.771Z
        if len(dt) == 10: # date only: complete to midnight
            dt += 'T00:00:00'
        if dt[-1] != 'Z':
            dt += 'Z'
    old = drListOne(pa, nm, True) # the existing drive file of this name, or None
    media = MediaFileUpload(fn, mimetype=mime)
    if old == None: # no such file yet: create it
        body = {'name': nm, 'parents': [pa]}
        if dt != None:
            body['modifiedTime'] = dt
        new = driveSrv.files().create(keepRevisionForever=True, body=body, media_body=media, fields=drFldsAPI).execute()
    else: # upload the content as a new version of the existing file
        new = driveSrv.files().update(fileId=old['id'], keepRevisionForever=True, media_body=media
            , body= {} if dt == None else {'modifiedTime': dt}, fields=drFldsAPI).execute()
    drFiAtts(new)
    print('drUploadVers uploaded', 'file' if old == None else 'version', new)
    return new
def drMove(id, oldPa, newPa):
    """ move the drive file id from parent oldPa to parent newPa
    returns the updated file dict (after drFiAtts); on API failure the
    exception is printed together with the current file state and reraised
    """
    # print('drMove', id, 'in', oldPa, 'to', newPa)
    try:
        res = driveSrv.files().update(fileId=id, removeParents=oldPa, addParents=newPa, fields=drFldsAPI).execute()
    except Exception as e:
        print(f"***\n*** error drMove({id}, {oldPa}, {newPa})\n*** exception:", e)
        print('*** drget', id, drGet(id))
        raise e
    drFiAtts(res)
    if res['parent'] != newPa: # the move did not take effect: report but continue
        print('drMove not in new parent', oldPa, 'to', newPa, 'res', res)
    print('drMove', oldPa, 'to', newPa, 'res', res)
    return res
def drDownload(sid, fn=None):
    """ download a google drive file with id sid
    if fn == None then return a StringIO (buffer in memory, decoded as utf-8)
    else write it to the given filename fn and return fn
    """
    fh = BytesIO() if fn == None else open(fn, 'wb')
    req = driveSrv.files().get_media(fileId=sid)
    downloader = MediaIoBaseDownload(fh, req)
    done = False
    while done is False: # download chunk by chunk until complete
        status, done = downloader.next_chunk()
        # print("Download %d%%." % int(status.progress() * 100))
    # print('drDownload', status)
    if fn == None:
        return StringIO(str(fh.getbuffer(), encoding='utf-8', errors='strict'))
    else:
        fh.close()
        return fn
def drChown(fi, us):
    """ change the owner of file with fileDict fi to user us
    updates the existing permission of us, or creates a new owner permission,
    then rereads the file and err()s if the ownership did not actually change
    """
    print('drChown changing', fi['path'], 'owner to', us)
    prms = driveSrv.permissions().list(fileId=fi['id'], fields='*').execute()['permissions']
    #print('prms', prms)
    pN = [p for p in prms if 'emailAddress' in p and p['emailAddress'] == us] # the permissions of the target user
    # print('pN ***', len(pN), pN)
    try:
        if len(pN) >= 1: # user already has a permission: promote it to owner
            res = driveSrv.permissions().update(fileId=fi['id'], permissionId=pN[0]['id'], transferOwnership=True, body={'role': 'owner'}).execute()
        else:
            res = driveSrv.permissions().create(fileId=fi['id'], transferOwnership=True, body={'role': 'owner', 'type': 'user', 'emailAddress': us}).execute()
    except BaseException as e:
        err('drChown for', fi['path'], 'update/create except', e, '\nfile', fi)
    # print(res)
    nn = drGet(fi['id']) # reread the file to verify the ownership change
    # print('after', nn)
    if fi['name'] != nn['name'] or fi['id'] != nn['id'] :
        err('drChown mismatch fi', fi, '<==>', nn)
    elif us == nn['owner']:
        pass # print(fi['path'], fi['id'], 'owner from', fi['owner'], 'changedTo', nn['owner'], nn)
    else:
        err('could not chown fi', fi, 'to nn', nn, 'permissions', prms)
###doc: google docs
def docsBuild(cert):
    """ build the global google docs v1 service docsSrv for the certificate prefix cert """
    global docsSrv
    docsSrv = build('docs', 'v1', credentials=goCredentials(cert))
""" metadata for docLinks for document structure, a list of m describing a document ele e ( a dict) as follows
[ key: if key in e, then use this m to describe e
, eKeys: set of allowed keys in e
, sKeys: if a set the go to subelement s = e[key] and the allowed keys in s are sKeys elif sKey=None s=e
, sty: process the style s[sty]
, chi: a list of subelements to process as document ele: for ck in chi: for c in s[ck]: recursively process docElement c
]
"""
docMeta = [ ['paragraph', {'paragraph', 'startIndex', 'endIndex'}, {'paragraphStyle', 'suggestedParagraphStyleChanges', 'elements', 'bullet', 'suggestedBulletChanges', 'positionedObjectIds', 'suggestedPositionedObjectIds'}, 'paragraphStyle', ['elements']]
, ['textRun', {'textRun', 'startIndex', 'endIndex'}, {'content', 'textStyle', 'suggestedInsertionIds', 'suggestedDeletionIds', 'suggestedTextStyleChanges'}, 'textStyle', []]
, ['pageBreak', {'pageBreak', 'startIndex', 'endIndex'}, {'textStyle'}, 'textStyle', []]
, ['sectionBreak', {'sectionBreak', 'startIndex', 'endIndex'}, {'sectionStyle'}, 'sectionStyle', []]
, ['tableOfContents', {'startIndex', 'endIndex', 'tableOfContents'}, {'content'}]
, ['footnoteReference', {'startIndex', 'endIndex', 'footnoteReference'}, None]
, ['inlineObjectElement', {'startIndex', 'endIndex', 'inlineObjectElement'}, {'inlineObjectId', 'textStyle'}, 'textStyle', []]
, ['table', {'startIndex', 'endIndex', 'table'}, {'rows', 'columns', 'tableRows', 'tableStyle'}, 'tableStyle', ['tableRows']]
, ['tableCells', {'startIndex', 'endIndex', 'tableCells', 'tableRowStyle'}, None, 'tableRowStyle', ['tableCells']]
, ['content', {'startIndex', 'endIndex', 'content', 'tableCellStyle'}, None, 'tableCellStyle', ['content']]
, ['horizontalRule', {'startIndex', 'endIndex', 'horizontalRule'}, {'textStyle'}, 'textStyle', []]
]
def docLinks(f):
    """ iterate over the links in the google doc given by the drive file dict f
    yields (element, url): the doc element {startIndex: , endIndex: , ...} and its link url
    err() on document structures not covered by docMeta
    """
    doc = docsSrv.documents().get(documentId=f['id']).execute()
    # print('document title', doc.get('title'), 'path', f['path'])
    cont = doc.get('body').get('content')
    # print('---\n content', cont, '\n---')
    stck = [] # work stack for the depth first traversal of the element tree
    extendReverse(stck, cont)
    while len(stck) > 0:
        c = stck.pop()
        for m in docMeta: # find the metadata entry describing element c
            if m[0] in c:
                break
        else:
            err('unknown content', c.keys(), c)
        if not set(c.keys()) <= m[1]: # validate the element keys against the metadata
            err('unknown keys in c', m, c.keys(), c)
        if m[2] == None:
            s = c
        else:
            s = c[m[0]] # go to nested subelement
            if not set(s.keys()) <= m[2]:
                err('bad keys in', m[0], m, '\n s.keys', s.keys(), '\n content', c)
        if len(m) < 4: # short metadata entry: element carries no style/children, skip it
            print('+++ ignoring', m[0])
            continue
        # print('*', m[0].ljust(15), c['startIndex'] if 'startIndex' in c else '-', c['endIndex'])
        if 'link' in s[m[3]]: # the style contains a link: yield its url
            if 'url' in s[m[3]]['link']:
                yield c, s[m[3]]['link']['url']
            elif 'headingId' not in s[m[3]]['link'] and 'bookmarkId' not in s[m[3]]['link']:
                err('no url in link', c, s)
        for e in m[4]: # descend into the child element lists
            extendReverse(stck, s[e])
    return
def goUrlId(u):
    """ locate the google file/folder/document id inside url u
    return (sx, ex) such that u[sx:ex] is the id
    or (None, None) if the url contains no id to migrate (non-google or known non-drive google urls)
    err() for google urls that look migratable but are not understood
    """
    uL = u.lower()
    # find the start of the google id in url
    sx = -1
    if uL.find('google.com/') < 0 :
        return None, None
    for p in ['google.com/document/d/', 'docs.google.com/document/u/0/d/', 'docs.google.com/document/u/1/d/', 'docs.google.com/document/u/2/d/', 'google.com/spreadsheets/d/', 'google.com/file/d/', 'google.com/drive/u/0/folders/', 'google.com/drive/u/1/folders/', 'google.com/drive/u/2/folders/', 'google.com/drive/folders/'
            , 'google.com/presentation/d/', '?id=', '&id=']:
        sx = uL.find(p)
        if sx >= 0:
            sx += len(p)
            break
    if sx < 0:
        if uL in {'https://drive.google.com', 'https://drive.google.com/drive', 'https://drive.google.com/drive/shared-with-me'}:
            return None, None
        for p in ['accounts.google.com', 'calendar.google.com/', 'groups.google.com/', 'meet.google.com/', 'takeout.google.com/', '@googlegroups']:
            if uL.find(p) >= 0:
                return None, None
        err('cannot migrate url', u)
    # find the end of the google id in url: it ends at the EARLIEST terminator.
    # the old code took the first terminator found in scan order '/?#&', which
    # overshot for urls like ...?id=X&a=b/c (the later '/' was used, so the id
    # wrongly included '&a=b')
    ex = len(uL)
    for t in '/?#&':
        tx = uL.find(t, sx)
        if 0 <= tx < ex:
            ex = tx
    return sx, ex
def docUpdLinks(f, o2n):
    """ update the links in google doc f according to the origin => newId dict o2n
    collects one updateTextStyle request per changed link url, plus insert/delete
    requests where the visible link text itself contains the old id, and sends
    them in a single batchUpdate; returns the number of requests sent
    """
    req = [] # the batchUpdate requests
    txt = [] # (startIndex, endIndex, newId) of link texts containing the old id
    cntL = 0 # number of links seen
    print('docUpdlinks', f['path'], f['id'], 'begin')
    for e, u in docLinks(f):
        cntL += 1
        sx, ex = goUrlId(u) # find google id in url
        if sx == None:
            print(' ignoring url', u, 'at', e['startIndex'], e['endIndex'])
        else:
            old = u[sx: ex]
            if old not in o2n:
                print(' keeping url', u, 'id=', old, 'at', e['startIndex'], e['endIndex'])
            else:
                new = o2n[old]
                ln = u[0: sx] + new + u[ex:]
                if 'textRun' in e and 'content' in e['textRun']:
                    ix = e['textRun']['content'].find(old)
                    if ix >= 0: # the visible text shows the old id: schedule a text replacement too
                        t = (e['startIndex']+ix, e['startIndex']+ix+len(old), new)
                        if t[0] < e['startIndex'] or t[1] > e['endIndex']:
                            err('boundary in text', old, new, t, e)
                        txt.append(t)
                print(' updating url', u, 'id=', old, '==>', new, 'at', e['startIndex'], e['endIndex'])
                req.append({'updateTextStyle': {'textStyle': {'link': {'url': ln}}, 'fields': 'link.url', 'range': {'startIndex': e['startIndex'], 'endIndex': e['endIndex']}}})
    print('docUpdlinks', f['path'], len(req), 'updates', 'of', cntL, 'links and', len(txt), 'texts')
    dlt = 0 # warning startIndex etc. change with text modifications!
    for t in txt: # insert the new id, then delete the old text range, shifting later indices by dlt
        req.append({'insertText': {'text': t[2], 'location': {'index': t[1]+dlt}}})
        req.append({'deleteContentRange': {'range': {'startIndex': t[0] + dlt, 'endIndex': t[1]+dlt}}})
        dlt += len(t[2]) - t[1] + t[0]
    if len(req) > 0:
        # print('req', len(req), req)
        rs = docsSrv.documents().batchUpdate(documentId=f['id'], body={'requests' : req}).execute()
        # print('migrated', len(req), "req's, response", rs)
    return len(req)
###mig: Migrate foreign files and keep track of id's, see module __doc__
def migRead(goRo, fn, stop=None):
    """ read one of the migration file lists and index it
    goRo id of the drive root folder; '' reads fn from the local disk instead
    fn   csv file name; ignored when stop is given (then the live drive is walked)
    stop optional id set passed to drWalk (to exclude the work folders)
    returns (l, i, o): the list of file dicts, a dict id => file, a dict origin => file
    err() on duplicate ids or origins
    """
    if stop != None:
        l = list(drWalk(goRo, stop=stop)) # read the current state directly from the drive
    elif goRo == '':
        l = csvRead(fn) # read the local csv file
    else:
        l = csvReadFi(drDownload(drListOne(goRo, fn)['id'])) # read the csv file stored on drive
    i = {}
    o = {}
    for e in l:
        i[e['id']] = e if e['id'] not in i else err(fn, 'duplicate id', e)
        if 'origin' not in e: # normalize: every entry gets an origin field
            e['origin'] = ''
        if len(e['origin']) >= 1:
            o[e['origin']] = e if e['origin'] not in o else err(fn, 'duplicate origin', e)
        if 'kind' in e: # drop fields not in our field lists
            del e['kind']
        if 'chFrom' in e:
            del e['chFrom']
        e['trashed'] = str(e['trashed']) # after csv boolean is migrated to str - and then compares do not work!
    print(fn, 'len', len(l), len(i), len(o))
    # pathSort(l)
    return l, i, o
def migMerge(root, user, stop, idF, migF):
    """ merge the current drive list with the old id and mig lists; used for migration phases Act and Id
    root the drive root folder dict, user the email of the drive user
    stop set of work folder ids to exclude from the drive walk
    idF, migF the csv filenames of the id and mig lists stored on drive
    returns the list of all current files, with 'origin' transferred from id/mig,
    'change' describing the differences to the old id list, and 'act' a proposed action
    """
    print(f'\nmigmerge root={root}\n, user={user}\n, stop={stop}\n, idF, migF, actF')
    aL, aI, aO = migRead(root['id'], '*drive*', stop) # the current state of the drive
    idL, idI, idO = migRead(root['id'], idF) # the last uploaded id list
    migL, migI, migO = migRead(root['id'], migF) # the files in migration
    for o,i in idO.items(): # transfer origins from old id
        if o in aI:
            err('old id origin in act', i)
        elif i['id'] not in aI:
            print('???idO id not in aI', i)
        else:
            a = aI[i['id']]
            if a['origin'] != '':
                err('origin already set, old id', i, 'in act', a)
            a['origin'] = o
            aO[o] = a
            print('origin added to act from old id', i)
    for o,m in migO.items(): # transfer origins from mig
        if o in aI:
            print('mig origin', 'and id' if m['id'] in aI else '', ' in act', m)
        elif m['id'] not in aI:
            err ('mig: neither id nor origin in act', m)
        else:
            a = aI[m['id']]
            if a['origin'] != '':
                if o != a['origin']:
                    err('mig origin mismatch, mig=', m, 'act=', a)
                else:
                    print('mig same origin', m)
            else:
                a['origin'] = o
                aO[o] = a
                print('origin added to act from mig', m)
    for a in aL:
        ch = '' # compute change
        cp = None # the old id entry to compare a against
        if a['id'] in aO:
            err('act id in act origin', a)
        elif a['id'] not in idI:
            if a['origin'] in idI: # swapped since the old id list
                ch += ' o=-'
                cp = idI[a['origin']]
                if cp['origin'] != '':
                    err('swapped but origin already swapped\n act', a, '\n origin', cp)
            else: # a file new since the old id list
                ch += ' +=-'
        else:
            cp = idI[a['id']]
        if cp != None: # record the differences to the old id entry
            if a['name'] != cp['name']:
                ch += ' n=' + cp['name'].replace('=', '?')
            if a['origin'] != cp['origin'] and ( len(ch) < 4 or ch[0:4] != ' o=-' ) :
                err('origin changed change', ch, '\n act', a, '\n old id', cp)
            aP = a['path'].rsplit('/', 1)
            aP = aP[0] if len(aP) == 2 else ''
            oP = cp['path'].rsplit('/', 1)
            oP = oP[0] if len(oP) == 2 else ''
            if aP != oP: # parent path changed
                ch += ' p=' + oP.replace('=', '?')
            if a['trashed'] != cp['trashed']:
                ch += ' t=' + str(cp['trashed'])
            if a['createdTime'] != cp['createdTime']:
                ch += ' c=' + cp['createdTime']
            if a['modifiedTime'] != cp['modifiedTime']:
                if a['modifiedTime'] < cp['modifiedTime']:
                    err(f'cmodifiedTime older act={a} old={cp}')
                ch += ' m=' + cp['modifiedTime']
        a['change'] = ch.strip()
        m = goTst(a['modifiedTime']) # compute act: m = age of last modification, c = created-to-modified span
        c = m - goTst(a['createdTime'])
        m = gNow - m
        a['act'] = ('-=o' if a['owner'] == user else '-=t' if a['trashed'] != 'False' else 'm=f' if a['mimeType'] == drMimeFolder \
                else 'm=m' if m > 430 * gDay else 'm=c' if abs(c) < 10 else '-=e') \
            + ' ' + (str(round(m)) + 's' if m < gDay else str(round(m/gDay)) + 'd') \
            + ' ' + (str(round(c)) + 's' if c < gDay else str(round(c/gDay)) + 'd')
    for i in idL: # add entries deleted since old id
        if i['id'] not in aI and i['id'] not in aO:
            if i['trashed'] == 'del': # already recorded as deleted
                i['act'] = i['change'] = ''
            else: # newly disappeared: mark as deleted
                i['trashed'] = 'del'
                i['act'] = ''
                sP = i['path'].rsplit('/', 1)
                sP = sP[0] if len(sP) == 2 else ''
                i['change'] = f"-=+ n={i['name']} p={sP}"
                # print('after change', i)
            aL.append(i)
            aI[i['id']] = i
            if i['origin'] != '':
                if i['origin'] in aI:
                    err('ori already in act id', i) # fixed: reported the stale loop variable a instead of i
                elif i['origin'] in aO:
                    err('ori already in act origin', i) # fixed: reported the stale loop variable a instead of i
                else:
                    aO[i['origin']] = i
    return aL
def migMig(goRo, idF, migF, actF, fT):
    """ phase Mig of migration
    create a copy of the files and folders to migrate according to act, in zFremdeOrigiTemp
    write new version of mig: remove entries already in id, and add entries for newly created files/folders
    goRo the drive root folder dict, fT the zFremdeOrigiTemp folder dict
    idF, migF the csv filenames on drive, actF the local act csv filename
    returns the new mig list
    """
    print(f'migMig({goRo}, {idF}, {migF}, {actF}, {fT}')
    idL, idI, idO = migRead(goRo['id'], idF)
    migL, migI, migO = migRead(goRo['id'], migF)
    actL, actI, actO = migRead('', actF) # act is read from the local directory
    nL = [] # the new mig list
    for m in migL: # add entries from old mig, that are not yet in old id
        if m['origin'] == '':
            err('empty origin in mig', m)
        if m['id'] in idI:
            if m['origin'] == idI[m['id']]['origin']:
                continue # already in old id, do not add it to new mig
        nL.append(m)
    cc = 0 # number of newly created copies
    for a in actL:
        if len(a['act']) < 1 or a['act'][0] != 'm': # do not migrate
            continue
        if a['id'] in migO: # already created
            continue
        if a['origin'] != '': # unexpected: entries to migrate must not yet carry an origin
            err('cannot migrate, origin in actF notEmpty', a)
        if a['id'] in idI:
            if idI[a['id']]['origin'] != '':
                err('cannot migrate, origin in idF notEmpty', a, idI[a['id']])
        if a['trashed'] != 'False':
            if a['trashed'] == 'del':
                continue
            err('act trashed, now what?', a)
        if a['mimeType'] == drMimeFolder: # folders get a fresh empty folder, files a real copy
            new = drCreate(fT['id'], a['name'])
        else:
            new = drCopy(a['id'], fT['id'], a['name'])
        print('migCreated', new, 'from', a)
        a['origin'] = a['id'] # remember the original; the new copy becomes the current id
        a['id'] = new['id']
        nL.append(a)
        cc += 1
    print('migMig create', cc, 'new files, from total', len(actL))
    return nL
def migSwap(goRo, migF, fO, fT):
    """ Swap phase of migration, for each entry in mig
    move the new copy from zFremdeOrigiTemp (fT) to the parent of the origin
    for folders, move the contained elements of the origin into the new folder
    move the origin to zFremdeOriginale (fO)
    returns the dict origin id => new id of the swapped entries
    """
    migL, migI, migO = migRead(goRo['id'], migF)
    pathSort(migL) # parents sort before children, so parent folders are swapped first
    cnt = 0
    o2n = {} # origin id => new id of the entries swapped so far
    for m in migL:
        old = m['origin']
        new = m['id']
        par = m['parent']
        if par in o2n: # the parent folder itself was swapped: use its new copy
            par = o2n[par]
        o2n[old] = new
        isFold = m['mimeType'] == drMimeFolder
        print('swapping', 'folder' if isFold else 'file', m['path'])
        drMove(new, fT['id'], par)
        if isFold: # move contents of folder
            co = drList(old)
            if len(co) > 0:
                for c in co:
                    drMove(c['id'], old, new)
                print('moved content', len(co), m['path'], datetime.now())
                for i in range(1, 12): # retry with growing delay until drive reports the folder empty
                    co = drList(old)
                    if len(co) == 0:
                        break
                    print('old contains', len(co), datetime.now())
                    time.sleep(i)
                if len(co) != 0:
                    err('after move contents', m['path'], 'old not empty', co)
        drMove(old, par, fO['id'])
    return o2n
def migO2n(*di):
    """ merge one or several origin => fileDict dicts (as built by migRead)
    into a single origin => newId dict
    err() on empty ids/origins, an origin mismatching its fileDict,
    or conflicting new ids for the same origin
    """
    o2n = {}
    for d in di:
        for o, nFi in d.items():
            n = nFi['id']
            if o == '' or n == '':
                err('bad empty origin', o, n)
            elif o != nFi['origin']:
                err('origin', o, 'mismatch for', nFi)
            elif o not in o2n:
                o2n[o] = n
            elif n != o2n[o]:
                # fixed: referenced the undefined names `new` and `o2c`,
                # which raised NameError before err could report the conflict
                err('old', o, 'new', n, 'mismatches o2n[o]', o2n[o])
    return o2n
def migDoc(root, idF, migF, wfO, wfT):
    """ update the links in all docs
    the link translation origin => newId is created from the current drive list, idF and migF
    root the drive root folder dict; wfO/wfT the zFremdeOriginale/zFremdeOrigiTemp folder dicts,
    which are excluded from the drive walk
    """
    aL, aI, aO = migRead(root['id'], '*drive*', {wfO['id'], wfT['id']})
    idL, idI, idO = migRead(root['id'], idF)
    migL, migI, migO = migRead(root['id'], migF)
    o2n = migO2n(idO, migO)
    for o, n in o2n.items(): # origins must no longer exist on the drive
        if o in aI:
            # fixed: used the undefined name `i`, raising NameError before the message
            err('origin in act', o, aI[o])
    print('after mig', len(o2n), 'in o2n')
    migDocW(aL, o2n)
def migDocW(aL, o2n):
    """ update the links in the google docs in file list aL according to the origin => newId dict o2n
    non-document files are only counted; prints per-mimeType statistics
    """
    cnt = {} # files seen per mimeType
    cntU = 0 # documents that actually got link updates
    docMime = 'application/vnd.google-apps.document'
    for a in aL:
        mt = a['mimeType']
        cnt[mt] = cnt.get(mt, 0) + 1
        if mt == docMime:
            u = docUpdLinks(a, o2n)
            if u > 0:
                cntU += 1
    # cnt.get: the old direct lookup raised KeyError when aL contained no documents at all
    print('links updated in', cntU, 'of', cnt.get(docMime, 0), 'documents, stats', cnt)
###chown: change owner
def chown(us):
    """ change the owner of the drive user's google files residing in folders owned by user us, to us
    walks all root folders owned by us (excluding the zFremdeOrigi* work folders)
    and calls drChown for every google-mime file currently owned by the drive user
    """
    print('chown from', drUser, 'to', us)
    cnt = {'chown': 0, 'mimeNotGoogle': 0} # statistics per owner / action
    wfldO, wfldT, wf, wfIds = workFolders()
    ff = drList("q='" + us + "' in owners and mimeType = 'application/vnd.google-apps.folder' and not name contains 'zFremdeOrigi'")
    print('chown found', len(ff), 'folders owned by', us, [f['name'] for f in ff])
    i2f = {f['id']: f for f in ff}
    gg = [f for f in ff if 'parent' not in f or f['parent'] not in i2f if f['id'] not in wfIds] # the top folders owned by us
    for g in gg: # prepare the top folders as walk roots
        g['lv'] = 0
        g['path'] = g['name']
    print('chown found', len(gg), 'root folders owned by', us, [g['path'] for g in gg])
    for f in drWalkLi(gg, stop= wfIds.copy()):
        if f['owner'] in cnt:
            cnt[f['owner']] += 1
        else:
            cnt[f['owner']] = 1
        if f['owner'] != drUser:
            pass # print('not my file', f['path'], f['owner'])
        elif not f['mimeType'].startswith('application/vnd.google-apps.'):
            cnt['mimeNotGoogle'] += 1
            # print('not google file', f['path'], f['mimeType'])
        else:
            cnt['chown'] += 1
            drChown(f, us)
    print('chown from', drUser, 'to', us, 'cnt', cnt)
###helpers
def workFolders():
    """ locate the two migration work folders in the drive root
    returns (nameOriginale, nameOrigiTemp, [folderDictOriginale, folderDictTemp], set of their ids)
    err() unless both are present exactly once (ordered by name: Originale before OrigiTemp)
    """
    wfldO = 'zFremdeOriginale'
    wfldT = 'zFremdeOrigiTemp'
    wf = drList(f"q=(name = '{wfldO}' or name = '{wfldT}') and '{drRoot['id']}' in parents and not trashed", orderBy='name') # 'root' in parents and
    print('wf', wf)
    if len(wf) != 2 or wf[0]['name'] != wfldO or wf[1]['name'] != wfldT :
        err('bad work folders wf:', wf)
    return wfldO, wfldT, wf, {w['id'] for w in wf}
###main:
import argparse
def main():
    """ parse the command line, build the drive service and dispatch to the function args.fun """
    parser = argparse.ArgumentParser(description="goffid: googleDriveFilesFoldersId's: ")
    parser.add_argument('cert', help='prefix for certificate. hint a=admin@spWallisellen.ch f=fiwiko@wlkl.ch w=wa@wlkl.ch')
    parser.add_argument('fun', default=None, help='function to perform (see below)')
    parser.epilog= "functions: " + __doc__.partition('fun:')[2] # reuse the module docstring as help text
    parser.formatter_class=argparse.RawDescriptionHelpFormatter
    parser.add_argument('plus', nargs='*', help='arguments for fun')
    parser.add_argument('-n', help='name of google file')
    parser.add_argument('-p', help='id of parent')
    parser.add_argument('-r', action='store_true', help='recurse (for cp otherwise always true)')
    # parser.print_help()
    args = parser.parse_args()
    print(__file__, f"{args.cert=} {args.fun=}", args)
    driveBuild(args.cert)
    drInfo()
    filePref = 'goffid-' + args.cert # the csv filenames depend on the certificate
    idF = filePref + '.csv'
    actF = filePref + '-act.csv'
    migF = filePref + '-mig.csv'
    f = args.fun
    print('fun', f)
    if f == 'chown': #------------- change owner of own files in folders of the given user
        if len(args.plus) != 1:
            err('user missing or multiple for chown user')
        chown(args.plus[0])
    elif f == 'cp':
        print('cp, recursive', args.r)
    elif f == 'l': #------------- list files recursively to csv and open it
        ll = list(drWalk('root' if len(args.plus) < 1 else args.plus))
        of = filePref + '-l.csv'
        csvWrite(of, ll, drFldsO)
        os.system(f'libreoffice --calc {of}')
    elif f == 'list': #------------- list files by drive query to csv and open it
        ll = drList('q=' + ' '.join(args.plus))
        if len(ll) <= 10:
            for l in ll:
                print(l['name'], l['mimeType'], l['owner'])
        of = filePref + '-list.csv'
        csvWrite(of, ll, drFldsL)
        os.system(f'libreoffice --calc {of}')
    elif f[0:3] == 'mig': #------------- the migration phases, see module __doc__
        print('migrate foreign owned filese and folders')
        wfldO, wfldT, wf, wfIds = workFolders()
        if f == 'migAct': # analyse and propose actions
            mm = migMerge(drRoot, drUser, wfIds, idF, migF)
            csvWrite(actF, pathSort(mm), drFldsP)
            os.system(f'libreoffice --calc {actF}')
        if f == 'migMig' or f == 'mig2': # create the copies and upload the new mig list
            mc = migMig(drRoot, idF, migF, actF, wf[1])
            # print('migMig', mc)
            csvWrite(migF, pathSort(mc), drFldsP)
            drUploadVers(migF, 'root', 'text/csv')
        if f == 'migSwap' or f == 'mig2': # swap copies and originals
            migSwap(drRoot, migF, *wf)
        if f == 'migDoc' or f == 'mig2': # update the links in all documents
            docsBuild(args.cert)
            migDoc(drRoot, idF, migF, *wf)
        if f == 'migId' or f == 'mig2': # produce and upload the new id list
            mm = migMerge(drRoot, drUser, {w['id'] for w in wf}, idF, migF)
            csvWrite(idF, pathSort(mm), drFldsP)
            drUploadVers(idF, 'root', 'text/csv')
            os.system(f'libreoffice --calc {idF}')
    elif f == 'up': #------------- uploadVersion: up<fn>,<datetime>
        if len(args.plus) == 2:
            args.plus.append(None)
        if len(args.plus) != 3:
            err('up arguments filename, mimetype required, modifiedTime optinal')
        if args.p == None:
            args.p = drRoot['id']
        print('uploadVersion from file', args.plus[0], ', mimeType', args.plus[1], 'to googlefile name', args.n, 'folder', args.p, 'with modifiedTime', args.plus[2])
        drUploadVers(args.plus[0], args.p, args.plus[1], dt=args.plus[2], nm=args.n)
    elif f == 'updDoc': #------------- update the links in the docs given by id
        if len(args.plus) < 1:
            err('updDoc missing docids')
        idL, idI, idO = migRead(drRoot['id'], idF)
        migL, migI, migO = migRead(drRoot['id'], migF)
        o2n = migO2n(idO, migO)
        ff = [drGet(p) for p in args.plus]
        for f in ff: # add path and lv as expected by migDocW
            g = migI[f['id']] if f['id'] in migI else idI[f['id']] if f['id'] in idI else {'path': '?/' + f['name'], 'lv': 0}
            f['path'] = g['path']
            f['lv'] = g['lv']
        docsBuild(args.cert)
        migDocW(ff, o2n)
    elif len(f) > 2 and f[0: 2] == 't=': #------------- tests t=<test>
        cCopyMigCsv = 'testCopyMig.csv'
        cCopyMigFrom = '1l1FqcBu6uQZEXagWRJiwl6_dJtrK7TdY' # folder spWalHeizen
        cCopyMigTo = '13BzFmCJUDl8lYtsyWKDpgYKCPBx6Xaoz' # folder testCopyMig
        t = f[2:]
        print('tst', t)
        if t[0] == 'l': #----------------- list files recursively from given folder
            for f in drWalk(t[1:] if len(t) > 1 else {'1l1FqcBu6uQZEXagWRJiwl6_dJtrK7TdY':':h', 'root':':w'}):
                print(((' ' * f['lv'] + f['name']).ljust(29) + ' ' + f['id']).ljust(70) + ' ' + f['path'])
        elif t[0:2] == 'co': #------------- test copyMig 1: copy files recursively to cCopyMigTo and write cCopyMigCsv
            with open(cCopyMigCsv, 'w', newline='') as csvfile:
                wri = csv.DictWriter(csvfile, fieldnames=['src', 'copy'])
                wri.writeheader()
                for f in drWalk(t[2:] if len(t) > 2 else cCopyMigFrom):
                    co = drCopy(f['id'], cCopyMigTo, 'testCopyMig-'+f['name'])
                    wri.writerow({'src': f['id'], 'copy': co})
        elif t[0:2] == 'do': #------------- download file
            to = ''
            to = 'test-goffid-unload.csv'
            res = drDownload('1A2tTsVz7AR-zg4UI2aVv7jva-BYumFvu', to)
            rd = csv.DictReader(res if to == '' else open(to, newline=''))
            print('dictReader', rd.fieldnames)
            for r in rd:
                print(r)
            return
        else:
            err('bad test', f)
    else:
        err('bad fun', f, 'see help (-h)\n\n' + parser.format_help())
    print('end of', __file__)
if __name__ == '__main__': # run only when executed as a script
    main()