From 58b93e7eb011fedeba31117fbe8c4be3822bf8f6 Mon Sep 17 00:00:00 2001 From: a-sansara Date: Mon, 17 Sep 2012 19:13:10 +0200 Subject: [PATCH] ImpraIndex adding md5 as key index --- impra/core.py | 94 +++++++++++++++++++++++++++++++++----------------- impra/crypt.py | 16 ++++++--- impra/imap.py | 9 +++-- impra/util.py | 15 ++++++-- 4 files changed, 90 insertions(+), 44 deletions(-) diff --git a/impra/core.py b/impra/core.py index a145247..f8eeac2 100644 --- a/impra/core.py +++ b/impra/core.py @@ -44,8 +44,8 @@ from os import remove, urandom, sep from os.path import abspath, dirname, join, realpath, basename, getsize, splitext from re import split as regsplit, match as regmatch, compile as regcompile, search as regsearch from impra.imap import ImapHelper, ImapConfig -from impra.util import __CALLER__, RuTime, formatBytes, randomFrom, bstr, quote_escape, stack, run, file_exists, get_file_content, DEBUG, DEBUG_ALL, DEBUG_LEVEL, DEBUG_NOTICE, DEBUG_WARN -from impra.crypt import Kirmah, ConfigKey, Noiser, Randomiz, hash_sha256 +from impra.util import __CALLER__, RuTime, formatBytes, randomFrom, bstr, quote_escape, stack, run, file_exists, get_file_content, DEBUG, DEBUG_ALL, DEBUG_LEVEL, DEBUG_NOTICE, DEBUG_WARN, mkdir_p +from impra.crypt import Kirmah, ConfigKey, Noiser, Randomiz, hash_sha256, hash_md5_file @@ -91,7 +91,7 @@ class FSplitter : m = mmap(f.fileno(), 0) p = 0 psize = ceil(getsize(fromPath)/hlst['head'][1]) - print(formatBytes(psize)+' '+str(len(hlst['data']))+' parts') + print(formatBytes(getsize(fromPath))+' on '+str(len(hlst['data']))+' parts (~'+formatBytes(psize)+')') while m.tell() < m.size(): self._splitPart(m,p,psize,hlst['data'][p]) p += 1 @@ -111,12 +111,17 @@ class FSplitter : rt.stop() - def deployFile(self, hlst, ext='', fake=False): + def deployFile(self, hlst, fileName, ext='', dirs=None, fake=False): """""" rt = RuTime(eval(__CALLER__())) p = 0 hlst['data'] = sorted(hlst['data'], key=lambda lst: lst[0]) - fp = open(self.DIR_DEPLOY+hlst['head'][0]+ext, 'wb+') + + if dirs is not None and dirs!='none' : + dirPath = join(self.DIR_DEPLOY,dirs)+sep + mkdir_p(dirPath) + else: dirPath = self.DIR_DEPLOY + fp = open(dirPath+fileName+ext, 'wb+') depDir = self.DIR_INBOX if fake : depDir = self.DIR_OUTBOX while p < hlst['head'][1] : @@ -207,6 +212,23 @@ class ImpraIndex: SEP_KEY_INTERN = '@' """Separator used for internal key such categories""" + MD5 = 7 + """""" + HASH = 0 + """""" + LABEL = 1 + """""" + PARTS = 2 + """""" + EXT = 3 + """""" + OWNER = 4 + """""" + CATG = 5 + """""" + UID = 6 + """""" + def __init__(self, key, mark, encdata='', dicCategory={}, id=0): """Initialize the index with rsa and encoded data @@ -229,12 +251,12 @@ class ImpraIndex: if not self.SEP_KEY_INTERN+k in self.dic: self.dic[self.SEP_KEY_INTERN+k] = dicCategory[k] - def add(self,key, label, count, ext='', usr='', cat=''): + def add(self,key, label, count, ext='', usr='', cat='', md5=''): """Add an entry to the index with appropriate label, key used by entry to decode data, and parts count """ - if self.search(label) == None : - self.dic[label] = (key,label,count,ext,usr,cat, self.id) + if self.search(md5) == None : + self.dic[md5] = (key,label,count,ext,usr,cat, self.id) self.id +=1 else : print(label+' already exist') @@ -267,17 +289,24 @@ class ImpraIndex: """Search the corresponding label in the index""" rt = RuTime(eval(__CALLER__(sid))) l = None - r = [v for i, v in enumerate(self.dic) if not v.startswith(self.SEP_KEY_INTERN) and self.dic[v][6] == int(sid)] + r = [k for i, k in enumerate(self.dic) if not k.startswith(self.SEP_KEY_INTERN) and self.dic[k][self.UID] == int(sid)] if len(r)==1: l = r[0] rt.stop() return l + def searchByFileHash(self,md5): + """""" + e = None + if md5 in self.dic: + e = True + return e + def searchByPattern(self,pattern): """""" rt = RuTime(eval(__CALLER__(pattern))) l = None - r = [ v for i,v in enumerate(self.dic) if not v.startswith(self.SEP_KEY_INTERN) and regsearch(pattern,self.dic[v][1]) is not None ] - l = [self.dic[k][6] for k in r] + r = [ k for i,k in enumerate(self.dic) if not k.startswith(self.SEP_KEY_INTERN) and regsearch(pattern,self.dic[k][self.LABEL]) is not None ] + l = [self.dic[k][self.UID] for k in r] rt.stop() return l @@ -289,14 +318,14 @@ class ImpraIndex: v = self.dic.get(k) k = k.lstrip('\n\r') if not k[0]==self.SEP_KEY_INTERN and len(k)>1: - if matchIds==None or v[6] in matchIds: - data += str(v[6]).rjust(1+ceil(len(str(v[6]))/10),' ')+' ' - data += str(v[0])[0:12]+'... ' - data += str(v[1]).ljust(42,' ')+' ' - data += str(v[2]).rjust(2,'0')+' ' - data += str(v[3]).ljust(5,' ')+' ' - data += self.getUser(str(v[4])).ljust(15,' ')+' ' - data += str(v[5])+' ' + if matchIds==None or v[self.UID] in matchIds: + data += str(v[self.UID]).rjust(1+ceil(len(str(v[self.UID]))/10),' ')+' ' + data += str(k)[0:12]+'... ' + data += str(v[self.LABEL]).ljust(42,' ')+' ' + data += str(v[self.PARTS]).rjust(2,'0')+' ' + data += str(v[self.EXT]).ljust(5,' ')+' ' + data += self.getUser(str(v[self.OWNER])).ljust(15,' ')+' ' + data += str(v[self.CATG])+' ' #~ elif len(k)>1: #~ print(k,'=',v) data = data+self.SEP_ITEM @@ -394,7 +423,7 @@ class ImpraStorage: # getFromFile if uid != None and int(self.idx) == int(uid) and file_exists(self.pathInd): encData = get_file_content(self.pathInd) - print(' index in cache') + print('--\nindex in cache') else: encData = self._getCryptIndex() with open(self.pathInd, mode='w', encoding='utf-8') as o: @@ -441,8 +470,10 @@ class ImpraStorage: #~ self.fsplit.deployFile(hlst,True) _, ext = splitext(path) try: - if self.index.search(label)==None : - hlst = self.fsplit.addFile(path,label) + md5 = hash_md5_file(path) + print('--\nmd5sum `%s` %s' % (path,md5)) + if not self.index.searchByFileHash(md5) : + hlst = self.fsplit.addFile(path,md5) if DEBUG and DEBUG_LEVEL <= DEBUG_NOTICE : print(hlst['head']) for v in hlst['data']: @@ -454,11 +485,12 @@ class ImpraStorage: msg = self.mb.build(nameFrom,usr,hlst['head'][2],self.fsplit.DIR_OUTBOX+row[1]+'.ipr') self.ih.send(msg.as_string(), self.rootBox) remove(self.fsplit.DIR_OUTBOX+row[1]+'.ipr') - self.index.add(hlst['head'][3],hlst['head'][0],hlst['head'][1],ext,self.mb.getHashName(usr),catg) + + self.index.add(hlst['head'][3],label,hlst['head'][1],ext,self.mb.getHashName(usr),catg,md5) self.saveIndex() self.conf.set('nid', str(self.index.id),'index') else : - raise Exception(label + ' already exist on server') + print('--\nfile already exist on server as `%s` [id:%i]\n' % (self.index.dic[md5][ImpraIndex.LABEL],self.index.dic[md5][ImpraIndex.UID])) except Exception as e : print(e) rt.stop() @@ -469,12 +501,12 @@ class ImpraStorage: rt = RuTime(eval(__CALLER__('"%s"' % label)),DEBUG_INFO) if label==None : - print(str(label)+' unexist') + print('--\n'+str(label)+' unexist') else : key = self.index.search(label) if label!=None and key!=None: - ck = ConfigKey(key[0]) - count = int(key[2]) + ck = ConfigKey(key[ImpraIndex.HASH]) + count = int(key[ImpraIndex.PARTS]) hlst = ck.getHashList(label,count,True) ids = self._getIdsBySubject(hlst['head'][2]) if len(ids) >= count: @@ -487,16 +519,16 @@ class ImpraStorage: print(hlst['head']) for v in hlst['data']: print(v) - self.fsplit.deployFile(hlst, key[3]) + self.fsplit.deployFile(hlst, key[ImpraIndex.LABEL], key[ImpraIndex.EXT], key[ImpraIndex.CATG]) else : #raise Exception(label+' is private') - print(label+' is private') + print('--\n'+label+' is private') else : #raise Exception(label+' : invalid count parts '+str(len(ids))+'/'+str(count)) - print(label+' : invalid count parts '+str(len(ids))+'/'+str(count)) + print('--\n'+label+' : invalid count parts '+str(len(ids))+'/'+str(count)) else: #raise Exception(str(label)+' not on the server') - print(str(label)+' not on the server') + print('--\n'+str(label)+' not on the server') rt.stop() def clean(self): diff --git a/impra/crypt.py b/impra/crypt.py index e28364d..84c1a6f 100644 --- a/impra/crypt.py +++ b/impra/crypt.py @@ -29,7 +29,7 @@ from impra.util import RuTime, __CALLER__, stack, DEBUG from base64 import urlsafe_b64encode, b64decode from binascii import b2a_base64, a2b_base64 -from hashlib import sha256 +from hashlib import sha256, md5 from math import log, floor, ceil from random import choice from os import urandom @@ -45,6 +45,12 @@ def hash_sha256(data): """ return str(sha256(bytes(data,'utf-8')).hexdigest()) +def hash_md5_file(path): + """Get a md5 hash of file from path + :Returns: `str` + """ + return md5(open(path, mode='rb').read()).hexdigest() + def randomFrom(val, sval=0): """Get a random number from range `sval=0` to `val` :Returns: `int` @@ -229,8 +235,8 @@ class Kirmah: dataEnc = str(b2a_base64(bytes(dataEnc,'utf-8')),'utf-8') #~ dataEnc = self.subenc(odata) - with open('./.KirmahENC', mode='w') as o: - o.write(dataEnc) + #~ with open('./.KirmahENC', mode='w') as o: + #~ o.write(dataEnc) rt.stop() return dataEnc @@ -268,8 +274,8 @@ class Kirmah: #~ dataDec = self.subdec(data) - with open('./.KirmahDEC', mode='w') as o: - o.write(dataDec) + #~ with open('./.KirmahDEC', mode='w') as o: + #~ o.write(dataDec) rt.stop() return dataDec diff --git a/impra/imap.py b/impra/imap.py index a225f03..d5e5615 100755 --- a/impra/imap.py +++ b/impra/imap.py @@ -298,8 +298,8 @@ class ImapHelper: def deleteBin(self): """""" - from impra.util import DEBUG_NOTICE, DEBUG, DEBUG_LEVEL - rt = RuTime(eval(__CALLER__())) + from impra.util import DEBUG_NOTICE, DEBUG, DEBUG_LEVEL, DEBUG_INFO + rt = RuTime(eval(__CALLER__()),DEBUG_INFO) self.srv.select(self.BOX_BIN) ids = self.search('ALL',True) if len(ids) > 0 and ids[0]!='' and ids[0]!=None: @@ -310,9 +310,8 @@ class ImapHelper: #~ print(mid) #status, resp = self.srv.store(mid, '+FLAGS', '\\Deleted') status, resp = self.srv.uid('store', mid, '+FLAGS', '\\Deleted' ) - - if DEBUG and DEBUG_LEVEL <= DEBUG_NOTICE: - print('deleting msg '+str(mid)) + print('deleting msg '+str(mid)) + if DEBUG and DEBUG_LEVEL <= DEBUG_NOTICE: print(status) print(resp) self.srv.expunge() diff --git a/impra/util.py b/impra/util.py index a7e31f6..0254e92 100755 --- a/impra/util.py +++ b/impra/util.py @@ -32,7 +32,7 @@ from hashlib import sha256 from math import log, floor, ceil from random import choice -from os import urandom, popen, sep +from os import urandom, popen, sep, makedirs from os.path import dirname, realpath, abspath, join from time import time from re import split as regsplit @@ -114,7 +114,16 @@ def file_exists(path): except IOError as e: exist = False return exist - + +def mkdir_p(path): + """""" + try: + makedirs(path) + except OSError as e: # Python >2.5 + if e.errno == errno.EEXIST: + pass + else: raise + def formatBytes(b, p=2): """Give a human representation of bytes size `b` :Returns: `str` @@ -185,7 +194,7 @@ class RuTime: def _start(self): - if self.debugStart :print(' ==> '+self.label) + if self.debug :print(' ==> '+self.label) self.sc = time() def stop(self):