ImpraIndex adding md5 as key index

This commit is contained in:
a-sansara 2012-09-17 19:13:10 +02:00
parent c9460a5e49
commit 58b93e7eb0
4 changed files with 90 additions and 44 deletions

View File

@ -44,8 +44,8 @@ from os import remove, urandom, sep
from os.path import abspath, dirname, join, realpath, basename, getsize, splitext from os.path import abspath, dirname, join, realpath, basename, getsize, splitext
from re import split as regsplit, match as regmatch, compile as regcompile, search as regsearch from re import split as regsplit, match as regmatch, compile as regcompile, search as regsearch
from impra.imap import ImapHelper, ImapConfig from impra.imap import ImapHelper, ImapConfig
from impra.util import __CALLER__, RuTime, formatBytes, randomFrom, bstr, quote_escape, stack, run, file_exists, get_file_content, DEBUG, DEBUG_ALL, DEBUG_LEVEL, DEBUG_NOTICE, DEBUG_WARN from impra.util import __CALLER__, RuTime, formatBytes, randomFrom, bstr, quote_escape, stack, run, file_exists, get_file_content, DEBUG, DEBUG_ALL, DEBUG_LEVEL, DEBUG_NOTICE, DEBUG_WARN, mkdir_p
from impra.crypt import Kirmah, ConfigKey, Noiser, Randomiz, hash_sha256 from impra.crypt import Kirmah, ConfigKey, Noiser, Randomiz, hash_sha256, hash_md5_file
@ -91,7 +91,7 @@ class FSplitter :
m = mmap(f.fileno(), 0) m = mmap(f.fileno(), 0)
p = 0 p = 0
psize = ceil(getsize(fromPath)/hlst['head'][1]) psize = ceil(getsize(fromPath)/hlst['head'][1])
print(formatBytes(psize)+' '+str(len(hlst['data']))+' parts') print(formatBytes(getsize(fromPath))+' on '+str(len(hlst['data']))+' parts (~'+formatBytes(psize)+')')
while m.tell() < m.size(): while m.tell() < m.size():
self._splitPart(m,p,psize,hlst['data'][p]) self._splitPart(m,p,psize,hlst['data'][p])
p += 1 p += 1
@ -111,12 +111,17 @@ class FSplitter :
rt.stop() rt.stop()
def deployFile(self, hlst, ext='', fake=False): def deployFile(self, hlst, fileName, ext='', dirs=None, fake=False):
"""""" """"""
rt = RuTime(eval(__CALLER__())) rt = RuTime(eval(__CALLER__()))
p = 0 p = 0
hlst['data'] = sorted(hlst['data'], key=lambda lst: lst[0]) hlst['data'] = sorted(hlst['data'], key=lambda lst: lst[0])
fp = open(self.DIR_DEPLOY+hlst['head'][0]+ext, 'wb+')
if dirs is not None and dirs!='none' :
dirPath = join(self.DIR_DEPLOY,dirs)+sep
mkdir_p(dirPath)
else: dirPath = self.DIR_DEPLOY
fp = open(dirPath+fileName+ext, 'wb+')
depDir = self.DIR_INBOX depDir = self.DIR_INBOX
if fake : depDir = self.DIR_OUTBOX if fake : depDir = self.DIR_OUTBOX
while p < hlst['head'][1] : while p < hlst['head'][1] :
@ -207,6 +212,23 @@ class ImpraIndex:
SEP_KEY_INTERN = '@' SEP_KEY_INTERN = '@'
"""Separator used for internal key such categories""" """Separator used for internal key such categories"""
# Positional indices into the tuple stored for each index entry.
# add() builds that tuple as (key, label, count, ext, usr, cat, self.id).

# NOTE(review): add() as shown stores a 7-item tuple (indices 0-6) and uses
# the md5 as the dictionary key itself, so position 7 is not filled by
# add() - confirm before indexing an entry with MD5.
MD5 = 7
""""""
# Position of `key` in the entry tuple (the value handed to ConfigKey
# when a file is retrieved).
HASH = 0
""""""
# Position of the entry label (the text matched by searchByPattern).
LABEL = 1
""""""
# Position of the parts count (`count` in add()).
PARTS = 2
""""""
# Position of the file extension (`ext` in add()).
EXT = 3
""""""
# Position of the owner (`usr` in add()); passed through getUser() when
# the index is displayed.
OWNER = 4
""""""
# Position of the category (`cat` in add()).
CATG = 5
""""""
# Position of the numeric entry id (`self.id` at the time add() was called).
UID = 6
""""""
def __init__(self, key, mark, encdata='', dicCategory={}, id=0): def __init__(self, key, mark, encdata='', dicCategory={}, id=0):
"""Initialize the index with rsa and encoded data """Initialize the index with rsa and encoded data
@ -229,12 +251,12 @@ class ImpraIndex:
if not self.SEP_KEY_INTERN+k in self.dic: if not self.SEP_KEY_INTERN+k in self.dic:
self.dic[self.SEP_KEY_INTERN+k] = dicCategory[k] self.dic[self.SEP_KEY_INTERN+k] = dicCategory[k]
def add(self,key, label, count, ext='', usr='', cat=''): def add(self,key, label, count, ext='', usr='', cat='', md5=''):
"""Add an entry to the index with appropriate label, key used by entry """Add an entry to the index with appropriate label, key used by entry
to decode data, and parts count to decode data, and parts count
""" """
if self.search(label) == None : if self.search(md5) == None :
self.dic[label] = (key,label,count,ext,usr,cat, self.id) self.dic[md5] = (key,label,count,ext,usr,cat, self.id)
self.id +=1 self.id +=1
else : else :
print(label+' already exist') print(label+' already exist')
@ -267,17 +289,24 @@ class ImpraIndex:
"""Search the corresponding label in the index""" """Search the corresponding label in the index"""
rt = RuTime(eval(__CALLER__(sid))) rt = RuTime(eval(__CALLER__(sid)))
l = None l = None
r = [v for i, v in enumerate(self.dic) if not v.startswith(self.SEP_KEY_INTERN) and self.dic[v][6] == int(sid)] r = [k for i, k in enumerate(self.dic) if not k.startswith(self.SEP_KEY_INTERN) and self.dic[k][self.UID] == int(sid)]
if len(r)==1: l = r[0] if len(r)==1: l = r[0]
rt.stop() rt.stop()
return l return l
def searchByFileHash(self,md5):
    """Tell whether `md5` is already used as a key of the index
    :Returns: `True` when `md5` is a key of `self.dic`, `None` otherwise
    """
    return True if md5 in self.dic else None
def searchByPattern(self,pattern): def searchByPattern(self,pattern):
"""""" """"""
rt = RuTime(eval(__CALLER__(pattern))) rt = RuTime(eval(__CALLER__(pattern)))
l = None l = None
r = [ v for i,v in enumerate(self.dic) if not v.startswith(self.SEP_KEY_INTERN) and regsearch(pattern,self.dic[v][1]) is not None ] r = [ k for i,k in enumerate(self.dic) if not k.startswith(self.SEP_KEY_INTERN) and regsearch(pattern,self.dic[k][self.LABEL]) is not None ]
l = [self.dic[k][6] for k in r] l = [self.dic[k][self.UID] for k in r]
rt.stop() rt.stop()
return l return l
@ -289,14 +318,14 @@ class ImpraIndex:
v = self.dic.get(k) v = self.dic.get(k)
k = k.lstrip('\n\r') k = k.lstrip('\n\r')
if not k[0]==self.SEP_KEY_INTERN and len(k)>1: if not k[0]==self.SEP_KEY_INTERN and len(k)>1:
if matchIds==None or v[6] in matchIds: if matchIds==None or v[self.UID] in matchIds:
data += str(v[6]).rjust(1+ceil(len(str(v[6]))/10),' ')+' ' data += str(v[self.UID]).rjust(1+ceil(len(str(v[self.UID]))/10),' ')+' '
data += str(v[0])[0:12]+'... ' data += str(k)[0:12]+'... '
data += str(v[1]).ljust(42,' ')+' ' data += str(v[self.LABEL]).ljust(42,' ')+' '
data += str(v[2]).rjust(2,'0')+' ' data += str(v[self.PARTS]).rjust(2,'0')+' '
data += str(v[3]).ljust(5,' ')+' ' data += str(v[self.EXT]).ljust(5,' ')+' '
data += self.getUser(str(v[4])).ljust(15,' ')+' ' data += self.getUser(str(v[self.OWNER])).ljust(15,' ')+' '
data += str(v[5])+' ' data += str(v[self.CATG])+' '
#~ elif len(k)>1: #~ elif len(k)>1:
#~ print(k,'=',v) #~ print(k,'=',v)
data = data+self.SEP_ITEM data = data+self.SEP_ITEM
@ -394,7 +423,7 @@ class ImpraStorage:
# getFromFile # getFromFile
if uid != None and int(self.idx) == int(uid) and file_exists(self.pathInd): if uid != None and int(self.idx) == int(uid) and file_exists(self.pathInd):
encData = get_file_content(self.pathInd) encData = get_file_content(self.pathInd)
print(' index in cache') print('--\nindex in cache')
else: else:
encData = self._getCryptIndex() encData = self._getCryptIndex()
with open(self.pathInd, mode='w', encoding='utf-8') as o: with open(self.pathInd, mode='w', encoding='utf-8') as o:
@ -441,8 +470,10 @@ class ImpraStorage:
#~ self.fsplit.deployFile(hlst,True) #~ self.fsplit.deployFile(hlst,True)
_, ext = splitext(path) _, ext = splitext(path)
try: try:
if self.index.search(label)==None : md5 = hash_md5_file(path)
hlst = self.fsplit.addFile(path,label) print('--\nmd5sum `%s` %s' % (path,md5))
if not self.index.searchByFileHash(md5) :
hlst = self.fsplit.addFile(path,md5)
if DEBUG and DEBUG_LEVEL <= DEBUG_NOTICE : if DEBUG and DEBUG_LEVEL <= DEBUG_NOTICE :
print(hlst['head']) print(hlst['head'])
for v in hlst['data']: for v in hlst['data']:
@ -454,11 +485,12 @@ class ImpraStorage:
msg = self.mb.build(nameFrom,usr,hlst['head'][2],self.fsplit.DIR_OUTBOX+row[1]+'.ipr') msg = self.mb.build(nameFrom,usr,hlst['head'][2],self.fsplit.DIR_OUTBOX+row[1]+'.ipr')
self.ih.send(msg.as_string(), self.rootBox) self.ih.send(msg.as_string(), self.rootBox)
remove(self.fsplit.DIR_OUTBOX+row[1]+'.ipr') remove(self.fsplit.DIR_OUTBOX+row[1]+'.ipr')
self.index.add(hlst['head'][3],hlst['head'][0],hlst['head'][1],ext,self.mb.getHashName(usr),catg)
self.index.add(hlst['head'][3],label,hlst['head'][1],ext,self.mb.getHashName(usr),catg,md5)
self.saveIndex() self.saveIndex()
self.conf.set('nid', str(self.index.id),'index') self.conf.set('nid', str(self.index.id),'index')
else : else :
raise Exception(label + ' already exist on server') print('--\nfile already exist on server as `%s` [id:%i]\n' % (self.index.dic[md5][ImpraIndex.LABEL],self.index.dic[md5][ImpraIndex.UID]))
except Exception as e : except Exception as e :
print(e) print(e)
rt.stop() rt.stop()
@ -469,12 +501,12 @@ class ImpraStorage:
rt = RuTime(eval(__CALLER__('"%s"' % label)),DEBUG_INFO) rt = RuTime(eval(__CALLER__('"%s"' % label)),DEBUG_INFO)
if label==None : if label==None :
print(str(label)+' unexist') print('--\n'+str(label)+' unexist')
else : else :
key = self.index.search(label) key = self.index.search(label)
if label!=None and key!=None: if label!=None and key!=None:
ck = ConfigKey(key[0]) ck = ConfigKey(key[ImpraIndex.HASH])
count = int(key[2]) count = int(key[ImpraIndex.PARTS])
hlst = ck.getHashList(label,count,True) hlst = ck.getHashList(label,count,True)
ids = self._getIdsBySubject(hlst['head'][2]) ids = self._getIdsBySubject(hlst['head'][2])
if len(ids) >= count: if len(ids) >= count:
@ -487,16 +519,16 @@ class ImpraStorage:
print(hlst['head']) print(hlst['head'])
for v in hlst['data']: for v in hlst['data']:
print(v) print(v)
self.fsplit.deployFile(hlst, key[3]) self.fsplit.deployFile(hlst, key[ImpraIndex.LABEL], key[ImpraIndex.EXT], key[ImpraIndex.CATG])
else : else :
#raise Exception(label+' is private') #raise Exception(label+' is private')
print(label+' is private') print('--\n'+label+' is private')
else : else :
#raise Exception(label+' : invalid count parts '+str(len(ids))+'/'+str(count)) #raise Exception(label+' : invalid count parts '+str(len(ids))+'/'+str(count))
print(label+' : invalid count parts '+str(len(ids))+'/'+str(count)) print('--\n'+label+' : invalid count parts '+str(len(ids))+'/'+str(count))
else: else:
#raise Exception(str(label)+' not on the server') #raise Exception(str(label)+' not on the server')
print(str(label)+' not on the server') print('--\n'+str(label)+' not on the server')
rt.stop() rt.stop()
def clean(self): def clean(self):

View File

@ -29,7 +29,7 @@
from impra.util import RuTime, __CALLER__, stack, DEBUG from impra.util import RuTime, __CALLER__, stack, DEBUG
from base64 import urlsafe_b64encode, b64decode from base64 import urlsafe_b64encode, b64decode
from binascii import b2a_base64, a2b_base64 from binascii import b2a_base64, a2b_base64
from hashlib import sha256 from hashlib import sha256, md5
from math import log, floor, ceil from math import log, floor, ceil
from random import choice from random import choice
from os import urandom from os import urandom
@ -45,6 +45,12 @@ def hash_sha256(data):
""" """
return str(sha256(bytes(data,'utf-8')).hexdigest()) return str(sha256(bytes(data,'utf-8')).hexdigest())
def hash_md5_file(path):
    """Get a md5 hash of file from path

    The file is read in binary mode and fed to the digest in fixed-size
    chunks, so the whole content never has to be held in memory at once.

    :Returns: `str`
    """
    digest = md5()
    # `with` releases the file descriptor even if a read fails; the
    # previous one-liner never closed the file it opened.
    with open(path, mode='rb') as f:
        # iter(callable, sentinel) keeps reading until read() returns b'' (EOF)
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
def randomFrom(val, sval=0): def randomFrom(val, sval=0):
"""Get a random number from range `sval=0` to `val` """Get a random number from range `sval=0` to `val`
:Returns: `int` :Returns: `int`
@ -229,8 +235,8 @@ class Kirmah:
dataEnc = str(b2a_base64(bytes(dataEnc,'utf-8')),'utf-8') dataEnc = str(b2a_base64(bytes(dataEnc,'utf-8')),'utf-8')
#~ dataEnc = self.subenc(odata) #~ dataEnc = self.subenc(odata)
with open('./.KirmahENC', mode='w') as o: #~ with open('./.KirmahENC', mode='w') as o:
o.write(dataEnc) #~ o.write(dataEnc)
rt.stop() rt.stop()
return dataEnc return dataEnc
@ -268,8 +274,8 @@ class Kirmah:
#~ dataDec = self.subdec(data) #~ dataDec = self.subdec(data)
with open('./.KirmahDEC', mode='w') as o: #~ with open('./.KirmahDEC', mode='w') as o:
o.write(dataDec) #~ o.write(dataDec)
rt.stop() rt.stop()
return dataDec return dataDec

View File

@ -298,8 +298,8 @@ class ImapHelper:
def deleteBin(self): def deleteBin(self):
"""""" """"""
from impra.util import DEBUG_NOTICE, DEBUG, DEBUG_LEVEL from impra.util import DEBUG_NOTICE, DEBUG, DEBUG_LEVEL, DEBUG_INFO
rt = RuTime(eval(__CALLER__())) rt = RuTime(eval(__CALLER__()),DEBUG_INFO)
self.srv.select(self.BOX_BIN) self.srv.select(self.BOX_BIN)
ids = self.search('ALL',True) ids = self.search('ALL',True)
if len(ids) > 0 and ids[0]!='' and ids[0]!=None: if len(ids) > 0 and ids[0]!='' and ids[0]!=None:
@ -310,9 +310,8 @@ class ImapHelper:
#~ print(mid) #~ print(mid)
#status, resp = self.srv.store(mid, '+FLAGS', '\\Deleted') #status, resp = self.srv.store(mid, '+FLAGS', '\\Deleted')
status, resp = self.srv.uid('store', mid, '+FLAGS', '\\Deleted' ) status, resp = self.srv.uid('store', mid, '+FLAGS', '\\Deleted' )
if DEBUG and DEBUG_LEVEL <= DEBUG_NOTICE:
print('deleting msg '+str(mid)) print('deleting msg '+str(mid))
if DEBUG and DEBUG_LEVEL <= DEBUG_NOTICE:
print(status) print(status)
print(resp) print(resp)
self.srv.expunge() self.srv.expunge()

View File

@ -32,7 +32,7 @@
from hashlib import sha256 from hashlib import sha256
from math import log, floor, ceil from math import log, floor, ceil
from random import choice from random import choice
from os import urandom, popen, sep from os import urandom, popen, sep, makedirs
from os.path import dirname, realpath, abspath, join from os.path import dirname, realpath, abspath, join
from time import time from time import time
from re import split as regsplit from re import split as regsplit
@ -115,6 +115,15 @@ def file_exists(path):
exist = False exist = False
return exist return exist
def mkdir_p(path):
    """Create directory `path` together with any missing parent directories

    A `path` that already exists is silently accepted; every other
    `OSError` raised by `makedirs` is propagated to the caller.
    """
    # Imported locally: the handler needs `errno.EEXIST`, and no module-level
    # `import errno` is visible next to the `makedirs` import. Without it the
    # "already exists" case would fail with NameError instead of being ignored.
    import errno
    try:
        makedirs(path)
    except OSError as e: # Python >2.5
        # Only "already exists" is expected; anything else is a real failure.
        if e.errno != errno.EEXIST:
            raise
def formatBytes(b, p=2): def formatBytes(b, p=2):
"""Give a human representation of bytes size `b` """Give a human representation of bytes size `b`
:Returns: `str` :Returns: `str`
@ -185,7 +194,7 @@ class RuTime:
def _start(self): def _start(self):
if self.debugStart :print(' ==> '+self.label) if self.debug :print(' ==> '+self.label)
self.sc = time() self.sc = time()
def stop(self): def stop(self):