Module similarity
[hide private]
[frames] | [no frames]

Source Code for Module similarity

  1  # This file is part of Androguard. 
  2  # 
  3  # Copyright (C) 2010, Anthony Desnos <desnos at t0t0.org> 
  4  # All rights reserved. 
  5  # 
  6  # Androguard is free software: you can redistribute it and/or modify 
  7  # it under the terms of the GNU Lesser General Public License as published by 
  8  # the Free Software Foundation, either version 3 of the License, or 
  9  # (at your option) any later version. 
 10  # 
 11  # Androguard is distributed in the hope that it will be useful, 
 12  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 14  # GNU Lesser General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU Lesser General Public License 
 17  # along with Androguard.  If not, see <http://www.gnu.org/licenses/>. 
 18   
 19  import hashlib 
 20   
 21  from ctypes import cdll, c_float, c_int, c_uint, c_void_p, Structure, addressof, create_string_buffer, cast 
 22   
 23  #struct libsimilarity { 
 24  #   void *orig; 
 25  #   unsigned int size_orig; 
 26  #   void *cmp; 
 27  #   unsigned size_cmp; 
 28   
 29  #   unsigned int *corig; 
 30  #   unsigned int *ccmp; 
 31  #    
 32  #   float res; 
 33  #}; 
class LIBSIMILARITY_T(Structure):
    """ctypes mirror of the native ``struct libsimilarity``.

    The field order and types follow the C declaration quoted in the
    comment above this class.  An instance is filled in by
    ``SIMILARITY._sim`` and handed to the native library by address.
    """

    _fields_ = [
        # Pointer to, and byte length of, the first buffer.
        ("orig", c_void_p),
        ("size_orig", c_uint),
        # Pointer to, and byte length of, the second buffer.
        ("cmp", c_void_p),
        ("size_cmp", c_uint),

        # The C struct declares these two members as ``unsigned int *``.
        # They must therefore be pointer-sized here: a plain ``c_uint``
        # gives the wrong struct layout and truncates the stored address
        # wherever pointers are wider than ``unsigned int``.  ``c_void_p``
        # still accepts the integer addresses produced by ``addressof``.
        ("corig", c_void_p),
        ("ccmp", c_void_p),

        # Read back by ``SIMILARITY._sim`` after the native call.
        ("res", c_float),
    ]
# Identifiers of the available compression back ends.  SIMILARITY passes the
# selected value to the native ``set_compress_type`` call and also uses it as
# the key of its per-compressor cache, so the numeric values must stay as is.
ZLIB_COMPRESS = 0
BZ2_COMPRESS = 1
SMAZ_COMPRESS = 2
LZMA_COMPRESS = 3
XZ_COMPRESS = 4
SNAPPY_COMPRESS = 5
class SIMILARITY:
    """Thin ctypes front end to the native ``libsimilarity`` shared library.

    Buffers given to the methods are passed to native code through
    ``cast(..., c_void_p)`` and hashed with ``hashlib.md5``, so they must be
    byte strings (``hashlib.md5`` rejects text on Python 3).

    For every compression type the instance keeps a cache that maps the MD5
    hex digest of a buffer to a ``c_uint``.  That ``c_uint`` is handed to the
    native routine by address and stored afterwards.
    NOTE(review): presumably the native side fills it with the compressed
    size of the buffer so it is not recomputed -- confirm against
    libsimilarity.  The cache is keyed by compression type only, not by
    level; consider calling ``clear_caches`` after ``set_level``.
    """

    def __init__(self, path="./libsimilarity/libsimilarity.so"):
        """Load the shared library at *path* and select zlib compression.

        Raises whatever ``cdll.LoadLibrary`` raises when the library cannot
        be loaded.
        """
        self._u = cdll.LoadLibrary(path)

        # ctypes assumes a C ``int`` result unless told otherwise.
        self._u.compress.restype = c_uint
        self._u.ncd.restype = c_int
        self._u.ncs.restype = c_int
        self._u.cmid.restype = c_int
        self._u.entropy.restype = c_float
        self._u.levenshtein.restype = c_uint

        self._level = 9

        # One structure, reused for every _sim call.
        self.__libsim_t = LIBSIMILARITY_T()

        self.__caches = {
            ZLIB_COMPRESS: {},
            BZ2_COMPRESS: {},
            SMAZ_COMPRESS: {},
            LZMA_COMPRESS: {},
            XZ_COMPRESS: {},
            SNAPPY_COMPRESS: {},
        }

        self.set_compress_type(ZLIB_COMPRESS)

    def set_level(self, level):
        """Set the level passed as first argument to the native routines."""
        self._level = level

    def get_in_caches(self, s):
        """Return the cached ``c_uint`` for *s* under the current type.

        A fresh ``c_uint(0)`` is returned when nothing is cached (a missing
        compression type is treated the same way as a missing digest).
        """
        try:
            return self.__caches[self._type][hashlib.md5(s).hexdigest()]
        except KeyError:
            return c_uint(0)

    def add_in_caches(self, s, v):
        """Cache *v* for *s* under the current type unless already present."""
        digest = hashlib.md5(s).hexdigest()
        cache = self.__caches[self._type]
        if digest not in cache:
            cache[digest] = v

    def clear_caches(self):
        """Drop every cached value, for all compression types."""
        for kind in self.__caches:
            self.__caches[kind] = {}

    def compress(self, s1):
        """Return the result of the native ``compress`` call on *s1*."""
        return self._u.compress(self._level, cast(s1, c_void_p), len(s1))

    def _sim(self, s1, s2, func):
        """Run the native routine *func* on *s1* and *s2*.

        Fills the shared structure, calls ``func(level, &structure)`` and
        returns the structure's ``res`` field as read after the call.
        """
        sim = self.__libsim_t

        sim.orig = cast(s1, c_void_p)
        sim.size_orig = len(s1)

        sim.cmp = cast(s2, c_void_p)
        sim.size_cmp = len(s2)

        # Keep references to both c_uint objects for the whole call: the
        # structure only stores their addresses.
        corig = self.get_in_caches(s1)
        ccmp = self.get_in_caches(s2)
        sim.corig = addressof(corig)
        sim.ccmp = addressof(ccmp)

        # A bare Python int argument is converted to a C ``int`` by ctypes,
        # which would truncate the structure's address on 64-bit platforms;
        # wrapping it in c_void_p passes a full-width pointer.
        func(self._level, c_void_p(addressof(sim)))

        self.add_in_caches(s1, corig)
        self.add_in_caches(s2, ccmp)

        return sim.res

    def ncd(self, s1, s2):
        """Return the ``res`` value produced by the native ``ncd`` routine."""
        return self._sim(s1, s2, self._u.ncd)

    def ncs(self, s1, s2):
        """Return the ``res`` value produced by the native ``ncs`` routine."""
        return self._sim(s1, s2, self._u.ncs)

    def cmid(self, s1, s2):
        """Return the ``res`` value produced by the native ``cmid`` routine."""
        return self._sim(s1, s2, self._u.cmid)

    def entropy(self, s1):
        """Return the result of the native ``entropy`` call on *s1*."""
        return self._u.entropy(cast(s1, c_void_p), len(s1))

    def levenshtein(self, s1, s2):
        """Return the result of the native ``levenshtein`` call on *s1*, *s2*."""
        return self._u.levenshtein(
            cast(s1, c_void_p), len(s1),
            cast(s2, c_void_p), len(s2),
        )

    def set_compress_type(self, t):
        """Select compression type *t* for the cache and the native library."""
        self._type = t
        self._u.set_compress_type(t)
139