Module androguard
[hide private]
[frames] | no frames]

Source Code for Module androguard

  1  # This file is part of Androguard. 
  2  # 
  3  # Copyright (C) 2010, Anthony Desnos <desnos at t0t0.org> 
  4  # All rights reserved. 
  5  # 
  6  # Androguard is free software: you can redistribute it and/or modify 
  7  # it under the terms of the GNU Lesser General Public License as published by 
  8  # the Free Software Foundation, either version 3 of the License, or 
  9  # (at your option) any later version. 
 10  # 
 11  # Androguard is distributed in the hope that it will be useful, 
 12  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 14  # GNU Lesser General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU Lesser General Public License 
 17  # along with Androguard.  If not, see <http://www.gnu.org/licenses/>. 
 18   
 19  import sys, xml.dom.minidom, re, random, string, os 
 20   
 21  PATH_INSTALL = "./" 
 22   
 23  sys.path.append(PATH_INSTALL + "/core") 
 24  sys.path.append(PATH_INSTALL + "/core/bytecodes") 
 25  sys.path.append(PATH_INSTALL + "/core/predicates") 
 26  sys.path.append(PATH_INSTALL + "/core/analysis") 
 27  sys.path.append(PATH_INSTALL + "/core/vm") 
 28  sys.path.append(PATH_INSTALL + "/core/wm") 
 29  sys.path.append(PATH_INSTALL + "/core/protection") 
 30  sys.path.append(PATH_INSTALL + "/classification") 
 31   
 32  import bytecode, jvm, dvm, apk, misc, analysis, opaque 
 33  from error import error 
 34   
 35  VM_INT_AUTO = 0 
 36  VM_INT_BASIC_MATH_FORMULA = 1 
 37  VM_INT_BASIC_PRNG = 2 
 38  INVERT_VM_INT_TYPE = { "VM_INT_AUTO" : VM_INT_AUTO, 
 39                         "VM_INT_BASIC_MATH_FORMULA" : VM_INT_BASIC_MATH_FORMULA, 
 40                         "VM_INT_BASIC_PRNG" : VM_INT_BASIC_PRNG 
 41                       } 
class VM_int :
    """VM_int is the main high level Virtual Machine object to protect a method by replacing all integer constants

       @param andro : an L{Androguard} / L{AndroguardS} object to have full access to the desired information
       @param class_name : the class of the method
       @param method_name : the name of the method to protect
       @param descriptor : the descriptor of the method
       @param vm_int_type : the type of the Virtual Machine (VM_INT_BASIC_MATH_FORMULA or VM_INT_BASIC_PRNG)

       @raise ValueError : if an integer instruction is found and vm_int_type is neither
                           VM_INT_BASIC_MATH_FORMULA nor VM_INT_BASIC_PRNG (VM_INT_AUTO is not handled here)
    """
    def __init__(self, andro, class_name, method_name, descriptor, vm_int_type) :
        import vm

        method, _vm = andro.get_method_descriptor(class_name, method_name, descriptor)
        code = method.get_code()

        # Only one instruction is patched per pass: after a patch the scan
        # restarts from the first instruction, and the loop stops once a
        # complete pass has found no integer instruction left.
        patched = True
        while patched :
            patched = False

            for idx, bc in enumerate( code.get_bc().get() ) :
                if bc.get_name() not in _vm.get_INTEGER_INSTRUCTIONS() :
                    continue

                if vm_int_type == VM_INT_BASIC_MATH_FORMULA :
                    vi = vm.VM_int_basic_math_formula( class_name, code, idx )
                elif vm_int_type == VM_INT_BASIC_PRNG :
                    vi = vm.VM_int_basic_prng( class_name, code, idx )
                else :
                    # A string cannot be raised; use a real exception so the
                    # caller sees the actual problem.
                    raise ValueError("unsupported vm_int_type : %r" % (vm_int_type,))

                # Register the methods generated by the virtual machine, then
                # rewrite the instruction found at idx.
                for new_method in vi.get_methods() :
                    _vm.insert_direct_method( new_method.get_name(), new_method )
                vi.patch_code()

                patched = True
                break

        method.show()
85
class WM :
    """WM builds the wm.WM object of one class

       @param andro : an object providing get_vm() and get_analysis()
       @param class_name : the name of the class handed to wm.WM
       @param wm_type : the list of watermark types handed to wm.WM (must not be an empty list)

       @raise ValueError : if wm_type is an empty list
    """
    def __init__(self, andro, class_name, wm_type) :
        if wm_type == [] :
            # A string cannot be raised; report the problem with a real
            # exception instead.
            raise ValueError("no watermark type specified")

        import wm
        self._w = wm.WM( andro.get_vm(), class_name, wm_type, andro.get_analysis() )

    def get(self) :
        """Return the wm.WM object created by the constructor"""
        return self._w
96
class WMCheck :
    """WMCheck loads a watermark description from an XML file and runs wm.WMCheck on it

       @param andro : an object providing get_analysis(); it is also handed to wm.WMCheck
       @param class_name : not used by this constructor
       @param input_file : the filename of the XML document to load
    """
    def __init__(self, andro, class_name, input_file) :
        # The module is not imported at file level, so it has to be imported
        # here before wm.WMLoad / wm.WMCheck can be resolved.
        import wm

        with open(input_file, "rb") as fd :
            buffxml = fd.read()

        document = xml.dom.minidom.parseString(buffxml)

        w_orig = wm.WMLoad( document )
        # The object returned by wm.WMCheck is not kept.
        wm.WMCheck( w_orig, andro, andro.get_analysis() )
107
def OBFU_NAMES_GEN(prefix="", length=10) :
    """Generate a random name : one ASCII letter followed by ASCII letters/digits

       @param prefix : a string placed in front of the random part
       @param length : the number of random characters (default : 10), at least 1

       @rtype: string
       @raise ValueError : if length is lower than 1
    """
    if length < 1 :
        raise ValueError("length must be at least 1")

    # ascii_letters is used instead of string.letters, which depends on the
    # locale and does not exist in Python 3.
    letters = string.ascii_letters
    alphabet = letters + string.digits

    # The first character is always a letter, the other ones may be digits.
    first = random.choice( letters )
    rest = ''.join( random.choice( alphabet ) for _ in range(length - 1) )
    return prefix + first + rest

# Kinds of member that OBFU_Names can rename : fields or methods.
OBFU_NAMES_FIELDS, OBFU_NAMES_METHODS = 0, 1
class OBFU_Names :
    """
       OBFU_Names is the object that changes the name of a field or a method to a random string, and resolves
       the dependencies in the other files

       @param andro : an L{Androguard} object to have full access to the desired information, representing a pool of files with the same format
       @param class_name : the class of the method/field (a python regexp)
       @param name : the name of the method/field (a python regexp)
       @param descriptor : the descriptor of the method/field (a python regexp)
       @param obfu_type : the type of the obfuscation (field/method) (OBFU_NAMES_FIELDS, OBFU_NAMES_METHODS)
       @param gen_method : a callable without argument which generates a random string

       @raise ValueError : if obfu_type is neither OBFU_NAMES_FIELDS nor OBFU_NAMES_METHODS
    """
    def __init__(self, andro, class_name, name, descriptor, obfu_type, gen_method=OBFU_NAMES_GEN) :
        if obfu_type not in (OBFU_NAMES_FIELDS, OBFU_NAMES_METHODS) :
            # A string cannot be raised; use a real exception.
            raise ValueError("unknown obfu_type : %r" % (obfu_type,))

        re_class_name = re.compile(class_name)
        re_name = re.compile(name)
        re_descriptor = re.compile(descriptor)

        if obfu_type == OBFU_NAMES_FIELDS :
            search_in = andro.gets("fields")
        else :
            search_in = andro.gets("methods")

        # (member, old name) pairs of everything that has been renamed
        depends = []

        # Change the name of all fields/methods
        for fm in search_in :
            # The patterns are anchored at the start only (re.match).
            if not ( re_class_name.match( fm.get_class_name() )
                     and re_name.match( fm.get_name() )
                     and re_descriptor.match( fm.get_descriptor() ) ) :
                continue

            old_name = fm.get_name()
            # Called once per matching member, even if the member is not
            # renamed below, so the sequence of gen_method calls stays the same.
            new_name = gen_method()

            if obfu_type == OBFU_NAMES_METHODS :
                _, _vm = andro.get_method_descriptor( fm.get_class_name(), fm.get_name(), fm.get_descriptor() )
                # don't change the constructor for a .class file
                # NOTE(review): as written, only methods whose VM type is
                # "JVM" are renamed; methods of other VM types are skipped.
                # Confirm that this is intended.
                if not ( _vm.get_type() == "JVM" and old_name != "<init>" ) :
                    continue

            fm.set_name( new_name )
            depends.append( (fm, old_name) )

        # Change the name in others files
        for fm, old_name in depends :
            for _vm in andro.get_vms() :
                old_ref = [ fm.get_class_name(), old_name, fm.get_descriptor() ]
                new_ref = [ fm.get_class_name(), fm.get_name(), fm.get_descriptor() ]
                if obfu_type == OBFU_NAMES_FIELDS :
                    _vm.set_used_field( old_ref, new_ref )
                else :
                    _vm.set_used_method( old_ref, new_ref )
166
class BC :
    """BC wraps one bytecode format object together with its optional analysis

       Every attribute which is not defined by this class is looked up on the wrapped object.

       @param bc : the format object to wrap
    """
    def __init__(self, bc) :
        self.__bc = bc
        # No analysis is available until analyze() has been called.
        self.__a = None

    def get_vm(self) :
        """Return the wrapped format object"""
        return self.__bc

    def get_analysis(self) :
        """Return the analysis created by analyze(), or None if analyze() has not been called"""
        return self.__a

    def analyze(self) :
        """Create the analysis.VMAnalysis object of the wrapped format object"""
        self.__a = analysis.VMAnalysis( self.__bc, code_analysis=True )

    def _get(self, val, name) :
        """Call the method named val of the wrapped object with name, and return the result as a list"""
        return list( getattr(self.__bc, val)(name) )

    def _gets(self, val) :
        """Call the method named val of the wrapped object without argument, and return the result as a list"""
        return list( getattr(self.__bc, val)() )

    def gets(self, name) :
        """Return, as a list, the result of the "get_<name>" method of the wrapped object"""
        return self._gets("get_" + name)

    def get(self, val, name) :
        """Return, as a list, the result of the "get_<val>" method of the wrapped object called with name"""
        return self._get("get_" + val, name)

    def insert_direct_method(self, name, method) :
        """Forward the call to insert_direct_method of the wrapped object"""
        return self.__bc.insert_direct_method(name, method)

    def insert_craft_method(self, name, proto, codes) :
        """Forward the call to insert_craft_method of the wrapped object"""
        return self.__bc.insert_craft_method( name, proto, codes)

    def show(self) :
        """Call show() on the wrapped object"""
        self.__bc.show()

    def pretty_show(self) :
        """Call pretty_show() on the wrapped object, with the current analysis (None before analyze())"""
        self.__bc.pretty_show( self.__a )

    def save(self) :
        """Return the result of save() of the wrapped object"""
        return self.__bc.save()

    def __getattr__(self, value) :
        # __getattr__ is only called when the normal lookup has failed.  If a
        # private attribute of this class is the missing one (instance created
        # without __init__, e.g. by copy or pickle), reading self.__bc below
        # would call __getattr__ again without end.
        if value.startswith("_BC__") :
            raise AttributeError(value)
        return getattr(self.__bc, value)

# String identifiers of the virtual machine protection options.
# NOTE(review): presumably element names of the XML configuration file - confirm.
PROTECT_VM_AUTO, PROTECT_VM_INTEGER, PROTECT_VM_INTEGER_TYPE = (
    "protect_vm_auto",
    "protect_vm_integer",
    "protect_vm_integer_type",
)

class Androguard :
    """Androguard is the main object to abstract and manage different formats

       @param files : a list of filenames (filename must be terminated by .class, .jar, .dex or .apk)
       @param raw : specify if the filename is in fact a raw buffer (default : False) #FIXME : currently ignored
    """
    def __init__(self, files, raw=False) :
        self.__files = files

        self.__orig_raw = {}
        for i in self.__files :
            with open(i, "rb") as fd :
                self.__orig_raw[ i ] = fd.read()

        self.__bc = []
        self._analyze()

    def _iterFlatten(self, root):
        """Yield the elements of root, descending recursively into nested lists and tuples"""
        if isinstance(root, (list, tuple)):
            for element in root :
                for e in self._iterFlatten(element) :
                    yield e
        else:
            yield root

    def _guess_type(self, file_name) :
        """Return the known extension (".class", ".jar", ".dex", ".apk") of file_name, or None

           The end of the name is checked first, so that a directory such as
           "my.classes/" in the path can not select the wrong format.  Names
           which merely contain an extension are still accepted afterwards,
           as they were before.
        """
        known = (".class", ".jar", ".dex", ".apk")
        for ext in known :
            if file_name.endswith( ext ) :
                return ext
        for ext in known :
            if ext in file_name :
                return ext
        return None

    def _analyze(self) :
        """Parse every file and store the result as (name, L{BC}) tuples

           @raise ValueError : if the format of a file can not be determined from its name
        """
        for i in self.__files :
            file_type = self._guess_type( i )
            if file_type == ".class" :
                bc = jvm.JVMFormat( self.__orig_raw[ i ] )
            elif file_type == ".jar" :
                x = jvm.JAR( i )
                bc = x.get_classes()
            elif file_type == ".dex" :
                bc = dvm.DalvikVMFormat( self.__orig_raw[ i ] )
            elif file_type == ".apk" :
                x = apk.APK( i )
                bc = dvm.DalvikVMFormat( x.get_dex() )
            else :
                # A string cannot be raised; use a real exception.
                raise ValueError( "Unknown bytecode : %s" % (i,) )

            if isinstance(bc, list) :
                # A list holds pairs : j[0] becomes the entry name, j[1] is
                # parsed with jvm.JVMFormat.
                for j in bc :
                    self.__bc.append( (j[0], BC( jvm.JVMFormat(j[1]) ) ) )
            else :
                self.__bc.append( (i, BC( bc )) )

    def ianalyze(self) :
        """Call analyze() on every L{BC} object"""
        for _, bc in self.get_bc() :
            bc.analyze()

    def get_class(self, class_name) :
        """Return the first L{BC} object whose get_class(class_name) is True, or None"""
        for _, bc in self.__bc :
            if bc.get_class(class_name) == True :
                return bc
        return None

    def get_raw(self) :
        """Return raw format of all file"""
        return [ bc._get_raw() for _, bc in self.__bc ]

    def get_orig_raw(self) :
        """Return the dictionary which maps every filename to the content read from it"""
        return self.__orig_raw

    def get_method_descriptor(self, class_name, method_name, descriptor) :
        """
           Return the specific method, with the L{BC} object which contains it,
           or (None, None) if no file contains it

           @param class_name : the class name of the method
           @param method_name : the name of the method
           @param descriptor : the descriptor of the method
        """
        for file_name, bc in self.__bc :
            x = bc.get_method_descriptor( class_name, method_name, descriptor )
            if x is not None :
                return x, bc
        return None, None

    def get_field_descriptor(self, class_name, field_name, descriptor) :
        """
           Return the specific field, with the L{BC} object which contains it,
           or (None, None) if no file contains it

           @param class_name : the class name of the field
           @param field_name : the name of the field
           @param descriptor : the descriptor of the field
        """
        for file_name, bc in self.__bc :
            x = bc.get_field_descriptor( class_name, field_name, descriptor )
            if x is not None :
                return x, bc
        return None, None

    def get(self, name, val) :
        """
           Return the specific value for all files

           @param name : "file" to get the L{BC} object of the entry named val (None if there
                         is none), otherwise the kind of value forwarded to L{BC.get}
           @param val : the entry name if name is "file", otherwise the argument forwarded to L{BC.get}
        """
        if name == "file" :
            for file_name, bc in self.__bc :
                if file_name == val :
                    return bc

            return None

        l = [ bc.get( name, val ) for file_name, bc in self.__bc ]
        return list( self._iterFlatten(l) )

    def gets(self, name) :
        """
           Return the specific value for all files, as one flat list

           @param name : the kind of value forwarded to L{BC.gets}
        """
        l = [ bc.gets( name ) for file_name, bc in self.__bc ]
        return list( self._iterFlatten(l) )

    def get_vms(self) :
        """Return the list of the format objects wrapped by the L{BC} objects"""
        return [ i[1].get_vm() for i in self.__bc ]

    def get_bc(self) :
        """Return the list of (name, L{BC}) tuples"""
        return self.__bc

    def show(self) :
        """
           Display all files
        """
        for _, bc in self.__bc :
            bc.show()

    def pretty_show(self) :
        """
           Display all files
        """
        for _, bc in self.__bc :
            bc.pretty_show()

    def do(self, fileconf) :
        """
           Analyze all files, apply the actions described by an XML configuration file, and save the result

           The "main_path" and "libs_path" elements are required.  The "watermark",
           "protect_code" and "save_path" elements are optional.

           @param fileconf : the filename of the XML configuration
        """
        self.ianalyze()

        with open(fileconf, "rb") as fd :
            buffxml = fd.read()

        document = xml.dom.minidom.parseString(buffxml)

        main_path = document.getElementsByTagName( "main_path" )[0].firstChild.data
        libs_path = document.getElementsByTagName( "libs_path" )[0].firstChild.data

        watermark_items = document.getElementsByTagName( "watermark" )
        if watermark_items :
            watermark_item = watermark_items[0]
            watermark_types = [ str( item.firstChild.data ) for item in watermark_item.getElementsByTagName( "type" ) ]
            watermark_output = watermark_item.getElementsByTagName( "output" )[0].firstChild.data
            print("%s %s %s" % (watermark_types, "--->", watermark_output))

            # One watermark is written per class of every file.
            with open(watermark_output, "w") as fd :
                fd.write("<?xml version=\"1.0\"?>\n")
                fd.write("<andro id=\"androguard wm\">\n")
                for _, bc in self.get_bc() :
                    for class_name in bc.get_classes_names() :
                        watermark = WM( bc, class_name, watermark_types )
                        fd.write( watermark.get().save() )
                fd.write("</andro>\n")

        if document.getElementsByTagName( "protect_code" ) :
            import protection

            protection.ProtectCode( [ bc for _, bc in self.get_bc() ], main_path + libs_path )

        # Disabled : per-method integer protection
        # for item in document.getElementsByTagName('method') :
        #    if item.getElementsByTagName( PROTECT_VM_INTEGER )[0].firstChild != None :
        #       if item.getElementsByTagName( PROTECT_VM_INTEGER )[0].firstChild.data == "1" :
        #          vm_type = INVERT_VM_INT_TYPE[ item.getElementsByTagName( PROTECT_VM_INTEGER_TYPE )[0].firstChild.data ]
        #          VM_int( self, item.getAttribute('class'), item.getAttribute('name'), item.getAttribute('descriptor'), vm_type )

        save_paths = document.getElementsByTagName( "save_path" )
        if save_paths :
            self.save( main_path + save_paths[0].firstChild.data )
        else :
            self.save()

    def save(self, output_dir=None) :
        """
           Write every file, with its modifications, to the disk

           @param output_dir : a prefix put in front of the base name of every file (no path
                               separator is added); None (default) to overwrite the original names
        """
        for file_name, bc in self.get_bc() :
            if output_dir is None :
                output_file_name = file_name
            else :
                output_file_name = output_dir + os.path.basename( file_name )

            print("%s %s" % ("[+] [AG] SAVING ... ", output_file_name))
            # Binary mode : the data is bytecode, which text mode would
            # corrupt on platforms that translate line endings.
            with open(output_file_name, "wb") as fd :
                fd.write( bc.save() )
428
429 -class AndroguardS :
430 """AndroguardS is the main object to abstract and manage differents formats but only per filename. In fact this class is just a wrapper to the main class Androguard 431 432 @param filename : the filename to use (filename must be terminated by .class or .dex) 433 @param raw : specify if the filename is a raw buffer (default : False) 434 """
435 - def __init__(self, filename, raw=False) :
436 self.__filename = filename 437 self.__orig_a = Androguard( [ filename ], raw ) 438 self.__a = self.__orig_a.get( "file", filename )
439
440 - def get_orig_raw(self) :
441 return self.__orig_a.get_orig_raw()[ self.__filename ]
442
443 - def get_vm(self) :
444 """ 445 This method returns the VMFormat which correspond to the file 446 447 @rtype: L{jvm.JVMFormat} or L{dvm.DalvikVMFormat} 448 """ 449 return self.__a.get_vm()
450
451 - def save(self) :
452 """ 453 Return the original format (with the modifications) into raw format 454 455 @rtype: string 456 """ 457 return self.__a.save()
458
459 - def __getattr__(self, value) :
460 try : 461 return getattr(self.__orig_a, value) 462 except AttributeError : 463 return getattr(self.__a, value)
464