1 from __future__ import unicode_literals
def _extract_tags(file_contents):
    """Yield a ``(tag_code, tag_data)`` pair for every tag of an SWF file.

    Only zlib-compressed files (signature ``CWS``) are supported.

    Args:
        file_contents: The complete SWF file as a byte string.

    Raises:
        ExtractorError: The data does not carry an SWF signature.
        NotImplementedError: The file is not zlib-compressed.
    """
    if file_contents[1:3] != b'WS':
        raise ExtractorError(
            'Not an SWF file; header is %r' % file_contents[:3])
    if file_contents[:1] == b'C':
        # Everything after the first 8 bytes is the zlib stream
        content = zlib.decompress(file_contents[8:])
    else:
        raise NotImplementedError(
            'Unsupported compression format %r' %
            file_contents[:1])

    # Determine number of bits in framesize rectangle
    framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3
    framesize_len = (5 + 4 * framesize_nbits + 7) // 8

    # Skip the framesize rectangle and the two 2-byte fields following it
    pos = framesize_len + 2 + 2
    while pos < len(content):
        header16 = compat_struct_unpack('<H', content[pos:pos + 2])[0]
        pos += 2
        # Upper 10 bits: tag code; lower 6 bits: short length
        tag_code = header16 >> 6
        tag_len = header16 & 0x3f
        if tag_len == 0x3f:
            # A short length of 0x3f means a 32-bit length follows
            tag_len = compat_struct_unpack('<I', content[pos:pos + 4])[0]
            pos += 4
        assert pos + tag_len <= len(content), \
            ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
                % (tag_code, pos, tag_len, len(content)))
        yield (tag_code, content[pos:pos + tag_len])
        pos += tag_len
class _AVMClass_Object(object):
    """An instance of an AVM class; it only remembers which class it is."""

    def __init__(self, avm_class):
        self.avm_class = avm_class

    def __repr__(self):
        return '%s#%x' % (self.avm_class.name, id(self))
class _ScopeDict(dict):
    """A dict of variables that also records the AVM class owning it."""

    def __init__(self, avm_class):
        super(_ScopeDict, self).__init__()
        self.avm_class = avm_class

    def __repr__(self):
        return '%s__Scope(%s)' % (
            self.avm_class.name,
            super(_ScopeDict, self).__repr__())
class _AVMClass(object):
    """Description of one AVM class: its name, methods and variables."""

    def __init__(self, name_idx, name, static_properties=None):
        self.name_idx = name_idx
        self.name = name
        # method name -> method index
        self.method_names = {}
        # method index -> method name
        self.method_idxs = {}
        # method name -> parsed method body
        self.methods = {}
        # method name -> Python callable built for that method
        self.method_pyfunctions = {}
        self.static_properties = static_properties if static_properties else {}

        self.variables = _ScopeDict(self)
        self.constants = {}

    def make_object(self):
        """Return a new instance object of this class."""
        return _AVMClass_Object(self)

    def __repr__(self):
        return '_AVMClass(%s)' % (self.name)

    def register_methods(self, methods):
        """Record a ``{method name: method index}`` mapping in both directions."""
        self.method_names.update(methods.items())
        self.method_idxs.update(dict(
            (idx, name)
            for name, idx in methods.items()))
class _Multiname(object):
    """Placeholder for a multiname that is identified only by its kind."""

    def __init__(self, kind):
        self.kind = kind

    def __repr__(self):
        return '[MULTINAME kind: 0x%x]' % self.kind
def _read_int(reader):
    """Read a variable-length unsigned integer from ``reader``.

    Each byte contributes its low 7 bits, least-significant group first;
    a set high bit means another byte follows.  At most 5 bytes are read.
    """
    res = 0
    shift = 0
    for _ in range(5):
        buf = reader.read(1)
        assert len(buf) == 1
        b = compat_struct_unpack('<B', buf)[0]
        res = res | ((b & 0x7f) << shift)
        if b & 0x80 == 0:
            break
        shift += 7
    return res


# An unsigned 32-bit value uses the plain variable-length encoding
_u32 = _read_int
def _u30(reader):
    """Read a variable-length integer and check that its top four bits are clear."""
    res = _read_int(reader)
    assert res & 0xf0000000 == 0
    return res
def _s32(reader):
    """Read a variable-length integer and interpret it as signed 32-bit."""
    v = _read_int(reader)
    if v & 0x80000000 != 0:
        # Sign bit set: convert from two's complement
        v = - ((v ^ 0xffffffff) + 1)
    return v
def _s24(reader):
    """Read a little-endian signed 24-bit integer from ``reader``."""
    bs = reader.read(3)
    assert len(bs) == 3
    # Sign-extend to 4 bytes so the value can be unpacked as a 32-bit int
    last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00'
    return compat_struct_unpack('<i', bs + last_byte)[0]
def _read_string(reader):
    """Read a length-prefixed UTF-8 string (the length is a u30)."""
    slen = _u30(reader)
    resb = reader.read(slen)
    assert len(resb) == slen
    return resb.decode('utf-8')
def _read_bytes(count, reader):
    """Read exactly ``count`` bytes from ``reader`` and return them."""
    assert count >= 0
    resb = reader.read(count)
    assert len(resb) == count
    return resb
def _read_byte(reader):
    """Read a single byte from ``reader`` and return it as an integer."""
    resb = _read_bytes(1, reader=reader)
    res = compat_struct_unpack('<B', resb)[0]
    return res
# Classes the interpreter knows about without finding them in the SWF file
StringClass = _AVMClass('(no name idx)', 'String')
ByteArrayClass = _AVMClass('(no name idx)', 'ByteArray')
TimerClass = _AVMClass('(no name idx)', 'Timer')
TimerEventClass = _AVMClass('(no name idx)', 'TimerEvent', {'TIMER': 'timer'})
# Lookup table by class name
_builtin_classes = {
    StringClass.name: StringClass,
    ByteArrayClass.name: ByteArrayClass,
    TimerClass.name: TimerClass,
    TimerEventClass.name: TimerEventClass,
}
class _Undefined(object):
    """Type of the ``undefined`` value: falsy and printed as 'undefined'."""

    def __bool__(self):
        return False
    # Python 2 spelling of __bool__
    __nonzero__ = __bool__

    def __hash__(self):
        return 0

    def __str__(self):
        return 'undefined'
    __repr__ = __str__


# The single shared instance used wherever the interpreter needs `undefined`
undefined = _Undefined()
185 class SWFInterpreter(object):
186 def __init__(self, file_contents):
# Set up the interpreter from the raw bytes of an SWF file: take a code tag
# out of the tags produced by _extract_tags and read its constant pools,
# multinames, method signatures, metadata, classes, scripts and method
# bodies through code_reader.
# NOTE(review): this listing has gaps (the embedded line numbers are not
# contiguous), so several statements between the visible ones are absent.
# Python replacements for AVM methods, keyed by (class, method name)
187 self._patched_functions = {
188 (TimerClass, 'addEventListener'): lambda params: undefined,
191 for tag_code, tag in _extract_tags(file_contents)
# Reading starts just after the first NUL byte found at or after offset 4
193 p = code_tag.index(b'\0', 4) + 1
194 code_reader = io.BytesIO(code_tag[p:])
196 # Parse ABC (AVM2 ByteCode)
198 # Define a couple convenience methods
199 u30 = lambda *args: _u30(*args, reader=code_reader)
200 s32 = lambda *args: _s32(*args, reader=code_reader)
201 u32 = lambda *args: _u32(*args, reader=code_reader)
202 read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
203 read_byte = lambda *args: _read_byte(*args, reader=code_reader)
205 # minor_version + major_version
# Integer constants: slot 0 is pre-filled with 0, entries are read from 1 on
210 self.constant_ints = [0]
211 for _c in range(1, int_count):
212 self.constant_ints.append(s32())
# Unsigned integer constants, same layout as the signed ones
213 self.constant_uints = [0]
215 for _c in range(1, uint_count):
216 self.constant_uints.append(u32())
# The double constants are read past (8 bytes each) and not kept
218 read_bytes(max(0, (double_count - 1)) * 8)
# String constants: slot 0 is the empty string
220 self.constant_strings = ['']
221 for _c in range(1, string_count):
222 s = _read_string(code_reader)
223 self.constant_strings.append(s)
224 namespace_count = u30()
225 for _c in range(1, namespace_count):
229 for _c in range(1, ns_set_count):
231 for _c2 in range(count):
233 multiname_count = u30()
242 0x0e: 2, # MultinameA
243 0x1b: 1, # MultinameL
244 0x1c: 1, # MultinameLA
# Multiname table: slot 0 is the empty string; an entry is either a constant
# string or a _Multiname placeholder that only records the kind
246 self.multinames = ['']
247 for _c in range(1, multiname_count):
249 assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
251 u30() # namespace_idx
253 self.multinames.append(self.constant_strings[name_idx])
257 self.multinames.append(self.constant_strings[name_idx])
259 self.multinames.append(_Multiname(kind))
260 for _c2 in range(MULTINAME_SIZES[kind]):
# Only two flag bits (0x01 and 0x04) of each method signature are kept
265 MethodInfo = collections.namedtuple(
267 ['NEED_ARGUMENTS', 'NEED_REST'])
269 for method_id in range(method_count):
272 for _ in range(param_count):
274 u30() # name index (always 0 for youtube)
276 if flags & 0x08 != 0:
279 for c in range(option_count):
282 if flags & 0x80 != 0:
283 # Param names present
284 for _ in range(param_count):
286 mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
287 method_infos.append(mi)
290 metadata_count = u30()
291 for _c in range(metadata_count):
294 for _c2 in range(item_count):
# Helper that reads one trait from code_reader and returns a
# (methods, constants) pair
298 def parse_traits_info():
299 trait_name_idx = u30()
# Low nibble of the byte is the trait kind, high nibble holds attribute bits
300 kind_full = read_byte()
301 kind = kind_full & 0x0f
302 attrs = kind_full >> 4
305 if kind == 0x00: # Slot
307 u30() # type_name_idx
311 elif kind == 0x06: # Const
313 u30() # type_name_idx
318 if vkind == 0x03: # Constant_Int
319 value = self.constant_ints[vindex]
320 elif vkind == 0x04: # Constant_UInt
321 value = self.constant_uints[vindex]
323 return {}, None # Ignore silently for now
324 constants = {self.multinames[trait_name_idx]: value}
325 elif kind in (0x01, 0x02, 0x03): # Method / Getter / Setter
328 methods[self.multinames[trait_name_idx]] = method_idx
329 elif kind == 0x04: # Class
332 elif kind == 0x05: # Function
335 methods[function_idx] = self.multinames[trait_name_idx]
337 raise ExtractorError('Unsupported trait kind %d' % kind)
339 if attrs & 0x4 != 0: # Metadata present
340 metadata_count = u30()
341 for _c3 in range(metadata_count):
342 u30() # metadata index
344 return methods, constants
# First pass over the classes: create an _AVMClass per entry and register
# the methods and constants found in its traits
349 for class_id in range(class_count):
352 cname = self.multinames[name_idx]
353 avm_class = _AVMClass(name_idx, cname)
354 classes.append(avm_class)
356 u30() # super_name idx
358 if flags & 0x08 != 0: # Protected namespace is present
359 u30() # protected_ns_idx
361 for _c2 in range(intrf_count):
365 for _c2 in range(trait_count):
366 trait_methods, trait_constants = parse_traits_info()
367 avm_class.register_methods(trait_methods)
369 avm_class.constants.update(trait_constants)
371 assert len(classes) == class_count
372 self._classes_by_name = dict((c.name, c) for c in classes)
# Second pass over the same classes: read cinit_idx and a further set of traits
374 for avm_class in classes:
375 avm_class.cinit_idx = u30()
377 for _c2 in range(trait_count):
378 trait_methods, trait_constants = parse_traits_info()
379 avm_class.register_methods(trait_methods)
381 avm_class.constants.update(trait_constants)
385 for _c in range(script_count):
388 for _c2 in range(trait_count):
# Method bodies: only the bytecode and local_count of each body are kept
392 method_body_count = u30()
393 Method = collections.namedtuple('Method', ['code', 'local_count'])
394 self._all_methods = []
395 for _c in range(method_body_count):
399 u30() # init_scope_depth
400 u30() # max_scope_depth
402 code = read_bytes(code_length)
403 m = Method(code, local_count)
404 self._all_methods.append(m)
# Attach the body to every class that registered this method index
405 for avm_class in classes:
406 if method_idx in avm_class.method_idxs:
407 avm_class.methods[avm_class.method_idxs[method_idx]] = m
408 exception_count = u30()
409 for _c2 in range(exception_count):
416 for _c2 in range(trait_count):
# The reader must have consumed the code tag exactly
419 assert p + code_reader.tell() == len(code_tag)
421 def patch_function(self, avm_class, func_name, f):
422 self._patched_functions[(avm_class, func_name)] = f
424 def extract_class(self, class_name, call_cinit=True):
426 res = self._classes_by_name[class_name]
428 raise ExtractorError('Class %r not found' % class_name)
430 if call_cinit and hasattr(res, 'cinit_idx'):
431 res.register_methods({'$cinit': res.cinit_idx})
432 res.methods['$cinit'] = self._all_methods[res.cinit_idx]
433 cinit = self.extract_function(res, '$cinit')
438 def extract_function(self, avm_class, func_name):
439 p = self._patched_functions.get((avm_class, func_name))
442 if func_name in avm_class.method_pyfunctions:
443 return avm_class.method_pyfunctions[func_name]
444 if func_name in self._classes_by_name:
445 return self._classes_by_name[func_name].make_object()
446 if func_name not in avm_class.methods:
447 raise ExtractorError('Cannot find function %s.%s' % (
448 avm_class.name, func_name))
449 m = avm_class.methods[func_name]
453 coder = io.BytesIO(m.code)
454 s24 = lambda: _s24(coder)
455 u30 = lambda: _u30(coder)
457 registers = [avm_class.variables] + list(args) + [None] * m.local_count
459 scopes = collections.deque([
460 self._classes_by_name, avm_class.constants, avm_class.variables])
462 opcode = _read_byte(coder)
463 if opcode == 9: # label
464 pass # Spec says: "Do nothing."
465 elif opcode == 16: # jump
467 coder.seek(coder.tell() + offset)
468 elif opcode == 17: # iftrue
472 coder.seek(coder.tell() + offset)
473 elif opcode == 18: # iffalse
477 coder.seek(coder.tell() + offset)
478 elif opcode == 19: # ifeq
483 coder.seek(coder.tell() + offset)
484 elif opcode == 20: # ifne
489 coder.seek(coder.tell() + offset)
490 elif opcode == 21: # iflt
495 coder.seek(coder.tell() + offset)
496 elif opcode == 32: # pushnull
498 elif opcode == 33: # pushundefined
499 stack.append(undefined)
500 elif opcode == 36: # pushbyte
501 v = _read_byte(coder)
503 elif opcode == 37: # pushshort
506 elif opcode == 38: # pushtrue
508 elif opcode == 39: # pushfalse
510 elif opcode == 40: # pushnan
511 stack.append(float('NaN'))
512 elif opcode == 42: # dup
515 elif opcode == 44: # pushstring
517 stack.append(self.constant_strings[idx])
518 elif opcode == 48: # pushscope
519 new_scope = stack.pop()
520 scopes.append(new_scope)
521 elif opcode == 66: # construct
523 args = list(reversed(
524 [stack.pop() for _ in range(arg_count)]))
526 res = obj.avm_class.make_object()
528 elif opcode == 70: # callproperty
530 mname = self.multinames[index]
532 args = list(reversed(
533 [stack.pop() for _ in range(arg_count)]))
536 if obj == StringClass:
537 if mname == 'String':
538 assert len(args) == 1
539 assert isinstance(args[0], (
540 int, compat_str, _Undefined))
541 if args[0] == undefined:
544 res = compat_str(args[0])
548 raise NotImplementedError(
549 'Function String.%s is not yet implemented'
551 elif isinstance(obj, _AVMClass_Object):
552 func = self.extract_function(obj.avm_class, mname)
556 elif isinstance(obj, _AVMClass):
557 func = self.extract_function(obj, mname)
561 elif isinstance(obj, _ScopeDict):
562 if mname in obj.avm_class.method_names:
563 func = self.extract_function(obj.avm_class, mname)
569 elif isinstance(obj, compat_str):
571 assert len(args) == 1
572 assert isinstance(args[0], compat_str)
576 res = obj.split(args[0])
579 elif mname == 'charCodeAt':
580 assert len(args) <= 1
581 idx = 0 if len(args) == 0 else args[0]
582 assert isinstance(idx, int)
586 elif isinstance(obj, list):
588 assert len(args) == 1
589 assert isinstance(args[0], int)
593 elif mname == 'join':
594 assert len(args) == 1
595 assert isinstance(args[0], compat_str)
596 res = args[0].join(obj)
599 raise NotImplementedError(
600 'Unsupported property %r on %r'
602 elif opcode == 71: # returnvoid
605 elif opcode == 72: # returnvalue
608 elif opcode == 73: # constructsuper
609 # Not yet implemented, just hope it works without it
611 args = list(reversed(
612 [stack.pop() for _ in range(arg_count)]))
614 elif opcode == 74: # constructproperty
617 args = list(reversed(
618 [stack.pop() for _ in range(arg_count)]))
621 mname = self.multinames[index]
622 assert isinstance(obj, _AVMClass)
624 # We do not actually call the constructor for now;
625 # we just pretend it does nothing
626 stack.append(obj.make_object())
627 elif opcode == 79: # callpropvoid
629 mname = self.multinames[index]
631 args = list(reversed(
632 [stack.pop() for _ in range(arg_count)]))
634 if isinstance(obj, _AVMClass_Object):
635 func = self.extract_function(obj.avm_class, mname)
637 assert res is undefined
639 if isinstance(obj, _ScopeDict):
640 assert mname in obj.avm_class.method_names
641 func = self.extract_function(obj.avm_class, mname)
643 assert res is undefined
645 if mname == 'reverse':
646 assert isinstance(obj, list)
649 raise NotImplementedError(
650 'Unsupported (void) property %r on %r'
652 elif opcode == 86: # newarray
655 for i in range(arg_count):
656 arr.append(stack.pop())
659 elif opcode == 93: # findpropstrict
661 mname = self.multinames[index]
662 for s in reversed(scopes):
668 if mname not in res and mname in _builtin_classes:
669 stack.append(_builtin_classes[mname])
671 stack.append(res[mname])
672 elif opcode == 94: # findproperty
674 mname = self.multinames[index]
675 for s in reversed(scopes):
680 res = avm_class.variables
682 elif opcode == 96: # getlex
684 mname = self.multinames[index]
685 for s in reversed(scopes):
690 scope = avm_class.variables
694 elif mname in _builtin_classes:
695 res = _builtin_classes[mname]
697 # Assume uninitialized
701 elif opcode == 97: # setproperty
704 idx = self.multinames[index]
705 if isinstance(idx, _Multiname):
709 elif opcode == 98: # getlocal
711 stack.append(registers[index])
712 elif opcode == 99: # setlocal
715 registers[index] = value
716 elif opcode == 102: # getproperty
718 pname = self.multinames[index]
719 if pname == 'length':
721 assert isinstance(obj, (compat_str, list))
722 stack.append(len(obj))
723 elif isinstance(pname, compat_str): # Member access
725 if isinstance(obj, _AVMClass):
726 res = obj.static_properties[pname]
730 assert isinstance(obj, (dict, _ScopeDict)),\
731 'Accessing member %r on %r' % (pname, obj)
732 res = obj.get(pname, undefined)
734 else: # Assume attribute access
736 assert isinstance(idx, int)
738 assert isinstance(obj, list)
739 stack.append(obj[idx])
740 elif opcode == 104: # initproperty
743 idx = self.multinames[index]
744 if isinstance(idx, _Multiname):
748 elif opcode == 115: # convert_
750 intvalue = int(value)
751 stack.append(intvalue)
752 elif opcode == 128: # coerce
754 elif opcode == 130: # coerce_a
756 # um, yes, it's any value
758 elif opcode == 133: # coerce_s
759 assert isinstance(stack[-1], (type(None), compat_str))
760 elif opcode == 147: # decrement
762 assert isinstance(value, int)
763 stack.append(value - 1)
764 elif opcode == 149: # typeof
767 _Undefined: 'undefined',
768 compat_str: 'String',
772 elif opcode == 160: # add
775 res = value1 + value2
777 elif opcode == 161: # subtract
780 res = value1 - value2
782 elif opcode == 162: # multiply
785 res = value1 * value2
787 elif opcode == 164: # modulo
790 res = value1 % value2
792 elif opcode == 168: # bitand
795 assert isinstance(value1, int)
796 assert isinstance(value2, int)
797 res = value1 & value2
799 elif opcode == 171: # equals
802 result = value1 == value2
804 elif opcode == 175: # greaterequals
807 result = value1 >= value2
809 elif opcode == 192: # increment_i
811 assert isinstance(value, int)
812 stack.append(value + 1)
813 elif opcode == 208: # getlocal_0
814 stack.append(registers[0])
815 elif opcode == 209: # getlocal_1
816 stack.append(registers[1])
817 elif opcode == 210: # getlocal_2
818 stack.append(registers[2])
819 elif opcode == 211: # getlocal_3
820 stack.append(registers[3])
821 elif opcode == 212: # setlocal_0
822 registers[0] = stack.pop()
823 elif opcode == 213: # setlocal_1
824 registers[1] = stack.pop()
825 elif opcode == 214: # setlocal_2
826 registers[2] = stack.pop()
827 elif opcode == 215: # setlocal_3
828 registers[3] = stack.pop()
830 raise NotImplementedError(
831 'Unsupported opcode %d' % opcode)
833 avm_class.method_pyfunctions[func_name] = resfunc