1 from __future__ import unicode_literals
# Generator that walks the tag stream of a compressed SWF file and yields one
# (tag_code, tag_bytes) tuple per tag.
# NOTE(review): the embedded source line numbers in this listing are not
# contiguous, so some statements of this function are not visible here.
14 def _extract_tags(file_contents):
# Bytes 1-2 of the signature must be b'WS'; byte 0 is inspected separately below.
15 if file_contents[1:3] != b'WS':
17 'Not an SWF file; header is %r' % file_contents[:3])
# A first byte of b'C' selects zlib: the payload after the first 8 bytes is inflated.
18 if file_contents[:1] == b'C':
19 content = zlib.decompress(file_contents[8:])
21 raise NotImplementedError(
22 'Unsupported compression format %r' %
25 # Determine number of bits in framesize rectangle
# The upper 5 bits of the first content byte are taken as the per-field bit width.
26 framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3
# Rectangle size in bytes: 5 width bits plus 4 fields of that width, rounded up.
27 framesize_len = (5 + 4 * framesize_nbits + 7) // 8
# Tags start after the rectangle and two further 2-byte fields
# (presumably frame rate and frame count -- TODO confirm against the SWF spec).
29 pos = framesize_len + 2 + 2
30 while pos < len(content):
# Tag header: little-endian 16-bit word; the tag code is the upper 10 bits and
# a short length is the lower 6 bits.
31 header16 = struct_unpack('<H', content[pos:pos + 2])[0]
33 tag_code = header16 >> 6
34 tag_len = header16 & 0x3f
# Long form: the length is re-read as a 32-bit little-endian integer
# (the condition selecting this form is not visible in this listing).
36 tag_len = struct_unpack('<I', content[pos:pos + 4])[0]
# NOTE(review): this bounds check is an assert, so it disappears under `python -O`.
38 assert pos + tag_len <= len(content), \
39 ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
40 % (tag_code, pos, tag_len, len(content)))
41 yield (tag_code, content[pos:pos + tag_len])
# Runtime instance of an AVM class; it only records the _AVMClass it belongs to.
45 class _AVMClass_Object(object):
46 def __init__(self, avm_class):
47 self.avm_class = avm_class
# Formats the object as "<class name>#<id in hex>".
# NOTE(review): the method header this return belongs to is not visible in this listing.
50 return '%s#%x' % (self.avm_class.name, id(self))
# dict subclass that additionally remembers the AVM class it is the scope of.
53 class _ScopeDict(dict):
54 def __init__(self, avm_class):
# Starts out empty; only the owning class is recorded.
55 super(_ScopeDict, self).__init__()
56 self.avm_class = avm_class
# Representation is "<first argument>__Scope(<plain dict repr>)".
# NOTE(review): the method header and the first format argument are not
# visible in this listing.
59 return '%s__Scope(%s)' % (
61 super(_ScopeDict, self).__repr__())
# Description of one AVM2 class: its name, its method tables and its variables.
# NOTE(review): this listing omits several source lines; `self.name`,
# `self.method_idxs` and `self.methods` are used in this file but their
# initialisation is not visible here.
64 class _AVMClass(object):
65 def __init__(self, name_idx, name):
66 self.name_idx = name_idx
# Filled by register_methods with the (name, idx) pairs it is given.
68 self.method_names = {}
# Cache of already-built Python callables, keyed by function name
# (read and written by SWFInterpreter.extract_function).
71 self.method_pyfunctions = {}
# Scope dictionary tied back to this class.
73 self.variables = _ScopeDict(self)
# Creates a new runtime object of this class; no constructor code is run here.
75 def make_object(self):
76 return _AVMClass_Object(self)
79 return '_AVMClass(%s)' % (self.name)
# `methods` is iterated as (name, idx) pairs: they are merged into method_names
# as given, and a second dict built from the same pairs is merged into
# method_idxs (the key/value expression of that dict is not visible here).
81 def register_methods(self, methods):
82 self.method_names.update(methods.items())
83 self.method_idxs.update(dict(
85 for name, idx in methods.items()))
# Placeholder for a multiname that is identified only by its kind byte.
88 class _Multiname(object):
89 def __init__(self, kind):
# Rendered as "[MULTINAME kind: 0x..]" using self.kind.
# NOTE(review): the assignment of self.kind and the header of the method this
# return belongs to are not visible in this listing.
93 return '[MULTINAME kind: 0x%x]' % self.kind
# Builds an integer from bytes taken from `reader`.
# NOTE(review): only two statements of the body are visible in this listing;
# the surrounding loop and its termination test are not shown.
96 def _read_int(reader):
# One unsigned byte is decoded from `buf` ...
102 b = struct_unpack('<B', buf)[0]
# ... and its low 7 bits are OR-ed into the result at bit offset `shift`.
103 res = res | ((b & 0x7f) << shift)
111 res = _read_int(reader)
112 assert res & 0xf0000000 == 0
118 v = _read_int(reader)
119 if v & 0x80000000 != 0:
120 v = - ((v ^ 0xffffffff) + 1)
127 last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00'
128 return struct_unpack('<i', bs + last_byte)[0]
# Reads `slen` bytes from `reader` and returns them decoded as UTF-8.
# NOTE(review): the statement that assigns `slen` is not visible in this listing.
131 def _read_string(reader):
133 resb = reader.read(slen)
# A short read means the stream ended early.
# NOTE(review): this check is an assert, so it is skipped under `python -O`.
134 assert len(resb) == slen
135 return resb.decode('utf-8')
# Reads exactly `count` bytes from `reader`.
# NOTE(review): the statement that hands `resb` back to the caller is not
# visible in this listing.
138 def _read_bytes(count, reader):
140 resb = reader.read(count)
# A short read means the stream ended early (assert: skipped under `python -O`).
141 assert len(resb) == count
# Reads one byte from `reader` via _read_bytes and decodes it as an unsigned integer.
# NOTE(review): the statement that hands `res` back to the caller is not
# visible in this listing.
145 def _read_byte(reader):
146 resb = _read_bytes(1, reader=reader)
147 res = struct_unpack('<B', resb)[0]
# Built-in classes that do not come from the parsed bytecode; they carry a
# placeholder string instead of a real name index.
151 StringClass = _AVMClass('(no name idx)', 'String')
152 ByteArrayClass = _AVMClass('(no name idx)', 'ByteArray')
# Entries of a name -> class mapping.
# NOTE(review): the line that opens this mapping is not visible in this listing;
# presumably it is the `_builtin_classes` dict consulted by findpropstrict -- confirm.
154 StringClass.name: StringClass,
155 ByteArrayClass.name: ByteArrayClass,
# Type of the interpreter's `undefined` value; a single shared instance is created below.
159 class _Undefined(object):
# NOTE(review): `__boolean__` is not a special method name that Python
# recognises; truth testing calls `__bool__` (Python 3) or `__nonzero__`
# (Python 2). As written this method is never invoked implicitly -- confirm
# whether a falsy `undefined` was intended. The method body is not visible here.
160 def __boolean__(self):
# Module-level singleton; the interpreter compares against it with `is` and `==`.
166 undefined = _Undefined()
169 class SWFInterpreter(object):
# Parse the ABC (AVM2 bytecode) block found in the SWF tags and build the
# tables the interpreter reads later: constant_strings, multinames,
# _classes_by_name and the per-class method bodies.
# NOTE(review): this listing omits many source lines (the embedded numbers are
# not contiguous), so several loop bodies and assignments are not visible.
170 def __init__(self, file_contents):
# Overrides registered through patch_function, keyed by (avm_class, func_name).
171 self._patched_functions = {}
# Tags come from _extract_tags; the expression selecting `code_tag` around
# this clause is not visible in this listing.
173 for tag_code, tag in _extract_tags(file_contents)
# Parsing starts right after the first NUL byte found at offset 4 or later in the tag.
175 p = code_tag.index(b'\0', 4) + 1
176 code_reader = io.BytesIO(code_tag[p:])
178 # Parse ABC (AVM2 ByteCode)
180 # Define a couple convenience methods
# Each helper is bound to code_reader so call sites can omit the reader argument.
181 u30 = lambda *args: _u30(*args, reader=code_reader)
182 s32 = lambda *args: _s32(*args, reader=code_reader)
183 u32 = lambda *args: _u32(*args, reader=code_reader)
184 read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
185 read_byte = lambda *args: _read_byte(*args, reader=code_reader)
187 # minor_version + major_version
# Constant-pool loops start at 1, i.e. they run (count - 1) times.
192 for _c in range(1, int_count):
195 for _c in range(1, uint_count):
# Doubles are skipped wholesale: 8 bytes for each of the (double_count - 1) entries.
198 read_bytes(max(0, (double_count - 1)) * 8)
# Index 0 of the string pool is pre-filled with the empty string.
200 self.constant_strings = ['']
201 for _c in range(1, string_count):
202 s = _read_string(code_reader)
203 self.constant_strings.append(s)
204 namespace_count = u30()
205 for _c in range(1, namespace_count):
209 for _c in range(1, ns_set_count):
211 for _c2 in range(count):
213 multiname_count = u30()
# Entries of the MULTINAME_SIZES table (kind byte -> size); the size is used as
# the iteration count of the per-kind loop further down (presumably the number
# of u30 fields to skip -- the loop body is not visible here).
222 0x0e: 2, # MultinameA
223 0x1b: 1, # MultinameL
224 0x1c: 1, # MultinameLA
# Index 0 of the multiname table is pre-filled with the empty string.
226 self.multinames = ['']
227 for _c in range(1, multiname_count):
229 assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
231 u30() # namespace_idx
# Multinames that carry a name index are stored as the plain string from the pool ...
233 self.multinames.append(self.constant_strings[name_idx])
237 self.multinames.append(self.constant_strings[name_idx])
# ... other kinds are stored as an opaque _Multiname placeholder.
239 self.multinames.append(_Multiname(kind))
240 for _c2 in range(MULTINAME_SIZES[kind]):
# Per-method flags kept for later use: NEED_ARGUMENTS and NEED_REST.
245 MethodInfo = collections.namedtuple(
247 ['NEED_ARGUMENTS', 'NEED_REST'])
249 for method_id in range(method_count):
252 for _ in range(param_count):
254 u30() # name index (always 0 for youtube)
# Flag bit 0x08 guards the block that iterates over option_count entries.
256 if flags & 0x08 != 0:
259 for c in range(option_count):
262 if flags & 0x80 != 0:
263 # Param names present
264 for _ in range(param_count):
# NEED_ARGUMENTS comes from flag bit 0x01, NEED_REST from flag bit 0x04.
266 mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
267 method_infos.append(mi)
270 metadata_count = u30()
271 for _c in range(metadata_count):
274 for _c2 in range(item_count):
# Nested helper: parses one trait from code_reader and collects methods in
# `methods`; callers below pass its result to _AVMClass.register_methods.
278 def parse_traits_info():
279 trait_name_idx = u30()
# The kind byte packs the trait kind in its low nibble and attribute flags in
# its high nibble.
280 kind_full = read_byte()
281 kind = kind_full & 0x0f
282 attrs = kind_full >> 4
284 if kind in [0x00, 0x06]: # Slot or Const
286 u30() # type_name_idx
290 elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
# Method-like traits are recorded as name -> method index.
293 methods[self.multinames[trait_name_idx]] = method_idx
294 elif kind == 0x04: # Class
297 elif kind == 0x05: # Function
# NOTE(review): this stores index -> name, the reverse of the Method branch
# above, while register_methods iterates the dict as (name, idx) pairs --
# confirm whether the key and value are swapped here.
300 methods[function_idx] = self.multinames[trait_name_idx]
302 raise ExtractorError('Unsupported trait kind %d' % kind)
# Attribute bit 0x4 means a list of metadata indices follows; they are read and discarded.
304 if attrs & 0x4 != 0: # Metadata present
305 metadata_count = u30()
306 for _c3 in range(metadata_count):
307 u30() # metadata index
# First pass over classes: one _AVMClass per entry, named through the
# multiname table, with its traits registered as methods.
314 for class_id in range(class_count):
317 cname = self.multinames[name_idx]
318 avm_class = _AVMClass(name_idx, cname)
319 classes.append(avm_class)
321 u30() # super_name idx
323 if flags & 0x08 != 0: # Protected namespace is present
324 u30() # protected_ns_idx
326 for _c2 in range(intrf_count):
330 for _c2 in range(trait_count):
331 trait_methods = parse_traits_info()
332 avm_class.register_methods(trait_methods)
334 assert len(classes) == class_count
# Name -> class lookup used by extract_class and by the interpreter's scope chain.
335 self._classes_by_name = dict((c.name, c) for c in classes)
# Second pass over the same classes: a further set of traits is parsed for
# each one and registered on it as well.
337 for avm_class in classes:
340 for _c2 in range(trait_count):
341 trait_methods = parse_traits_info()
342 avm_class.register_methods(trait_methods)
346 for _c in range(script_count):
349 for _c2 in range(trait_count):
353 method_body_count = u30()
# A parsed method body: the raw bytecode plus its local-register count.
354 Method = collections.namedtuple('Method', ['code', 'local_count'])
355 for _c in range(method_body_count):
359 u30() # init_scope_depth
360 u30() # max_scope_depth
362 code = read_bytes(code_length)
# The body is attached to every class whose method_idxs knows this method
# index, stored under the corresponding method name.
363 for avm_class in classes:
364 if method_idx in avm_class.method_idxs:
365 m = Method(code, local_count)
366 avm_class.methods[avm_class.method_idxs[method_idx]] = m
367 exception_count = u30()
368 for _c2 in range(exception_count):
375 for _c2 in range(trait_count):
# Sanity check: the whole tag must have been consumed
# (assert: skipped under `python -O`).
378 assert p + code_reader.tell() == len(code_tag)
def patch_function(self, avm_class, func_name, f):
    """Register *f* as the replacement for *func_name* on *avm_class*.

    The callable is stored in ``self._patched_functions`` under the
    ``(avm_class, func_name)`` key, overwriting any earlier entry for
    the same pair.  ``extract_function`` consults this mapping first.
    """
    patch_key = (avm_class, func_name)
    self._patched_functions[patch_key] = f
# Look up a parsed class by name in `_classes_by_name`; an unknown name is
# reported as ExtractorError('Class ... not found').
# NOTE(review): the control-flow lines connecting the two statements below are
# not visible in this listing.
383 def extract_class(self, class_name):
385 return self._classes_by_name[class_name]
387 raise ExtractorError('Class %r not found' % class_name)
# Resolve `func_name` on `avm_class` to a Python callable that interprets the
# method's AVM2 bytecode.
# Lookup order visible here: patched functions, then the per-class cache
# `method_pyfunctions`, then class names (a fresh object of that class is
# returned), and finally the parsed bytecode in `avm_class.methods`.
# NOTE(review): this listing omits many source lines (the embedded numbers are
# not contiguous); most opcode handlers below show only part of their body.
389 def extract_function(self, avm_class, func_name):
390 p = self._patched_functions.get((avm_class, func_name))
393 if func_name in avm_class.method_pyfunctions:
394 return avm_class.method_pyfunctions[func_name]
395 if func_name in self._classes_by_name:
396 return self._classes_by_name[func_name].make_object()
397 if func_name not in avm_class.methods:
398 raise ExtractorError('Cannot find function %s.%s' % (
399 avm_class.name, func_name))
400 m = avm_class.methods[func_name]
# Bytecode is read through a BytesIO cursor; jumps are implemented by seeking it.
404 coder = io.BytesIO(m.code)
405 s24 = lambda: _s24(coder)
406 u30 = lambda: _u30(coder)
# Register 0 holds the class scope dict, followed by the call arguments and
# then `local_count` empty slots.
408 registers = [avm_class.variables] + list(args) + [None] * m.local_count
# Initial scope chain: the class table first, then this class's variables.
410 scopes = collections.deque([
411 self._classes_by_name, avm_class.variables])
# Main dispatch: one opcode byte is read and handled by the matching branch.
413 opcode = _read_byte(coder)
414 if opcode == 9: # label
415 pass # Spec says: "Do nothing."
# Branch opcodes: each visible handler moves the cursor by `offset` relative
# to the current position (the conditions are not visible in this listing).
416 elif opcode == 16: # jump
418 coder.seek(coder.tell() + offset)
419 elif opcode == 17: # iftrue
423 coder.seek(coder.tell() + offset)
424 elif opcode == 18: # iffalse
428 coder.seek(coder.tell() + offset)
429 elif opcode == 19: # ifeq
434 coder.seek(coder.tell() + offset)
435 elif opcode == 20: # ifne
440 coder.seek(coder.tell() + offset)
441 elif opcode == 21: # iflt
446 coder.seek(coder.tell() + offset)
# Constant-push opcodes.
447 elif opcode == 32: # pushnull
449 elif opcode == 33: # pushundefined
450 stack.append(undefined)
451 elif opcode == 36: # pushbyte
452 v = _read_byte(coder)
454 elif opcode == 38: # pushtrue
456 elif opcode == 39: # pushfalse
458 elif opcode == 40: # pushnan
459 stack.append(float('NaN'))
460 elif opcode == 42: # dup
463 elif opcode == 44: # pushstring
465 stack.append(self.constant_strings[idx])
# pushscope moves the top of the stack onto the scope chain.
466 elif opcode == 48: # pushscope
467 new_scope = stack.pop()
468 scopes.append(new_scope)
469 elif opcode == 66: # construct
# Arguments are popped last-first, so the list is reversed back into call order.
471 args = list(reversed(
472 [stack.pop() for _ in range(arg_count)]))
474 res = obj.avm_class.make_object()
# callproperty dispatches on the receiver: the built-in String class,
# interpreter objects/classes/scopes (resolved recursively through
# extract_function), and native Python str and list values.
476 elif opcode == 70: # callproperty
478 mname = self.multinames[index]
480 args = list(reversed(
481 [stack.pop() for _ in range(arg_count)]))
484 if obj == StringClass:
485 if mname == 'String':
486 assert len(args) == 1
487 assert isinstance(args[0], (
488 int, compat_str, _Undefined))
489 if args[0] == undefined:
492 res = compat_str(args[0])
496 raise NotImplementedError(
497 'Function String.%s is not yet implemented'
499 elif isinstance(obj, _AVMClass_Object):
500 func = self.extract_function(obj.avm_class, mname)
504 elif isinstance(obj, _AVMClass):
505 func = self.extract_function(obj, mname)
509 elif isinstance(obj, _ScopeDict):
510 if mname in obj.avm_class.method_names:
511 func = self.extract_function(obj.avm_class, mname)
517 elif isinstance(obj, compat_str):
519 assert len(args) == 1
520 assert isinstance(args[0], compat_str)
524 res = obj.split(args[0])
527 elif mname == 'charCodeAt':
528 assert len(args) <= 1
# The index defaults to 0 when charCodeAt is called without arguments.
529 idx = 0 if len(args) == 0 else args[0]
530 assert isinstance(idx, int)
534 elif isinstance(obj, list):
536 assert len(args) == 1
537 assert isinstance(args[0], int)
541 elif mname == 'join':
542 assert len(args) == 1
543 assert isinstance(args[0], compat_str)
544 res = args[0].join(obj)
547 raise NotImplementedError(
548 'Unsupported property %r on %r'
550 elif opcode == 71: # returnvoid
553 elif opcode == 72: # returnvalue
556 elif opcode == 74: # constructproperty
559 args = list(reversed(
560 [stack.pop() for _ in range(arg_count)]))
563 mname = self.multinames[index]
564 assert isinstance(obj, _AVMClass)
566 # We do not actually call the constructor for now;
567 # we just pretend it does nothing
568 stack.append(obj.make_object())
# callpropvoid: like callproperty, but the callee's result is expected to be
# `undefined` and nothing is pushed in the visible branches.
569 elif opcode == 79: # callpropvoid
571 mname = self.multinames[index]
573 args = list(reversed(
574 [stack.pop() for _ in range(arg_count)]))
576 if isinstance(obj, _AVMClass_Object):
577 func = self.extract_function(obj.avm_class, mname)
579 assert res is undefined
581 if isinstance(obj, _ScopeDict):
582 assert mname in obj.avm_class.method_names
583 func = self.extract_function(obj.avm_class, mname)
585 assert res is undefined
587 if mname == 'reverse':
588 assert isinstance(obj, list)
591 raise NotImplementedError(
592 'Unsupported (void) property %r on %r'
594 elif opcode == 86: # newarray
597 for i in range(arg_count):
598 arr.append(stack.pop())
# Property lookup opcodes walk the scope chain from the innermost scope outwards.
601 elif opcode == 93: # findpropstrict
603 mname = self.multinames[index]
604 for s in reversed(scopes):
# Names missing from the resolved scope fall back to the built-in class table.
610 if mname not in res and mname in _builtin_classes:
611 stack.append(_builtin_classes[mname])
613 stack.append(res[mname])
614 elif opcode == 94: # findproperty
616 mname = self.multinames[index]
617 for s in reversed(scopes):
622 res = avm_class.variables
624 elif opcode == 96: # getlex
626 mname = self.multinames[index]
627 for s in reversed(scopes):
632 scope = avm_class.variables
633 # I cannot find where static variables are initialized
634 # so let's just return None
635 res = scope.get(mname)
637 elif opcode == 97: # setproperty
640 idx = self.multinames[index]
641 if isinstance(idx, _Multiname):
645 elif opcode == 98: # getlocal
647 stack.append(registers[index])
648 elif opcode == 99: # setlocal
651 registers[index] = value
# getproperty: 'length' is answered with len(); other string names are looked
# up in a dict-like object (missing keys yield `undefined`); anything else is
# treated as an integer index into a list.
652 elif opcode == 102: # getproperty
654 pname = self.multinames[index]
655 if pname == 'length':
657 assert isinstance(obj, (compat_str, list))
658 stack.append(len(obj))
659 elif isinstance(pname, compat_str): # Member access
661 assert isinstance(obj, (dict, _ScopeDict)), \
662 'Accessing member %r on %r' % (pname, obj)
663 res = obj.get(pname, undefined)
665 else: # Assume attribute access
667 assert isinstance(idx, int)
669 assert isinstance(obj, list)
670 stack.append(obj[idx])
# Opcode 115 (0x73) is convert_i in the AVM2 instruction set; the trailing
# comment on the next line is truncated. The value is converted with int().
671 elif opcode == 115: # convert_
673 intvalue = int(value)
674 stack.append(intvalue)
675 elif opcode == 128: # coerce
677 elif opcode == 130: # coerce_a
679 # um, yes, it's any value
# coerce_s only verifies that the top of the stack is already None or a string.
681 elif opcode == 133: # coerce_s
682 assert isinstance(stack[-1], (type(None), compat_str))
683 elif opcode == 147: # decrement
685 assert isinstance(value, int)
686 stack.append(value - 1)
# typeof maps the Python type of the value to a type-name string.
687 elif opcode == 149: # typeof
690 _Undefined: 'undefined',
691 compat_str: 'String',
# Binary operators: computed with the corresponding Python operator on two
# operands named value1 and value2.
695 elif opcode == 160: # add
698 res = value1 + value2
700 elif opcode == 161: # subtract
703 res = value1 - value2
705 elif opcode == 162: # multiply
708 res = value1 * value2
710 elif opcode == 164: # modulo
713 res = value1 % value2
715 elif opcode == 171: # equals
718 result = value1 == value2
720 elif opcode == 175: # greaterequals
723 result = value1 >= value2
725 elif opcode == 192: # increment_i
727 assert isinstance(value, int)
728 stack.append(value + 1)
# Short forms of getlocal/setlocal for registers 0-3.
729 elif opcode == 208: # getlocal_0
730 stack.append(registers[0])
731 elif opcode == 209: # getlocal_1
732 stack.append(registers[1])
733 elif opcode == 210: # getlocal_2
734 stack.append(registers[2])
735 elif opcode == 211: # getlocal_3
736 stack.append(registers[3])
737 elif opcode == 212: # setlocal_0
738 registers[0] = stack.pop()
739 elif opcode == 213: # setlocal_1
740 registers[1] = stack.pop()
741 elif opcode == 214: # setlocal_2
742 registers[2] = stack.pop()
743 elif opcode == 215: # setlocal_3
744 registers[3] = stack.pop()
# Any opcode without a handler aborts interpretation.
746 raise NotImplementedError(
747 'Unsupported opcode %d' % opcode)
# The finished callable is cached so later lookups hit method_pyfunctions.
749 avm_class.method_pyfunctions[func_name] = resfunc