from __future__ import unicode_literals

import base64
import io
import itertools
import os
from struct import unpack, pack
import time
import xml.etree.ElementTree as etree

from .common import FileDownloader
from .http import HttpFD
from ..utils import (
    compat_urllib_request,
    compat_urlparse,
    format_bytes,
    encodeFilename,
    sanitize_open,
)


class FlvReader(io.BytesIO):
    """
    Reader for Flv files
    The file format is documented in https://www.adobe.com/devnet/f4v.html
    """

    # Utility functions for reading numbers and strings
    def read_unsigned_long_long(self):
        """Read 8 bytes as a big-endian unsigned integer."""
        return unpack('!Q', self.read(8))[0]

    def read_unsigned_int(self):
        """Read 4 bytes as a big-endian unsigned integer."""
        return unpack('!I', self.read(4))[0]

    def read_unsigned_char(self):
        """Read a single byte as an unsigned integer."""
        return unpack('!B', self.read(1))[0]

    def read_string(self):
        """
        Read a NUL-terminated byte string and return it without the
        terminator.

        Reaching the end of the buffer also ends the string: read(1) then
        returns b'', which never equals the terminator, so without this check
        the loop would never finish on truncated data.
        """
        chars = []
        while True:
            char = self.read(1)
            if not char or char == b'\x00':
                break
            chars.append(char)
        return b''.join(chars)

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)
        """
        real_size = size = self.read_unsigned_int()
        box_type = self.read(4)
        header_end = 8
        if size == 1:
            # A 32 bit size of 1 means the real size follows as a 64 bit
            # value, which makes the header 16 bytes long instead of 8
            real_size = self.read_unsigned_long_long()
            header_end = 16
        return real_size, box_type, self.read(real_size - header_end)

    def read_asrt(self):
        """
        Parse the payload of an 'asrt' box.

        Returns a dict whose 'segment_run' entry is a list of
        (first_segment, fragments_per_segment) tuples.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # QualityEntryCount
        quality_entry_count = self.read_unsigned_char()
        for _ in range(quality_entry_count):
            self.read_string()

        segment_run_count = self.read_unsigned_int()
        segments = []
        for _ in range(segment_run_count):
            first_segment = self.read_unsigned_int()
            fragments_per_segment = self.read_unsigned_int()
            segments.append((first_segment, fragments_per_segment))

        return {
            'segment_run': segments,
        }

    def read_afrt(self):
        """
        Parse the payload of an 'afrt' box.

        Returns a dict whose 'fragments' entry is a list of dicts with the
        keys 'first', 'ts', 'duration' and 'discontinuity_indicator'.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # time scale
        self.read_unsigned_int()
        # QualitySegmentUrlModifiers
        quality_entry_count = self.read_unsigned_char()
        for _ in range(quality_entry_count):
            self.read_string()

        fragments_count = self.read_unsigned_int()
        fragments = []
        for _ in range(fragments_count):
            first = self.read_unsigned_int()
            first_ts = self.read_unsigned_long_long()
            duration = self.read_unsigned_int()
            # The discontinuity byte is only present when the duration is 0
            if duration == 0:
                discontinuity_indicator = self.read_unsigned_char()
            else:
                discontinuity_indicator = None
            fragments.append({
                'first': first,
                'ts': first_ts,
                'duration': duration,
                'discontinuity_indicator': discontinuity_indicator,
            })

        return {
            'fragments': fragments,
        }

    def read_abst(self):
        """
        Parse the payload of an 'abst' box.

        Returns a dict with the parsed 'asrt' boxes under 'segments' and the
        parsed 'afrt' boxes under 'fragments'.  Raises AssertionError if a
        nested box does not have the expected type.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # BootstrapinfoVersion
        self.read_unsigned_int()
        # Profile,Live,Update,Reserved
        self.read(1)
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
        self.read_unsigned_long_long()
        # SmpteTimeCodeOffset
        self.read_unsigned_long_long()
        # MovieIdentifier
        self.read_string()
        # ServerEntryTable
        server_count = self.read_unsigned_char()
        for _ in range(server_count):
            self.read_string()
        # QualityEntryTable: it has its own count, using the server count
        # here would misalign every field that follows when they differ
        quality_count = self.read_unsigned_char()
        for _ in range(quality_count):
            self.read_string()
        # DrmData
        self.read_string()
        # MetaData
        self.read_string()

        segments_count = self.read_unsigned_char()
        segments = []
        for _ in range(segments_count):
            _, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            segments.append(FlvReader(box_data).read_asrt())

        fragments_run_count = self.read_unsigned_char()
        fragments = []
        for _ in range(fragments_run_count):
            _, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragments.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segments,
            'fragments': fragments,
        }

    def read_bootstrap_info(self):
        """
        Read the next box, which must be an 'abst' box, and return its parsed
        content (see read_abst).
        """
        _, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        return FlvReader(box_data).read_abst()


def read_bootstrap_info(bootstrap_bytes):
    """Parse the raw bytes of a bootstrap info box (see FlvReader.read_abst)"""
    return FlvReader(bootstrap_bytes).read_bootstrap_info()


def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video """
    segment_run_table = boot_info['segments'][0]
    # I've only found videos with one segment
    segment_run_entry = segment_run_table['segment_run'][0]
    n_frags = segment_run_entry[1]
    fragment_run_entry_table = boot_info['fragments'][0]['fragments']
    first_frag_number = fragment_run_entry_table[0]['first']
    # The segment number is always 1, the fragments are numbered
    # consecutively starting at the first one listed in the run table
    return [
        (1, frag_number)
        for frag_number in range(first_frag_number, first_frag_number + n_frags)
    ]


def write_flv_header(stream, metadata):
    """Writes the FLV header and the metadata to stream"""
    # FLV header
    stream.write(b'FLV\x01')
    stream.write(b'\x05')
    stream.write(b'\x00\x00\x00\x09')
    # FLV File body
    stream.write(b'\x00\x00\x00\x00')
    # FLVTAG
    # Script data
    stream.write(b'\x12')
    # Size of the metadata with 3 bytes
    stream.write(pack('!L', len(metadata))[1:])
    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
    stream.write(metadata)
    # Magic numbers extracted from the output files produced by AdobeHDS.php
    # (https://github.com/K-S-V/Scripts)
    stream.write(b'\x00\x00\x01\x73')


def _add_ns(prop):
    """Return the tag name prop qualified with the f4m 1.0 XML namespace"""
    return '{http://ns.adobe.com/f4m/1.0}%s' % prop


class HttpQuietDownloader(HttpFD):
    """HttpFD whose to_screen does nothing, used for the fragments"""

    def to_screen(self, *args, **kargs):
        pass


class F4mFD(FileDownloader):
    """
    A downloader for f4m manifests or AdobeHDS.
    """

    def real_download(self, filename, info_dict):
        """
        Download the media described by the f4m manifest at info_dict['url']
        into filename.

        The media entry with the highest bitrate is selected, every fragment
        is downloaded to its own temporary file and the content of its 'mdat'
        box is appended to the output, after an FLV header.

        Returns True on success and False if a fragment could not be
        downloaded.  A fragment without an 'mdat' box makes the box reader
        hit the end of the data and raise struct.error.
        """
        man_url = info_dict['url']
        self.to_screen('[download] Downloading f4m manifest')
        manifest = self.ydl.urlopen(man_url).read()
        self.report_destination(filename)
        http_dl = HttpQuietDownloader(
            self.ydl,
            {'continuedl': True, 'quiet': True, 'noprogress': True})

        doc = etree.fromstring(manifest)
        # Entries without a bitrate sort first (-1), the best one is last
        formats = [(int(f.attrib.get('bitrate', -1)), f)
                   for f in doc.findall(_add_ns('media'))]
        formats = sorted(formats, key=lambda f: f[0])
        _, media = formats[-1]
        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
        boot_info = read_bootstrap_info(bootstrap)
        fragments_list = build_fragments_list(boot_info)
        total_frags = len(fragments_list)

        tmpfilename = self.temp_name(filename)
        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')

        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'downloaded_bytes': 0,
            'frag_counter': 0,
        }
        start = time.time()

        def frag_progress_hook(status):
            frag_total_bytes = status.get('total_bytes', 0)
            # Assume the remaining fragments have the size of the current one
            estimated_size = (
                state['downloaded_bytes'] +
                (total_frags - state['frag_counter']) * frag_total_bytes)
            if status['status'] == 'finished':
                state['downloaded_bytes'] += frag_total_bytes
                state['frag_counter'] += 1
                progress = self.calc_percent(state['frag_counter'], total_frags)
                byte_counter = state['downloaded_bytes']
            else:
                frag_downloaded_bytes = status['downloaded_bytes']
                byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                frag_progress = self.calc_percent(frag_downloaded_bytes,
                                                  frag_total_bytes)
                progress = self.calc_percent(state['frag_counter'], total_frags)
                progress += frag_progress / float(total_frags)

            eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
            self.report_progress(progress, format_bytes(estimated_size),
                                 status.get('speed'), eta)
        http_dl.add_progress_hook(frag_progress_hook)

        frags_filenames = []
        # The output must be closed on every path, including the early return
        # and exceptions, and before the temporary file is renamed.
        # NOTE(review): if sanitize_open can hand back a shared stream (e.g.
        # stdout) it gets closed here as well - confirm this is acceptable
        try:
            write_flv_header(dest_stream, metadata)
            for (seg_i, frag_i) in fragments_list:
                name = 'Seg%d-Frag%d' % (seg_i, frag_i)
                url = base_url + name
                frag_filename = '%s-%s' % (tmpfilename, name)
                success = http_dl.download(frag_filename, {'url': url})
                if not success:
                    return False
                with open(frag_filename, 'rb') as down:
                    down_data = down.read()
                    reader = FlvReader(down_data)
                    # Only the content of the first mdat box is kept
                    while True:
                        _, box_type, box_data = reader.read_box_info()
                        if box_type == b'mdat':
                            dest_stream.write(box_data)
                            break
                frags_filenames.append(frag_filename)
        finally:
            dest_stream.close()

        self.report_finish(format_bytes(state['downloaded_bytes']),
                           time.time() - start)

        self.try_rename(tmpfilename, filename)
        for frag_file in frags_filenames:
            os.remove(frag_file)

        fsize = os.path.getsize(encodeFilename(filename))
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })

        return True