youtube_dl/FileDownloader.py

   1 import math
   2 import io
   3 import os
   4 import re
   5 import shutil
   6 import socket
   7 import subprocess
   8 import sys
   9 import time
  10 import traceback
  11
  12 if os.name == 'nt':
  13     import ctypes
  14
  15 from .utils import *
  16 from .extractor import get_info_extractor
  17
  18
  19 class FileDownloader(object):
  20     """File Downloader class.
  21
    File downloader objects are responsible for downloading the actual
    video file and writing it to disk if the user has requested it,
    among some other tasks. In most cases there should be one per
    program. Given a video URL, the downloader does not know how to
    extract all the needed information (that is the task of the
    InfoExtractors), so it has to pass the URL to one of them.
  28
    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.
  36
  37     File downloaders accept a lot of parameters. In order not to saturate
  38     the object constructor with arguments, it receives a dictionary of
  39     options instead. These options are available through the params
  40     attribute for the InfoExtractors to use. The FileDownloader also
  41     registers itself as the downloader in charge for the InfoExtractors
  42     that are added to it, so this is a "mutual registration".
  43
  44     Available options:
  45
  46     username:          Username for authentication purposes.
  47     password:          Password for authentication purposes.
  48     usenetrc:          Use netrc for authentication instead.
  49     quiet:             Do not print messages to stdout.
  50     forceurl:          Force printing final URL.
  51     forcetitle:        Force printing title.
  52     forceid:           Force printing ID.
  53     forcethumbnail:    Force printing thumbnail URL.
  54     forcedescription:  Force printing description.
  55     forcefilename:     Force printing final filename.
  56     simulate:          Do not download the video files.
  57     format:            Video format code.
  58     format_limit:      Highest quality format to try.
  59     outtmpl:           Template for output names.
  60     restrictfilenames: Do not allow "&" and spaces in file names
  61     ignoreerrors:      Do not stop on download errors.
  62     ratelimit:         Download speed limit, in bytes/sec.
  63     nooverwrites:      Prevent overwriting files.
  64     retries:           Number of times to retry for HTTP error 5xx
  65     buffersize:        Size of download buffer in bytes.
  66     noresizebuffer:    Do not automatically resize the download buffer.
  67     continuedl:        Try to continue downloads if possible.
  68     noprogress:        Do not print the progress bar.
  69     playliststart:     Playlist item to start at.
  70     playlistend:       Playlist item to end at.
  71     matchtitle:        Download only matching titles.
  72     rejecttitle:       Reject downloads for matching titles.
  73     logtostderr:       Log messages to stderr instead of stdout.
  74     consoletitle:      Display progress in console window's titlebar.
  75     nopart:            Do not use temporary .part files.
  76     updatetime:        Use the Last-modified header to set output file timestamps.
  77     writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
  79     writethumbnail:    Write the thumbnail image to a file
  80     writesubtitles:    Write the video subtitles to a file
  81     allsubtitles:      Downloads all the subtitles of the video
  82     listsubtitles:     Lists all available subtitles for the video
  83     subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
  84     subtitleslang:     Language of the subtitles to download
  85     test:              Download only first bytes to test the downloader.
  86     keepvideo:         Keep the video file after post-processing
  87     min_filesize:      Skip files smaller than this size
  88     max_filesize:      Skip files larger than this size
  89     daterange:         A DateRange object, download only if the upload_date is in the range.
  90     skip_download:     Skip the actual download of the video file
  91     """
  92
  93     params = None
  94     _ies = []
  95     _pps = []
  96     _download_retcode = None
  97     _num_downloads = None
  98     _screen_file = None
  99
 100     def __init__(self, params):
 101         """Create a FileDownloader object with the given options."""
 102         self._ies = []
 103         self._pps = []
 104         self._progress_hooks = []
 105         self._download_retcode = 0
 106         self._num_downloads = 0
 107         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 108         self.params = params
 109
 110         if '%(stitle)s' in self.params['outtmpl']:
 111             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 112
 113     @staticmethod
 114     def format_bytes(bytes):
 115         if bytes is None:
 116             return 'N/A'
 117         if type(bytes) is str:
 118             bytes = float(bytes)
 119         if bytes == 0.0:
 120             exponent = 0
 121         else:
 122             exponent = int(math.log(bytes, 1024.0))
 123         suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
 124         converted = float(bytes) / float(1024 ** exponent)
 125         return '%.2f%s' % (converted, suffix)
 126
 127     @staticmethod
 128     def calc_percent(byte_counter, data_len):
 129         if data_len is None:
 130             return '---.-%'
 131         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 132
 133     @staticmethod
 134     def calc_eta(start, now, total, current):
 135         if total is None:
 136             return '--:--'
 137         dif = now - start
 138         if current == 0 or dif < 0.001: # One millisecond
 139             return '--:--'
 140         rate = float(current) / dif
 141         eta = int((float(total) - float(current)) / rate)
 142         (eta_mins, eta_secs) = divmod(eta, 60)
 143         if eta_mins > 99:
 144             return '--:--'
 145         return '%02d:%02d' % (eta_mins, eta_secs)
 146
 147     @staticmethod
 148     def calc_speed(start, now, bytes):
 149         dif = now - start
 150         if bytes == 0 or dif < 0.001: # One millisecond
 151             return '%10s' % '---b/s'
 152         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 153
 154     @staticmethod
 155     def best_block_size(elapsed_time, bytes):
 156         new_min = max(bytes / 2.0, 1.0)
 157         new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 158         if elapsed_time < 0.001:
 159             return int(new_max)
 160         rate = bytes / elapsed_time
 161         if rate > new_max:
 162             return int(new_max)
 163         if rate < new_min:
 164             return int(new_min)
 165         return int(rate)
 166
 167     @staticmethod
 168     def parse_bytes(bytestr):
 169         """Parse a string indicating a byte quantity into an integer."""
 170         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 171         if matchobj is None:
 172             return None
 173         number = float(matchobj.group(1))
 174         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 175         return int(round(number * multiplier))
 176
 177     def add_info_extractor(self, ie):
 178         """Add an InfoExtractor object to the end of the list."""
 179         self._ies.append(ie)
 180         ie.set_downloader(self)
 181
 182     def add_post_processor(self, pp):
 183         """Add a PostProcessor object to the end of the chain."""
 184         self._pps.append(pp)
 185         pp.set_downloader(self)
 186
 187     def to_screen(self, message, skip_eol=False):
 188         """Print message to stdout if not in quiet mode."""
 189         assert type(message) == type(u'')
 190         if not self.params.get('quiet', False):
 191             terminator = [u'\n', u''][skip_eol]
 192             output = message + terminator
 193             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 194                 output = output.encode(preferredencoding(), 'ignore')
 195             self._screen_file.write(output)
 196             self._screen_file.flush()
 197
 198     def to_stderr(self, message):
 199         """Print message to stderr."""
 200         assert type(message) == type(u'')
 201         output = message + u'\n'
 202         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 203             output = output.encode(preferredencoding())
 204         sys.stderr.write(output)
 205
 206     def to_cons_title(self, message):
 207         """Set console/terminal window title to message."""
 208         if not self.params.get('consoletitle', False):
 209             return
 210         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 211             # c_wchar_p() might not be necessary if `message` is
 212             # already of type unicode()
 213             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 214         elif 'TERM' in os.environ:
 215             self.to_screen('\033]0;%s\007' % message, skip_eol=True)
 216
 217     def fixed_template(self):
 218         """Checks if the output template is fixed."""
 219         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 220
 221     def trouble(self, message=None, tb=None):
 222         """Determine action to take when a download problem appears.
 223
 224         Depending on if the downloader has been configured to ignore
 225         download errors or not, this method may throw an exception or
 226         not when errors are found, after printing the message.
 227
 228         tb, if given, is additional traceback information.
 229         """
 230         if message is not None:
 231             self.to_stderr(message)
 232         if self.params.get('verbose'):
 233             if tb is None:
 234                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 235                     tb = u''
 236                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 237                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 238                     tb += compat_str(traceback.format_exc())
 239                 else:
 240                     tb_data = traceback.format_list(traceback.extract_stack())
 241                     tb = u''.join(tb_data)
 242             self.to_stderr(tb)
 243         if not self.params.get('ignoreerrors', False):
 244             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 245                 exc_info = sys.exc_info()[1].exc_info
 246             else:
 247                 exc_info = sys.exc_info()
 248             raise DownloadError(message, exc_info)
 249         self._download_retcode = 1
 250
 251     def report_warning(self, message):
 252         '''
 253         Print the message to stderr, it will be prefixed with 'WARNING:'
 254         If stderr is a tty file the 'WARNING:' will be colored
 255         '''
 256         if sys.stderr.isatty() and os.name != 'nt':
 257             _msg_header=u'\033[0;33mWARNING:\033[0m'
 258         else:
 259             _msg_header=u'WARNING:'
 260         warning_message=u'%s %s' % (_msg_header,message)
 261         self.to_stderr(warning_message)
 262
 263     def report_error(self, message, tb=None):
 264         '''
 265         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 266         in red if stderr is a tty file.
 267         '''
 268         if sys.stderr.isatty() and os.name != 'nt':
 269             _msg_header = u'\033[0;31mERROR:\033[0m'
 270         else:
 271             _msg_header = u'ERROR:'
 272         error_message = u'%s %s' % (_msg_header, message)
 273         self.trouble(error_message, tb)
 274
 275     def slow_down(self, start_time, byte_counter):
 276         """Sleep if the download speed is over the rate limit."""
 277         rate_limit = self.params.get('ratelimit', None)
 278         if rate_limit is None or byte_counter == 0:
 279             return
 280         now = time.time()
 281         elapsed = now - start_time
 282         if elapsed <= 0.0:
 283             return
 284         speed = float(byte_counter) / elapsed
 285         if speed > rate_limit:
 286             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 287
 288     def temp_name(self, filename):
 289         """Returns a temporary filename for the given filename."""
 290         if self.params.get('nopart', False) or filename == u'-' or \
 291                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 292             return filename
 293         return filename + u'.part'
 294
 295     def undo_temp_name(self, filename):
 296         if filename.endswith(u'.part'):
 297             return filename[:-len(u'.part')]
 298         return filename
 299
 300     def try_rename(self, old_filename, new_filename):
 301         try:
 302             if old_filename == new_filename:
 303                 return
 304             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 305         except (IOError, OSError) as err:
 306             self.report_error(u'unable to rename file')
 307
 308     def try_utime(self, filename, last_modified_hdr):
 309         """Try to set the last-modified time of the given file."""
 310         if last_modified_hdr is None:
 311             return
 312         if not os.path.isfile(encodeFilename(filename)):
 313             return
 314         timestr = last_modified_hdr
 315         if timestr is None:
 316             return
 317         filetime = timeconvert(timestr)
 318         if filetime is None:
 319             return filetime
 320         # Ignore obviously invalid dates
 321         if filetime == 0:
 322             return
 323         try:
 324             os.utime(filename, (time.time(), filetime))
 325         except:
 326             pass
 327         return filetime
 328
 329     def report_writedescription(self, descfn):
 330         """ Report that the description file is being written """
 331         self.to_screen(u'[info] Writing video description to: ' + descfn)
 332
 333     def report_writesubtitles(self, sub_filename):
 334         """ Report that the subtitles file is being written """
 335         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 336
 337     def report_writeinfojson(self, infofn):
 338         """ Report that the metadata file has been written """
 339         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 340
 341     def report_destination(self, filename):
 342         """Report destination filename."""
 343         self.to_screen(u'[download] Destination: ' + filename)
 344
 345     def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 346         """Report download progress."""
 347         if self.params.get('noprogress', False):
 348             return
 349         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
 350         if self.params.get('progress_with_newline', False):
 351             self.to_screen(u'[download] %s of %s at %s ETA %s' %
 352                 (percent_str, data_len_str, speed_str, eta_str))
 353         else:
 354             self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
 355                 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 356         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 357                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 358
 359     def report_resuming_byte(self, resume_len):
 360         """Report attempt to resume at given byte."""
 361         self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 362
 363     def report_retry(self, count, retries):
 364         """Report retry in case of HTTP error 5xx"""
 365         self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 366
 367     def report_file_already_downloaded(self, file_name):
 368         """Report file has already been fully downloaded."""
 369         try:
 370             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 371         except (UnicodeEncodeError) as err:
 372             self.to_screen(u'[download] The file has already been downloaded')
 373
 374     def report_unable_to_resume(self):
 375         """Report it was impossible to resume download."""
 376         self.to_screen(u'[download] Unable to resume')
 377
 378     def report_finish(self):
 379         """Report download finished."""
 380         if self.params.get('noprogress', False):
 381             self.to_screen(u'[download] Download completed')
 382         else:
 383             self.to_screen(u'')
 384
 385     def increment_downloads(self):
 386         """Increment the ordinal that assigns a number to each file."""
 387         self._num_downloads += 1
 388
 389     def prepare_filename(self, info_dict):
 390         """Generate the output filename."""
 391         try:
 392             template_dict = dict(info_dict)
 393
 394             template_dict['epoch'] = int(time.time())
 395             autonumber_size = self.params.get('autonumber_size')
 396             if autonumber_size is None:
 397                 autonumber_size = 5
 398             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 399             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 400             if template_dict['playlist_index'] is not None:
 401                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 402
 403             sanitize = lambda k,v: sanitize_filename(
 404                 u'NA' if v is None else compat_str(v),
 405                 restricted=self.params.get('restrictfilenames'),
 406                 is_id=(k==u'id'))
 407             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
 408
 409             filename = self.params['outtmpl'] % template_dict
 410             return filename
 411         except KeyError as err:
 412             self.report_error(u'Erroneous output template')
 413             return None
 414         except ValueError as err:
 415             self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
 416             return None
 417
 418     def _match_entry(self, info_dict):
 419         """ Returns None iff the file should be downloaded """
 420
 421         title = info_dict['title']
 422         matchtitle = self.params.get('matchtitle', False)
 423         if matchtitle:
 424             if not re.search(matchtitle, title, re.IGNORECASE):
 425                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 426         rejecttitle = self.params.get('rejecttitle', False)
 427         if rejecttitle:
 428             if re.search(rejecttitle, title, re.IGNORECASE):
 429                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 430         date = info_dict.get('upload_date', None)
 431         if date is not None:
 432             dateRange = self.params.get('daterange', DateRange())
 433             if date not in dateRange:
 434                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 435         return None
 436
 437     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
 438         '''
 439         Returns a list with a dictionary for each video we find.
 440         If 'download', also downloads the videos.
 441         extra_info is a dict containing the extra values to add to each result
 442          '''
 443
 444         if ie_key:
 445             ie = get_info_extractor(ie_key)()
 446             ie.set_downloader(self)
 447             ies = [ie]
 448         else:
 449             ies = self._ies
 450
 451         for ie in ies:
 452             if not ie.suitable(url):
 453                 continue
 454
 455             if not ie.working():
 456                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 457                                     u'and will probably not work.')
 458
 459             try:
 460                 ie_result = ie.extract(url)
 461                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 462                     break
 463                 if isinstance(ie_result, list):
 464                     # Backwards compatibility: old IE result format
 465                     for result in ie_result:
 466                         result.update(extra_info)
 467                     ie_result = {
 468                         '_type': 'compat_list',
 469                         'entries': ie_result,
 470                     }
 471                 else:
 472                     ie_result.update(extra_info)
 473                 if 'extractor' not in ie_result:
 474                     ie_result['extractor'] = ie.IE_NAME
 475                 return self.process_ie_result(ie_result, download=download)
 476             except ExtractorError as de: # An error we somewhat expected
 477                 self.report_error(compat_str(de), de.format_traceback())
 478                 break
 479             except Exception as e:
 480                 if self.params.get('ignoreerrors', False):
 481                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 482                     break
 483                 else:
 484                     raise
 485         else:
 486             self.report_error(u'no suitable InfoExtractor: %s' % url)
 487
 488     def process_ie_result(self, ie_result, download=True, extra_info={}):
 489         """
 490         Take the result of the ie(may be modified) and resolve all unresolved
 491         references (URLs, playlist items).
 492
 493         It will also download the videos if 'download'.
 494         Returns the resolved ie_result.
 495         """
 496
 497         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 498         if result_type == 'video':
 499             if 'playlist' not in ie_result:
 500                 # It isn't part of a playlist
 501                 ie_result['playlist'] = None
 502                 ie_result['playlist_index'] = None
 503             if download:
 504                 self.process_info(ie_result)
 505             return ie_result
 506         elif result_type == 'url':
 507             # We have to add extra_info to the results because it may be
 508             # contained in a playlist
 509             return self.extract_info(ie_result['url'],
 510                                      download,
 511                                      ie_key=ie_result.get('ie_key'),
 512                                      extra_info=extra_info)
 513         elif result_type == 'playlist':
 514             # We process each entry in the playlist
 515             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 516             self.to_screen(u'[download] Downloading playlist: %s'  % playlist)
 517
 518             playlist_results = []
 519
 520             n_all_entries = len(ie_result['entries'])
 521             playliststart = self.params.get('playliststart', 1) - 1
 522             playlistend = self.params.get('playlistend', -1)
 523
 524             if playlistend == -1:
 525                 entries = ie_result['entries'][playliststart:]
 526             else:
 527                 entries = ie_result['entries'][playliststart:playlistend]
 528
 529             n_entries = len(entries)
 530
 531             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 532                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 533
 534             for i,entry in enumerate(entries,1):
 535                 self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
 536                 extra = {
 537                          'playlist': playlist,
 538                          'playlist_index': i + playliststart,
 539                          }
 540                 if not 'extractor' in entry:
 541                     # We set the extractor, if it's an url it will be set then to
 542                     # the new extractor, but if it's already a video we must make
 543                     # sure it's present: see issue #877
 544                     entry['extractor'] = ie_result['extractor']
 545                 entry_result = self.process_ie_result(entry,
 546                                                       download=download,
 547                                                       extra_info=extra)
 548                 playlist_results.append(entry_result)
 549             ie_result['entries'] = playlist_results
 550             return ie_result
 551         elif result_type == 'compat_list':
 552             def _fixup(r):
 553                 r.setdefault('extractor', ie_result['extractor'])
 554                 return r
 555             ie_result['entries'] = [
 556                 self.process_ie_result(_fixup(r), download=download)
 557                 for r in ie_result['entries']
 558             ]
 559             return ie_result
 560         else:
 561             raise Exception('Invalid result type: %s' % result_type)
 562
 563     def process_info(self, info_dict):
 564         """Process a single resolved IE result."""
 565
 566         assert info_dict.get('_type', 'video') == 'video'
 567         #We increment the download the download count here to match the previous behaviour.
 568         self.increment_downloads()
 569
 570         info_dict['fulltitle'] = info_dict['title']
 571         if len(info_dict['title']) > 200:
 572             info_dict['title'] = info_dict['title'][:197] + u'...'
 573
 574         # Keep for backwards compatibility
 575         info_dict['stitle'] = info_dict['title']
 576
 577         if not 'format' in info_dict:
 578             info_dict['format'] = info_dict['ext']
 579
 580         reason = self._match_entry(info_dict)
 581         if reason is not None:
 582             self.to_screen(u'[download] ' + reason)
 583             return
 584
 585         max_downloads = self.params.get('max_downloads')
 586         if max_downloads is not None:
 587             if self._num_downloads > int(max_downloads):
 588                 raise MaxDownloadsReached()
 589
 590         filename = self.prepare_filename(info_dict)
 591
 592         # Forced printings
 593         if self.params.get('forcetitle', False):
 594             compat_print(info_dict['title'])
 595         if self.params.get('forceid', False):
 596             compat_print(info_dict['id'])
 597         if self.params.get('forceurl', False):
 598             compat_print(info_dict['url'])
 599         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 600             compat_print(info_dict['thumbnail'])
 601         if self.params.get('forcedescription', False) and 'description' in info_dict:
 602             compat_print(info_dict['description'])
 603         if self.params.get('forcefilename', False) and filename is not None:
 604             compat_print(filename)
 605         if self.params.get('forceformat', False):
 606             compat_print(info_dict['format'])
 607
 608         # Do nothing else if in simulate mode
 609         if self.params.get('simulate', False):
 610             return
 611
 612         if filename is None:
 613             return
 614
 615         try:
 616             dn = os.path.dirname(encodeFilename(filename))
 617             if dn != '' and not os.path.exists(dn):
 618                 os.makedirs(dn)
 619         except (OSError, IOError) as err:
 620             self.report_error(u'unable to create directory ' + compat_str(err))
 621             return
 622
 623         if self.params.get('writedescription', False):
 624             try:
 625                 descfn = filename + u'.description'
 626                 self.report_writedescription(descfn)
 627                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 628                     descfile.write(info_dict['description'])
 629             except (OSError, IOError):
 630                 self.report_error(u'Cannot write description file ' + descfn)
 631                 return
 632
 633         if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 634             # subtitles download errors are already managed as troubles in relevant IE
 635             # that way it will silently go on when used with unsupporting IE
 636             subtitle = info_dict['subtitles'][0]
 637             (sub_error, sub_lang, sub) = subtitle
 638             sub_format = self.params.get('subtitlesformat')
 639             if sub_error:
 640                 self.report_warning("Some error while getting the subtitles")
 641             else:
 642                 try:
 643                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 644                     self.report_writesubtitles(sub_filename)
 645                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 646                         subfile.write(sub)
 647                 except (OSError, IOError):
 648                     self.report_error(u'Cannot write subtitles file ' + descfn)
 649                     return
 650
 651         if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 652             subtitles = info_dict['subtitles']
 653             sub_format = self.params.get('subtitlesformat')
 654             for subtitle in subtitles:
 655                 (sub_error, sub_lang, sub) = subtitle
 656                 if sub_error:
 657                     self.report_warning("Some error while getting the subtitles")
 658                 else:
 659                     try:
 660                         sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 661                         self.report_writesubtitles(sub_filename)
 662                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 663                                 subfile.write(sub)
 664                     except (OSError, IOError):
 665                         self.report_error(u'Cannot write subtitles file ' + descfn)
 666                         return
 667
 668         if self.params.get('writeinfojson', False):
 669             infofn = filename + u'.info.json'
 670             self.report_writeinfojson(infofn)
 671             try:
 672                 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
 673                 write_json_file(json_info_dict, encodeFilename(infofn))
 674             except (OSError, IOError):
 675                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 676                 return
 677
 678         if self.params.get('writethumbnail', False):
 679             if 'thumbnail' in info_dict:
 680                 thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
 681                 if not thumb_format:
 682                     thumb_format = 'jpg'
 683                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 684                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 685                                (info_dict['extractor'], info_dict['id']))
 686                 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 687                 with open(thumb_filename, 'wb') as thumbf:
 688                     shutil.copyfileobj(uf, thumbf)
 689                 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 690                                (info_dict['extractor'], info_dict['id'], thumb_filename))
 691
 692         if not self.params.get('skip_download', False):
 693             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 694                 success = True
 695             else:
 696                 try:
 697                     success = self._do_download(filename, info_dict)
 698                 except (OSError, IOError) as err:
 699                     raise UnavailableVideoError()
 700                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 701                     self.report_error(u'unable to download video data: %s' % str(err))
 702                     return
 703                 except (ContentTooShortError, ) as err:
 704                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 705                     return
 706
 707             if success:
 708                 try:
 709                     self.post_process(filename, info_dict)
 710                 except (PostProcessingError) as err:
 711                     self.report_error(u'postprocessing: %s' % str(err))
 712                     return
 713
 714     def download(self, url_list):
 715         """Download a given list of URLs."""
 716         if len(url_list) > 1 and self.fixed_template():
 717             raise SameFileError(self.params['outtmpl'])
 718
 719         for url in url_list:
 720             try:
 721                 #It also downloads the videos
 722                 videos = self.extract_info(url)
 723             except UnavailableVideoError:
 724                 self.report_error(u'unable to download video')
 725             except MaxDownloadsReached:
 726                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 727                 raise
 728
 729         return self._download_retcode
 730
 731     def post_process(self, filename, ie_info):
 732         """Run all the postprocessors on the given file."""
 733         info = dict(ie_info)
 734         info['filepath'] = filename
 735         keep_video = None
 736         for pp in self._pps:
 737             try:
 738                 keep_video_wish,new_info = pp.run(info)
 739                 if keep_video_wish is not None:
 740                     if keep_video_wish:
 741                         keep_video = keep_video_wish
 742                     elif keep_video is None:
 743                         # No clear decision yet, let IE decide
 744                         keep_video = keep_video_wish
 745             except PostProcessingError as e:
 746                 self.to_stderr(u'ERROR: ' + e.msg)
 747         if keep_video is False and not self.params.get('keepvideo', False):
 748             try:
 749                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 750                 os.remove(encodeFilename(filename))
 751             except (IOError, OSError):
 752                 self.report_warning(u'Unable to remove downloaded video file')
 753
 754     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
 755         self.report_destination(filename)
 756         tmpfilename = self.temp_name(filename)
 757
 758         # Check for rtmpdump first
 759         try:
 760             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 761         except (OSError, IOError):
 762             self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
 763             return False
 764         verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
 765
 766         # Download using rtmpdump. rtmpdump returns exit code 2 when
 767         # the connection was interrumpted and resuming appears to be
 768         # possible. This is part of rtmpdump's normal usage, AFAIK.
 769         basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
 770         if player_url is not None:
 771             basic_args += ['--swfVfy', player_url]
 772         if page_url is not None:
 773             basic_args += ['--pageUrl', page_url]
 774         if play_path is not None:
 775             basic_args += ['--playpath', play_path]
 776         if tc_url is not None:
 777             basic_args += ['--tcUrl', url]
 778         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
 779         if self.params.get('verbose', False):
 780             try:
 781                 import pipes
 782                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 783             except ImportError:
 784                 shell_quote = repr
 785             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 786         retval = subprocess.call(args)
 787         while retval == 2 or retval == 1:
 788             prevsize = os.path.getsize(encodeFilename(tmpfilename))
 789             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 790             time.sleep(5.0) # This seems to be needed
 791             retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 792             cursize = os.path.getsize(encodeFilename(tmpfilename))
 793             if prevsize == cursize and retval == 1:
 794                 break
 795              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 796             if prevsize == cursize and retval == 2 and cursize > 1024:
 797                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 798                 retval = 0
 799                 break
 800         if retval == 0:
 801             fsize = os.path.getsize(encodeFilename(tmpfilename))
 802             self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
 803             self.try_rename(tmpfilename, filename)
 804             self._hook_progress({
 805                 'downloaded_bytes': fsize,
 806                 'total_bytes': fsize,
 807                 'filename': filename,
 808                 'status': 'finished',
 809             })
 810             return True
 811         else:
 812             self.to_stderr(u"\n")
 813             self.report_error(u'rtmpdump exited with code %d' % retval)
 814             return False
 815
 816     def _download_with_mplayer(self, filename, url):
 817         self.report_destination(filename)
 818         tmpfilename = self.temp_name(filename)
 819
 820         args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
 821         # Check for mplayer first
 822         try:
 823             subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 824         except (OSError, IOError):
 825             self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
 826             return False
 827
 828         # Download using mplayer.
 829         retval = subprocess.call(args)
 830         if retval == 0:
 831             fsize = os.path.getsize(encodeFilename(tmpfilename))
 832             self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
 833             self.try_rename(tmpfilename, filename)
 834             self._hook_progress({
 835                 'downloaded_bytes': fsize,
 836                 'total_bytes': fsize,
 837                 'filename': filename,
 838                 'status': 'finished',
 839             })
 840             return True
 841         else:
 842             self.to_stderr(u"\n")
 843             self.report_error(u'mplayer exited with code %d' % retval)
 844             return False
 845
 846
 847     def _do_download(self, filename, info_dict):
 848         url = info_dict['url']
 849
 850         # Check file already present
 851         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 852             self.report_file_already_downloaded(filename)
 853             self._hook_progress({
 854                 'filename': filename,
 855                 'status': 'finished',
 856             })
 857             return True
 858
 859         # Attempt to download using rtmpdump
 860         if url.startswith('rtmp'):
 861             return self._download_with_rtmpdump(filename, url,
 862                                                 info_dict.get('player_url', None),
 863                                                 info_dict.get('page_url', None),
 864                                                 info_dict.get('play_path', None),
 865                                                 info_dict.get('tc_url', None))
 866
 867         # Attempt to download using mplayer
 868         if url.startswith('mms') or url.startswith('rtsp'):
 869             return self._download_with_mplayer(filename, url)
 870
 871         tmpfilename = self.temp_name(filename)
 872         stream = None
 873
 874         # Do not include the Accept-Encoding header
 875         headers = {'Youtubedl-no-compression': 'True'}
 876         if 'user_agent' in info_dict:
 877             headers['Youtubedl-user-agent'] = info_dict['user_agent']
 878         basic_request = compat_urllib_request.Request(url, None, headers)
 879         request = compat_urllib_request.Request(url, None, headers)
 880
 881         if self.params.get('test', False):
 882             request.add_header('Range','bytes=0-10240')
 883
 884         # Establish possible resume length
 885         if os.path.isfile(encodeFilename(tmpfilename)):
 886             resume_len = os.path.getsize(encodeFilename(tmpfilename))
 887         else:
 888             resume_len = 0
 889
 890         open_mode = 'wb'
 891         if resume_len != 0:
 892             if self.params.get('continuedl', False):
 893                 self.report_resuming_byte(resume_len)
 894                 request.add_header('Range','bytes=%d-' % resume_len)
 895                 open_mode = 'ab'
 896             else:
 897                 resume_len = 0
 898
 899         count = 0
 900         retries = self.params.get('retries', 0)
 901         while count <= retries:
 902             # Establish connection
 903             try:
 904                 if count == 0 and 'urlhandle' in info_dict:
 905                     data = info_dict['urlhandle']
 906                 data = compat_urllib_request.urlopen(request)
 907                 break
 908             except (compat_urllib_error.HTTPError, ) as err:
 909                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 910                     # Unexpected HTTP error
 911                     raise
 912                 elif err.code == 416:
 913                     # Unable to resume (requested range not satisfiable)
 914                     try:
 915                         # Open the connection again without the range header
 916                         data = compat_urllib_request.urlopen(basic_request)
 917                         content_length = data.info()['Content-Length']
 918                     except (compat_urllib_error.HTTPError, ) as err:
 919                         if err.code < 500 or err.code >= 600:
 920                             raise
 921                     else:
 922                         # Examine the reported length
 923                         if (content_length is not None and
 924                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
 925                             # The file had already been fully downloaded.
 926                             # Explanation to the above condition: in issue #175 it was revealed that
 927                             # YouTube sometimes adds or removes a few bytes from the end of the file,
 928                             # changing the file size slightly and causing problems for some users. So
 929                             # I decided to implement a suggested change and consider the file
 930                             # completely downloaded if the file size differs less than 100 bytes from
 931                             # the one in the hard drive.
 932                             self.report_file_already_downloaded(filename)
 933                             self.try_rename(tmpfilename, filename)
 934                             self._hook_progress({
 935                                 'filename': filename,
 936                                 'status': 'finished',
 937                             })
 938                             return True
 939                         else:
 940                             # The length does not match, we start the download over
 941                             self.report_unable_to_resume()
 942                             open_mode = 'wb'
 943                             break
 944             # Retry
 945             count += 1
 946             if count <= retries:
 947                 self.report_retry(count, retries)
 948
 949         if count > retries:
 950             self.report_error(u'giving up after %s retries' % retries)
 951             return False
 952
 953         data_len = data.info().get('Content-length', None)
 954         if data_len is not None:
 955             data_len = int(data_len) + resume_len
 956             min_data_len = self.params.get("min_filesize", None)
 957             max_data_len =  self.params.get("max_filesize", None)
 958             if min_data_len is not None and data_len < min_data_len:
 959                 self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
 960                 return False
 961             if max_data_len is not None and data_len > max_data_len:
 962                 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
 963                 return False
 964
 965         data_len_str = self.format_bytes(data_len)
 966         byte_counter = 0 + resume_len
 967         block_size = self.params.get('buffersize', 1024)
 968         start = time.time()
 969         while True:
 970             # Download and write
 971             before = time.time()
 972             data_block = data.read(block_size)
 973             after = time.time()
 974             if len(data_block) == 0:
 975                 break
 976             byte_counter += len(data_block)
 977
 978             # Open file just in time
 979             if stream is None:
 980                 try:
 981                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 982                     assert stream is not None
 983                     filename = self.undo_temp_name(tmpfilename)
 984                     self.report_destination(filename)
 985                 except (OSError, IOError) as err:
 986                     self.report_error(u'unable to open for writing: %s' % str(err))
 987                     return False
 988             try:
 989                 stream.write(data_block)
 990             except (IOError, OSError) as err:
 991                 self.to_stderr(u"\n")
 992                 self.report_error(u'unable to write data: %s' % str(err))
 993                 return False
 994             if not self.params.get('noresizebuffer', False):
 995                 block_size = self.best_block_size(after - before, len(data_block))
 996
 997             # Progress message
 998             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 999             if data_len is None:
1000                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
1001             else:
1002                 percent_str = self.calc_percent(byte_counter, data_len)
1003                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
1004                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
1005
1006             self._hook_progress({
1007                 'downloaded_bytes': byte_counter,
1008                 'total_bytes': data_len,
1009                 'tmpfilename': tmpfilename,
1010                 'filename': filename,
1011                 'status': 'downloading',
1012             })
1013
1014             # Apply rate limit
1015             self.slow_down(start, byte_counter - resume_len)
1016
1017         if stream is None:
1018             self.to_stderr(u"\n")
1019             self.report_error(u'Did not get any data blocks')
1020             return False
1021         stream.close()
1022         self.report_finish()
1023         if data_len is not None and byte_counter != data_len:
1024             raise ContentTooShortError(byte_counter, int(data_len))
1025         self.try_rename(tmpfilename, filename)
1026
1027         # Update file modification time
1028         if self.params.get('updatetime', True):
1029             info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
1030
1031         self._hook_progress({
1032             'downloaded_bytes': byte_counter,
1033             'total_bytes': byte_counter,
1034             'filename': filename,
1035             'status': 'finished',
1036         })
1037
1038         return True
1039
1040     def _hook_progress(self, status):
1041         for ph in self._progress_hooks:
1042             ph(status)
1043
1044     def add_progress_hook(self, ph):
1045         """ ph gets called on download progress, with a dictionary with the entries
1046         * filename: The final filename
1047         * status: One of "downloading" and "finished"
1048
1049         It can also have some of the following entries:
1050
1051         * downloaded_bytes: Bytes on disks
1052         * total_bytes: Total bytes, None if unknown
1053         * tmpfilename: The filename we're currently writing to
1054
1055         Hooks are guaranteed to be called at least once (with status "finished")
1056         if the download is successful.
1057         """
1058         self._progress_hooks.append(ph)