2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_urllib_request,
63 UnavailableVideoError,
73 from .cache import Cache
74 from .extractor import get_info_extractor, gen_extractors
75 from .downloader import get_suitable_downloader
76 from .downloader.rtmp import rtmpdump_version
77 from .postprocessor import (
79 FFmpegFixupStretchedPP,
84 from .version import __version__
87 class YoutubeDL(object):
90 YoutubeDL objects are the ones responsible of downloading the
91 actual video file and writing it to disk if the user has requested
92 it, among some other tasks. In most cases there should be one per
93 program. Given a video URL, the downloader doesn't know how to
94 extract all the needed information (that is the InfoExtractors' task),
95 so it has to pass the URL to one of them.
97 For this, YoutubeDL objects have a method that allows
98 InfoExtractors to be registered in a given order. When it is passed
99 a URL, the YoutubeDL object handles it to the first InfoExtractor it
100 finds that reports being able to handle it. The InfoExtractor extracts
101 all the information about the video or videos the URL refers to, and
102 YoutubeDL process the extracted information, possibly using a File
103 Downloader to download the video.
105 YoutubeDL objects accept a lot of parameters. In order not to saturate
106 the object constructor with arguments, it receives a dictionary of
107 options instead. These options are available through the params
108 attribute for the InfoExtractors to use. The YoutubeDL also
109 registers itself as the downloader in charge for the InfoExtractors
110 that are added to it, so this is a "mutual registration".
114 username: Username for authentication purposes.
115 password: Password for authentication purposes.
116 videopassword: Password for accessing a video.
117 usenetrc: Use netrc for authentication instead.
118 verbose: Print additional info to stdout.
119 quiet: Do not print messages to stdout.
120 no_warnings: Do not print out anything for warnings.
121 forceurl: Force printing final URL.
122 forcetitle: Force printing title.
123 forceid: Force printing ID.
124 forcethumbnail: Force printing thumbnail URL.
125 forcedescription: Force printing description.
126 forcefilename: Force printing final filename.
127 forceduration: Force printing duration.
128 forcejson: Force printing info_dict as JSON.
129 dump_single_json: Force printing the info_dict of the whole playlist
130 (or video) as a single JSON line.
131 simulate: Do not download the video files.
132 format: Video format code. See options.py for more information.
133 format_limit: Highest quality format to try.
134 outtmpl: Template for output names.
135 restrictfilenames: Do not allow "&" and spaces in file names
136 ignoreerrors: Do not stop on download errors.
137 nooverwrites: Prevent overwriting files.
138 playliststart: Playlist item to start at.
139 playlistend: Playlist item to end at.
140 playlistreverse: Download playlist items in reverse order.
141 matchtitle: Download only matching titles.
142 rejecttitle: Reject downloads for matching titles.
143 logger: Log messages to a logging.Logger instance.
144 logtostderr: Log messages to stderr instead of stdout.
145 writedescription: Write the video description to a .description file
146 writeinfojson: Write the video description to a .info.json file
147 writeannotations: Write the video annotations to a .annotations.xml file
148 writethumbnail: Write the thumbnail image to a file
149 write_all_thumbnails: Write all thumbnail formats to files
150 writesubtitles: Write the video subtitles to a file
151 writeautomaticsub: Write the automatic subtitles to a file
152 allsubtitles: Downloads all the subtitles of the video
153 (requires writesubtitles or writeautomaticsub)
154 listsubtitles: Lists all available subtitles for the video
155 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
156 subtitleslangs: List of languages of the subtitles to download
157 keepvideo: Keep the video file after post-processing
158 daterange: A DateRange object, download only if the upload_date is in the range.
159 skip_download: Skip the actual download of the video file
160 cachedir: Location of the cache files in the filesystem.
161 False to disable filesystem cache.
162 noplaylist: Download single video instead of a playlist if in doubt.
163 age_limit: An integer representing the user's age in years.
164 Unsuitable videos for the given age are skipped.
165 min_views: An integer representing the minimum view count the video
166 must have in order to not be skipped.
167 Videos without view count information are always
168 downloaded. None for no limit.
169 max_views: An integer representing the maximum view count.
170 Videos that are more popular than that are not
172 Videos without view count information are always
173 downloaded. None for no limit.
174 download_archive: File name of a file where all downloads are recorded.
175 Videos already present in the file are not downloaded
177 cookiefile: File name where cookies should be read from and dumped to.
178 nocheckcertificate:Do not verify SSL certificates
179 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
180 At the moment, this is only supported by YouTube.
181 proxy: URL of the proxy server to use
182 socket_timeout: Time to wait for unresponsive hosts, in seconds
183 bidi_workaround: Work around buggy terminals without bidirectional text
184 support, using fribidi
185 debug_printtraffic:Print out sent and received HTTP traffic
186 include_ads: Download ads as well
187 default_search: Prepend this string if an input url is not valid.
188 'auto' for elaborate guessing
189 encoding: Use this encoding instead of the system-specified.
190 extract_flat: Do not resolve URLs, return the immediate result.
191 Pass in 'in_playlist' to only show this behavior for
193 postprocessors: A list of dictionaries, each with an entry
194 * key: The name of the postprocessor. See
195 youtube_dl/postprocessor/__init__.py for a list.
196 as well as any further keyword arguments for the
198 progress_hooks: A list of functions that get called on download
199 progress, with a dictionary with the entries
200 * filename: The final filename
201 * status: One of "downloading" and "finished"
203 The dict may also have some of the following entries:
205 * downloaded_bytes: Bytes on disk
206 * total_bytes: Size of the whole file, None if unknown
207 * tmpfilename: The filename we're currently writing to
208 * eta: The estimated time in seconds, None if unknown
209 * speed: The download speed in bytes/second, None if
212 Progress hooks are guaranteed to be called at least once
213 (with status "finished") if the download is successful.
214 merge_output_format: Extension to use when merging formats.
215 fixup: Automatically correct known faults of the file.
217 - "never": do nothing
218 - "warn": only emit a warning
219 - "detect_or_warn": check whether we can do anything
220 about it, warn otherwise (default)
221 source_address: (Experimental) Client-side IP address to bind to.
222 call_home: Boolean, true iff we are allowed to contact the
223 youtube-dl servers for debugging.
224 sleep_interval: Number of seconds to sleep before each download.
225 external_downloader: Executable of the external downloader to call.
226 listformats: Print an overview of available video formats and exit.
227 list_thumbnails: Print a table of all thumbnails and exit.
230 The following parameters are not used by YoutubeDL itself, they are used by
232 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
233 noresizebuffer, retries, continuedl, noprogress, consoletitle
235 The following options are used by the post processors:
236 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
237 otherwise prefer avconv.
238 exec_cmd: Arbitrary command to run after downloading
244 _download_retcode = None
245 _num_downloads = None
248 def __init__(self, params=None, auto_init=True):
249 """Create a FileDownloader object with the given options."""
253 self._ies_instances = {}
255 self._progress_hooks = []
256 self._download_retcode = 0
257 self._num_downloads = 0
258 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
259 self._err_file = sys.stderr
261 self.cache = Cache(self)
263 if params.get('bidi_workaround', False):
266 master, slave = pty.openpty()
267 width = get_term_width()
271 width_args = ['-w', str(width)]
273 stdin=subprocess.PIPE,
275 stderr=self._err_file)
277 self._output_process = subprocess.Popen(
278 ['bidiv'] + width_args, **sp_kwargs
281 self._output_process = subprocess.Popen(
282 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
283 self._output_channel = os.fdopen(master, 'rb')
284 except OSError as ose:
286 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
290 if (sys.version_info >= (3,) and sys.platform != 'win32' and
291 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
292 and not params.get('restrictfilenames', False)):
293 # On Python 3, the Unicode filesystem API will throw errors (#1474)
295 'Assuming --restrict-filenames since file system encoding '
296 'cannot encode all characters. '
297 'Set the LC_ALL environment variable to fix this.')
298 self.params['restrictfilenames'] = True
300 if '%(stitle)s' in self.params.get('outtmpl', ''):
301 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
306 self.print_debug_header()
307 self.add_default_info_extractors()
309 for pp_def_raw in self.params.get('postprocessors', []):
310 pp_class = get_postprocessor(pp_def_raw['key'])
311 pp_def = dict(pp_def_raw)
313 pp = pp_class(self, **compat_kwargs(pp_def))
314 self.add_post_processor(pp)
316 for ph in self.params.get('progress_hooks', []):
317 self.add_progress_hook(ph)
319 def warn_if_short_id(self, argv):
320 # short YouTube ID starting with dash?
# Collect argv indices that look like bare 11-character YouTube IDs that
# begin with '-', which option parsers would misread as flags.
322 i for i, a in enumerate(argv)
323 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
# Suggest a corrected command line with '--' separating options from IDs.
327 [a for i, a in enumerate(argv) if i not in idxs] +
328 ['--'] + [argv[i] for i in idxs]
331 'Long argument string detected. '
332 'Use -- to separate parameters and URLs, like this:\n%s\n' %
333 args_to_str(correct_argv))
335 def add_info_extractor(self, ie):
336 """Add an InfoExtractor object to the end of the list."""
# Index the extractor by its key and hand it a back-reference to this
# downloader ("mutual registration" -- see the class docstring).
338 self._ies_instances[ie.ie_key()] = ie
339 ie.set_downloader(self)
341 def get_info_extractor(self, ie_key):
# Return the cached IE instance for ie_key, instantiating and registering
# a fresh one on first use.
343 Get an instance of an IE with name ie_key, it will try to get one from
344 the _ies list, if there's no instance it will create a new one and add
345 it to the extractor list.
347 ie = self._ies_instances.get(ie_key)
# (elided guard) the module-level get_info_extractor returns the IE class.
349 ie = get_info_extractor(ie_key)()
350 self.add_info_extractor(ie)
353 def add_default_info_extractors(self):
# Register every extractor produced by gen_extractors(), in order.
355 Add the InfoExtractors returned by gen_extractors to the end of the list
357 for ie in gen_extractors():
358 self.add_info_extractor(ie)
360 def add_post_processor(self, pp):
361 """Add a PostProcessor object to the end of the chain."""
# The PP gets a back-reference so it can report through this downloader.
363 pp.set_downloader(self)
365 def add_progress_hook(self, ph):
366 """Add the progress hook (currently only for the file downloader)"""
367 self._progress_hooks.append(ph)
369 def _bidi_workaround(self, message):
# Fast path: if the bidi subprocess was never set up, presumably the
# message is returned unchanged on the elided line -- TODO confirm.
370 if not hasattr(self, '_output_channel'):
373 assert hasattr(self, '_output_process')
374 assert isinstance(message, compat_str)
# Feed the message through the external bidi filter and read back exactly
# as many lines as were written.
375 line_count = message.count('\n') + 1
376 self._output_process.stdin.write((message + '\n').encode('utf-8'))
377 self._output_process.stdin.flush()
378 res = ''.join(self._output_channel.readline().decode('utf-8')
379 for _ in range(line_count))
# Strip the trailing newline that was appended above.
380 return res[:-len('\n')]
382 def to_screen(self, message, skip_eol=False):
383 """Print message to stdout if not in quiet mode."""
384 return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write *s* to *out*, honouring the user-selected output encoding."""
    enc = self.params.get('encoding')
    write_string(s, out=out, encoding=enc)
389 def to_stdout(self, message, skip_eol=False, check_quiet=False):
390 """Print message to stdout if not in quiet mode."""
391 if self.params.get('logger'):
392 self.params['logger'].debug(message)
393 elif not check_quiet or not self.params.get('quiet', False):
394 message = self._bidi_workaround(message)
395 terminator = ['\n', ''][skip_eol]
396 output = message + terminator
398 self._write_string(output, self._screen_file)
400 def to_stderr(self, message):
401 """Print message to stderr."""
402 assert isinstance(message, compat_str)
403 if self.params.get('logger'):
404 self.params['logger'].error(message)
# Otherwise (elided 'else:' branch) write directly to the error stream.
406 message = self._bidi_workaround(message)
407 output = message + '\n'
408 self._write_string(output, self._err_file)
410 def to_console_title(self, message):
# Set the terminal/console window title; no-op unless 'consoletitle' is on
# (the elided line after the guard presumably returns early).
411 if not self.params.get('consoletitle', False):
413 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
414 # c_wchar_p() might not be necessary if `message` is
415 # already of type unicode()
416 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
# On POSIX terminals use the xterm "set window title" escape sequence.
417 elif 'TERM' in os.environ:
418 self._write_string('\033]0;%s\007' % message, self._screen_file)
420 def save_console_title(self):
# No-op unless 'consoletitle' is enabled (elided early return after guard).
421 if not self.params.get('consoletitle', False):
423 if 'TERM' in os.environ:
424 # Save the title on stack
# xterm escape: push the current window title onto the terminal's stack.
425 self._write_string('\033[22;0t', self._screen_file)
427 def restore_console_title(self):
# Counterpart of save_console_title; no-op unless 'consoletitle' is on.
428 if not self.params.get('consoletitle', False):
430 if 'TERM' in os.environ:
431 # Restore the title from stack
# xterm escape: pop the previously saved window title.
432 self._write_string('\033[23;0t', self._screen_file)
# NOTE(review): this is the body of __enter__ -- its 'def' line (and the
# 'return self') are elided in this listing.
435 self.save_console_title()
438 def __exit__(self, *args):
439 self.restore_console_title()
441 if self.params.get('cookiefile') is not None:
442 self.cookiejar.save()
444 def trouble(self, message=None, tb=None):
445 """Determine action to take when a download problem appears.
447 Depending on if the downloader has been configured to ignore
448 download errors or not, this method may throw an exception or
449 not when errors are found, after printing the message.
451 tb, if given, is additional traceback information.
453 if message is not None:
454 self.to_stderr(message)
# In verbose mode, build a traceback string: prefer the wrapped extractor
# exception's exc_info when present, else the current exception / stack.
455 if self.params.get('verbose'):
457 if sys.exc_info()[0]: # if .trouble has been called from an except block
459 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
460 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
461 tb += compat_str(traceback.format_exc())
463 tb_data = traceback.format_list(traceback.extract_stack())
464 tb = ''.join(tb_data)
# Unless errors are ignored, re-raise as DownloadError, preserving the
# original exc_info when the caught exception carries one.
466 if not self.params.get('ignoreerrors', False):
467 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
468 exc_info = sys.exc_info()[1].exc_info
470 exc_info = sys.exc_info()
471 raise DownloadError(message, exc_info)
# Errors ignored: remember a non-zero process exit code instead.
472 self._download_retcode = 1
474 def report_warning(self, message):
476 Print the message to stderr, it will be prefixed with 'WARNING:'
477 If stderr is a tty file the 'WARNING:' will be colored
# A user-supplied logger takes precedence (elided 'return' follows).
479 if self.params.get('logger') is not None:
480 self.params['logger'].warning(message)
# 'no_warnings' suppresses the message entirely (elided 'return' follows).
482 if self.params.get('no_warnings'):
# ANSI yellow prefix on POSIX ttys only; plain text elsewhere.
484 if self._err_file.isatty() and os.name != 'nt':
485 _msg_header = '\033[0;33mWARNING:\033[0m'
487 _msg_header = 'WARNING:'
488 warning_message = '%s %s' % (_msg_header, message)
489 self.to_stderr(warning_message)
491 def report_error(self, message, tb=None):
493 Do the same as trouble, but prefixes the message with 'ERROR:', colored
494 in red if stderr is a tty file.
# ANSI red prefix on POSIX ttys only; plain text elsewhere.
496 if self._err_file.isatty() and os.name != 'nt':
497 _msg_header = '\033[0;31mERROR:\033[0m'
499 _msg_header = 'ERROR:'
500 error_message = '%s %s' % (_msg_header, message)
# Delegates raising / retcode handling to trouble().
501 self.trouble(error_message, tb)
503 def report_file_already_downloaded(self, file_name):
504 """Report file has already been fully downloaded."""
# (elided 'try:') print the specific name; fall back to a generic message
# when file_name cannot be encoded for the output stream.
506 self.to_screen('[download] %s has already been downloaded' % file_name)
507 except UnicodeEncodeError:
508 self.to_screen('[download] The file has already been downloaded')
510 def prepare_filename(self, info_dict):
511 """Generate the output filename."""
# Work on a copy of info_dict, augmented with template-only fields
# (epoch, autonumber, zero-padded playlist_index, resolution).
513 template_dict = dict(info_dict)
515 template_dict['epoch'] = int(time.time())
516 autonumber_size = self.params.get('autonumber_size')
517 if autonumber_size is None:
519 autonumber_templ = '%0' + str(autonumber_size) + 'd'
520 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Pad playlist_index to the width of the total entry count.
521 if template_dict.get('playlist_index') is not None:
522 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
523 if template_dict.get('resolution') is None:
524 if template_dict.get('width') and template_dict.get('height'):
525 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
526 elif template_dict.get('height'):
527 template_dict['resolution'] = '%sp' % template_dict['height']
528 elif template_dict.get('width'):
# NOTE(review): '?x%d' renders the width in the height slot ("?x<width>");
# '%dx?' looks intended here -- verify against upstream.
529 template_dict['resolution'] = '?x%d' % template_dict['width']
# Sanitize every value for filesystem use (restricted mode optional).
531 sanitize = lambda k, v: sanitize_filename(
533 restricted=self.params.get('restrictfilenames'),
535 template_dict = dict((k, sanitize(k, v))
536 for k, v in template_dict.items()
# Any key missing from the template dict renders as the literal 'NA'.
538 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
540 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
541 tmpl = compat_expanduser(outtmpl)
542 filename = tmpl % template_dict
544 except ValueError as err:
545 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
548 def _match_entry(self, info_dict):
549 """ Returns None iff the file should be downloaded """
# Each check below returns a human-readable skip reason string; falling
# through every check means "download it".
551 video_title = info_dict.get('title', info_dict.get('id', 'video'))
552 if 'title' in info_dict:
553 # This can happen when we're just evaluating the playlist
554 title = info_dict['title']
# Title allow/deny patterns are case-insensitive regexes.
555 matchtitle = self.params.get('matchtitle', False)
557 if not re.search(matchtitle, title, re.IGNORECASE):
558 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
559 rejecttitle = self.params.get('rejecttitle', False)
561 if re.search(rejecttitle, title, re.IGNORECASE):
562 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
# Upload-date window check (DateRange() with no args accepts everything).
563 date = info_dict.get('upload_date', None)
565 dateRange = self.params.get('daterange', DateRange())
566 if date not in dateRange:
567 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
# View-count bounds only apply when the extractor reported a count.
568 view_count = info_dict.get('view_count', None)
569 if view_count is not None:
570 min_views = self.params.get('min_views')
571 if min_views is not None and view_count < min_views:
572 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
573 max_views = self.params.get('max_views')
574 if max_views is not None and view_count > max_views:
575 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
576 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
577 return 'Skipping "%s" because it is age restricted' % title
578 if self.in_download_archive(info_dict):
579 return '%s has already been recorded in archive' % video_title
583 def add_extra_info(info_dict, extra_info):
584 '''Set the keys from extra_info in info dict if they are missing'''
585 for key, value in extra_info.items():
586 info_dict.setdefault(key, value)
# NOTE(review): extra_info={} is a mutable default argument; the code only
# reads it, but replacing it with a None sentinel would be safer upstream.
588 def extract_info(self, url, download=True, ie_key=None, extra_info={},
591 Returns a list with a dictionary for each video we find.
592 If 'download', also downloads the videos.
593 extra_info is a dict containing the extra values to add to each result
# With an explicit ie_key only that extractor is tried; otherwise (elided
# branch) presumably all registered IEs are iterated -- TODO confirm.
597 ies = [self.get_info_extractor(ie_key)]
602 if not ie.suitable(url):
606 self.report_warning('The program functionality for this site has been marked as broken, '
607 'and will probably not work.')
610 ie_result = ie.extract(url)
611 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
# Wrap bare-list results in the legacy 'compat_list' envelope.
613 if isinstance(ie_result, list):
614 # Backwards compatibility: old IE result format
616 '_type': 'compat_list',
617 'entries': ie_result,
619 self.add_default_extra_info(ie_result, ie, url)
621 return self.process_ie_result(ie_result, download, extra_info)
# Expected extractor failures are reported; MaxDownloadsReached propagates
# (elided 'raise'); other exceptions are only swallowed with ignoreerrors.
624 except ExtractorError as de: # An error we somewhat expected
625 self.report_error(compat_str(de), de.format_traceback())
627 except MaxDownloadsReached:
629 except Exception as e:
630 if self.params.get('ignoreerrors', False):
631 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
636 self.report_error('no suitable InfoExtractor for URL %s' % url)
638 def add_default_extra_info(self, ie_result, ie, url):
# Attach extractor provenance fields to a result without overwriting any
# keys the extractor already set (delegates to add_extra_info).
639 self.add_extra_info(ie_result, {
640 'extractor': ie.IE_NAME,
642 'webpage_url_basename': url_basename(url),
643 'extractor_key': ie.ie_key(),
# NOTE(review): listing truncated -- lines are elided throughout this method,
# so several statements/calls below appear without their openings or closers.
646 def process_ie_result(self, ie_result, download=True, extra_info={}):
648 Take the result of the ie(may be modified) and resolve all unresolved
649 references (URLs, playlist items).
651 It will also download the videos if 'download'.
652 Returns the resolved ie_result.
655 result_type = ie_result.get('_type', 'video')
# extract_flat mode: return url/url_transparent results unresolved.
657 if result_type in ('url', 'url_transparent'):
658 extract_flat = self.params.get('extract_flat', False)
659 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
660 extract_flat is True):
661 if self.params.get('forcejson', False):
662 self.to_stdout(json.dumps(ie_result))
665 if result_type == 'video':
666 self.add_extra_info(ie_result, extra_info)
667 return self.process_video_result(ie_result, download=download)
668 elif result_type == 'url':
669 # We have to add extra_info to the results because it may be
670 # contained in a playlist
671 return self.extract_info(ie_result['url'],
673 ie_key=ie_result.get('ie_key'),
674 extra_info=extra_info)
675 elif result_type == 'url_transparent':
676 # Use the information from the embedding page
677 info = self.extract_info(
678 ie_result['url'], ie_key=ie_result.get('ie_key'),
679 extra_info=extra_info, download=False, process=False)
# Non-None fields from the embedding result override the target's,
# except '_type' and 'url' which must come from the resolved info.
681 force_properties = dict(
682 (k, v) for k, v in ie_result.items() if v is not None)
683 for f in ('_type', 'url'):
684 if f in force_properties:
685 del force_properties[f]
686 new_result = info.copy()
687 new_result.update(force_properties)
689 assert new_result.get('_type') != 'url_transparent'
691 return self.process_ie_result(
692 new_result, download=download, extra_info=extra_info)
693 elif result_type == 'playlist' or result_type == 'multi_video':
694 # We process each entry in the playlist
695 playlist = ie_result.get('title', None) or ie_result.get('id', None)
696 self.to_screen('[download] Downloading playlist: %s' % playlist)
698 playlist_results = []
# playliststart is 1-based in params, converted to a 0-based slice start.
700 playliststart = self.params.get('playliststart', 1) - 1
701 playlistend = self.params.get('playlistend', None)
702 # For backwards compatibility, interpret -1 as whole list
703 if playlistend == -1:
# Entries may be a concrete list, a lazily-paged PagedList, or any
# other iterable (sliced with itertools.islice).
706 ie_entries = ie_result['entries']
707 if isinstance(ie_entries, list):
708 n_all_entries = len(ie_entries)
709 entries = ie_entries[playliststart:playlistend]
710 n_entries = len(entries)
712 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
713 (ie_result['extractor'], playlist, n_all_entries, n_entries))
714 elif isinstance(ie_entries, PagedList):
715 entries = ie_entries.getslice(
716 playliststart, playlistend)
717 n_entries = len(entries)
719 "[%s] playlist %s: Downloading %d videos" %
720 (ie_result['extractor'], playlist, n_entries))
722 entries = list(itertools.islice(
723 ie_entries, playliststart, playlistend))
724 n_entries = len(entries)
726 "[%s] playlist %s: Downloading %d videos" %
727 (ie_result['extractor'], playlist, n_entries))
729 if self.params.get('playlistreverse', False):
730 entries = entries[::-1]
# Process each entry, tagging it with playlist context; entries that
# fail _match_entry are skipped with a printed reason.
732 for i, entry in enumerate(entries, 1):
733 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
735 'n_entries': n_entries,
736 'playlist': playlist,
737 'playlist_id': ie_result.get('id'),
738 'playlist_title': ie_result.get('title'),
739 'playlist_index': i + playliststart,
740 'extractor': ie_result['extractor'],
741 'webpage_url': ie_result['webpage_url'],
742 'webpage_url_basename': url_basename(ie_result['webpage_url']),
743 'extractor_key': ie_result['extractor_key'],
746 reason = self._match_entry(entry)
747 if reason is not None:
748 self.to_screen('[download] ' + reason)
751 entry_result = self.process_ie_result(entry,
754 playlist_results.append(entry_result)
755 ie_result['entries'] = playlist_results
# Legacy result format from unconverted extractors: warn and adapt.
757 elif result_type == 'compat_list':
759 'Extractor %s returned a compat_list result. '
760 'It needs to be updated.' % ie_result.get('extractor'))
766 'extractor': ie_result['extractor'],
767 'webpage_url': ie_result['webpage_url'],
768 'webpage_url_basename': url_basename(ie_result['webpage_url']),
769 'extractor_key': ie_result['extractor_key'],
773 ie_result['entries'] = [
774 self.process_ie_result(_fixup(r), download, extra_info)
775 for r in ie_result['entries']
779 raise Exception('Invalid result type: %s' % result_type)
781 def _apply_format_filter(self, format_spec, available_formats):
782 " Returns a tuple of the remaining format_spec and filtered formats "
# Parses one trailing "[key op value]" filter (e.g. "[height<=480]");
# the OPERATORS mapping of op -> comparison function is elided from this
# listing (defined on the missing lines above the regex).
792 operator_rex = re.compile(r'''(?x)\s*\[
793 (?P<key>width|height|tbr|abr|vbr|filesize)
794 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
795 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
797 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
798 m = operator_rex.search(format_spec)
800 raise ValueError('Invalid format specification %r' % format_spec)
# Value is parsed as an int first; on failure, as a human filesize
# ('500K', '2MiB'), optionally retried with an implicit 'B' suffix.
803 comparison_value = int(m.group('value'))
805 comparison_value = parse_filesize(m.group('value'))
806 if comparison_value is None:
807 comparison_value = parse_filesize(m.group('value') + 'B')
808 if comparison_value is None:
810 'Invalid value %r in format specification %r' % (
811 m.group('value'), format_spec))
812 op = OPERATORS[m.group('op')]
# Formats missing the key pass only if the '?' (none-inclusive) flag is set.
815 actual_value = f.get(m.group('key'))
816 if actual_value is None:
817 return m.group('none_inclusive')
818 return op(actual_value, comparison_value)
819 new_formats = [f for f in available_formats if _filter(f)]
# Strip the consumed "[...]" suffix; an empty remainder means 'best'.
821 new_format_spec = format_spec[:-len(m.group(0))]
822 if not new_format_spec:
823 new_format_spec = 'best'
825 return (new_format_spec, new_formats)
827 def select_format(self, format_spec, available_formats):
# Peel off trailing "[...]" filters before applying the selector keyword.
828 while format_spec.endswith(']'):
829 format_spec, available_formats = self._apply_format_filter(
830 format_spec, available_formats)
831 if not available_formats:
# Formats are assumed ordered worst-to-best: 'best' is the last element.
834 if format_spec == 'best' or format_spec is None:
835 return available_formats[-1]
836 elif format_spec == 'worst':
837 return available_formats[0]
# Audio-only formats are those with vcodec == 'none'.
838 elif format_spec == 'bestaudio':
840 f for f in available_formats
841 if f.get('vcodec') == 'none']
843 return audio_formats[-1]
844 elif format_spec == 'worstaudio':
846 f for f in available_formats
847 if f.get('vcodec') == 'none']
849 return audio_formats[0]
# Video-only formats are those with acodec == 'none'.
850 elif format_spec == 'bestvideo':
852 f for f in available_formats
853 if f.get('acodec') == 'none']
855 return video_formats[-1]
856 elif format_spec == 'worstvideo':
858 f for f in available_formats
859 if f.get('acodec') == 'none']
861 return video_formats[0]
# Otherwise the spec is either a known extension or an exact format_id;
# the final return of the match is on lines elided from this listing.
863 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
864 if format_spec in extensions:
865 filter_f = lambda f: f['ext'] == format_spec
867 filter_f = lambda f: f['format_id'] == format_spec
868 matches = list(filter(filter_f, available_formats))
873 def _calc_headers(self, info_dict):
# Build the effective HTTP headers for one download: global std_headers,
# overlaid with the format's 'http_headers', plus a Cookie header computed
# from the cookiejar (guards and the final return are elided here).
874 res = std_headers.copy()
876 add_headers = info_dict.get('http_headers')
878 res.update(add_headers)
880 cookies = self._calc_cookies(info_dict)
882 res['Cookie'] = cookies
886 def _calc_cookies(self, info_dict):
# Minimal stand-in implementing just enough of the urllib Request
# interface for cookiejar.add_cookie_header() to operate on.
887 class _PseudoRequest(object):
888 def __init__(self, url):
891 self.unverifiable = False
893 def add_unredirected_header(self, k, v):
896 def get_full_url(self):
899 def is_unverifiable(self):
900 return self.unverifiable
902 def has_header(self, h):
903 return h in self.headers
# Let the jar populate headers on the pseudo-request, then read back the
# Cookie header it produced (None if no cookie matched).
905 pr = _PseudoRequest(info_dict['url'])
906 self.cookiejar.add_cookie_header(pr)
907 return pr.headers.get('Cookie')
# NOTE(review): listing truncated -- lines are elided throughout this method,
# so several statements below appear without their openings or closers.
909 def process_video_result(self, info_dict, download=True):
# Validate mandatory extractor fields before any processing.
910 assert info_dict.get('_type', 'video') == 'video'
912 if 'id' not in info_dict:
913 raise ExtractorError('Missing "id" field in extractor result')
914 if 'title' not in info_dict:
915 raise ExtractorError('Missing "title" field in extractor result')
917 if 'playlist' not in info_dict:
918 # It isn't part of a playlist
919 info_dict['playlist'] = None
920 info_dict['playlist_index'] = None
# Normalize thumbnails: wrap a single 'thumbnail' URL, sort worst-to-best,
# derive 'resolution' strings, and expose the best one as 'thumbnail'.
922 thumbnails = info_dict.get('thumbnails')
923 if thumbnails is None:
924 thumbnail = info_dict.get('thumbnail')
926 thumbnails = [{'url': thumbnail}]
928 thumbnails.sort(key=lambda t: (
929 t.get('preference'), t.get('width'), t.get('height'),
930 t.get('id'), t.get('url')))
932 if 'width' in t and 'height' in t:
933 t['resolution'] = '%dx%d' % (t['width'], t['height'])
935 if thumbnails and 'thumbnail' not in info_dict:
936 info_dict['thumbnail'] = thumbnails[-1]['url']
938 if 'display_id' not in info_dict and 'id' in info_dict:
939 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD) from a numeric timestamp when absent.
941 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
942 # Working around negative timestamps in Windows
943 # (see http://bugs.python.org/issue1646728)
944 if info_dict['timestamp'] < 0 and os.name == 'nt':
945 info_dict['timestamp'] = 0
946 upload_date = datetime.datetime.utcfromtimestamp(
947 info_dict['timestamp'])
948 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
950 # This extractors handle format selection themselves
951 if info_dict['extractor'] in ['Youku']:
953 self.process_info(info_dict)
956 # We now pick which formats have to be downloaded
957 if info_dict.get('formats') is None:
958 # There's only one format available
959 formats = [info_dict]
961 formats = info_dict['formats']
964 raise ExtractorError('No video formats found!')
966 # We check that all the formats have the format and format_id fields
967 for i, format in enumerate(formats):
968 if 'url' not in format:
969 raise ExtractorError('Missing "url" key in result (index %d)' % i)
# Backfill format_id / human-readable 'format' label / 'ext'.
971 if format.get('format_id') is None:
972 format['format_id'] = compat_str(i)
973 if format.get('format') is None:
974 format['format'] = '{id} - {res}{note}'.format(
975 id=format['format_id'],
976 res=self.format_resolution(format),
977 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
979 # Automatically determine file extension if missing
980 if 'ext' not in format:
981 format['ext'] = determine_ext(format['url']).lower()
982 # Add HTTP headers, so that external programs can use them from the
984 full_format_info = info_dict.copy()
985 full_format_info.update(format)
986 format['http_headers'] = self._calc_headers(full_format_info)
# 'format_limit' truncates the list after the named format (inclusive).
988 format_limit = self.params.get('format_limit', None)
990 formats = list(takewhile_inclusive(
991 lambda f: f['format_id'] != format_limit, formats
994 # TODO Central sorting goes here
996 if formats[0] is not info_dict:
997 # only set the 'formats' fields if the original info_dict list them
998 # otherwise we end up with a circular reference, the first (and unique)
999 # element in the 'formats' field in info_dict is info_dict itself,
1000 # wich can't be exported to json
1001 info_dict['formats'] = formats
1002 if self.params.get('listformats'):
1003 self.list_formats(info_dict)
1005 if self.params.get('list_thumbnails'):
1006 self.list_thumbnails(info_dict)
# Resolve the user's format request ('-f'): comma-separated groups, each
# a '/'-separated preference list, with 'a+b' meaning merge video+audio.
1009 req_format = self.params.get('format')
1010 if req_format is None:
1012 formats_to_download = []
1013 # The -1 is for supporting YoutubeIE
1014 if req_format in ('-1', 'all'):
1015 formats_to_download = formats
1017 for rfstr in req_format.split(','):
1018 # We can accept formats requested in the format: 34/5/best, we pick
1019 # the first that is available, starting from left
1020 req_formats = rfstr.split('/')
1021 for rf in req_formats:
1022 if re.match(r'.+?\+.+?', rf) is not None:
1023 # Two formats have been requested like '137+139'
1024 format_1, format_2 = rf.split('+')
1025 formats_info = (self.select_format(format_1, formats),
1026 self.select_format(format_2, formats))
1027 if all(formats_info):
1028 # The first format must contain the video and the
1030 if formats_info[0].get('vcodec') == 'none':
1031 self.report_error('The first format must '
1032 'contain the video, try using '
1033 '"-f %s+%s"' % (format_2, format_1))
# Merged output extension: video ext unless overridden by option.
1036 formats_info[0]['ext']
1037 if self.params.get('merge_output_format') is None
1038 else self.params['merge_output_format'])
1040 'requested_formats': formats_info,
1042 'ext': formats_info[0]['ext'],
1043 'width': formats_info[0].get('width'),
1044 'height': formats_info[0].get('height'),
1045 'resolution': formats_info[0].get('resolution'),
1046 'fps': formats_info[0].get('fps'),
1047 'vcodec': formats_info[0].get('vcodec'),
1048 'vbr': formats_info[0].get('vbr'),
1049 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1050 'acodec': formats_info[1].get('acodec'),
1051 'abr': formats_info[1].get('abr'),
1055 selected_format = None
1057 selected_format = self.select_format(rf, formats)
1058 if selected_format is not None:
1059 formats_to_download.append(selected_format)
1061 if not formats_to_download:
1062 raise ExtractorError('requested format not available',
# Download every selected format; each run gets its own merged info dict.
1066 if len(formats_to_download) > 1:
1067 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1068 for format in formats_to_download:
1069 new_info = dict(info_dict)
1070 new_info.update(format)
1071 self.process_info(new_info)
1072 # We update the info dict with the best quality format (backwards compatibility)
1073 info_dict.update(formats_to_download[-1])
1076 def process_info(self, info_dict):
1077 """Process a single resolved IE result."""
1079 assert info_dict.get('_type', 'video') == 'video'
1081 max_downloads = self.params.get('max_downloads')
1082 if max_downloads is not None:
1083 if self._num_downloads >= int(max_downloads):
1084 raise MaxDownloadsReached()
1086 info_dict['fulltitle'] = info_dict['title']
1087 if len(info_dict['title']) > 200:
1088 info_dict['title'] = info_dict['title'][:197] + '...'
1090 # Keep for backwards compatibility
1091 info_dict['stitle'] = info_dict['title']
1093 if 'format' not in info_dict:
1094 info_dict['format'] = info_dict['ext']
1096 reason = self._match_entry(info_dict)
1097 if reason is not None:
1098 self.to_screen('[download] ' + reason)
1101 self._num_downloads += 1
1103 filename = self.prepare_filename(info_dict)
1106 if self.params.get('forcetitle', False):
1107 self.to_stdout(info_dict['fulltitle'])
1108 if self.params.get('forceid', False):
1109 self.to_stdout(info_dict['id'])
1110 if self.params.get('forceurl', False):
1111 if info_dict.get('requested_formats') is not None:
1112 for f in info_dict['requested_formats']:
1113 self.to_stdout(f['url'] + f.get('play_path', ''))
1115 # For RTMP URLs, also include the playpath
1116 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1117 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1118 self.to_stdout(info_dict['thumbnail'])
1119 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1120 self.to_stdout(info_dict['description'])
1121 if self.params.get('forcefilename', False) and filename is not None:
1122 self.to_stdout(filename)
1123 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1124 self.to_stdout(formatSeconds(info_dict['duration']))
1125 if self.params.get('forceformat', False):
1126 self.to_stdout(info_dict['format'])
1127 if self.params.get('forcejson', False):
1128 info_dict['_filename'] = filename
1129 self.to_stdout(json.dumps(info_dict))
1130 if self.params.get('dump_single_json', False):
1131 info_dict['_filename'] = filename
1133 # Do nothing else if in simulate mode
1134 if self.params.get('simulate', False):
1137 if filename is None:
1141 dn = os.path.dirname(encodeFilename(filename))
1142 if dn and not os.path.exists(dn):
1144 except (OSError, IOError) as err:
1145 self.report_error('unable to create directory ' + compat_str(err))
1148 if self.params.get('writedescription', False):
1149 descfn = filename + '.description'
1150 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1151 self.to_screen('[info] Video description is already present')
1152 elif info_dict.get('description') is None:
1153 self.report_warning('There\'s no description to write.')
1156 self.to_screen('[info] Writing video description to: ' + descfn)
1157 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1158 descfile.write(info_dict['description'])
1159 except (OSError, IOError):
1160 self.report_error('Cannot write description file ' + descfn)
1163 if self.params.get('writeannotations', False):
1164 annofn = filename + '.annotations.xml'
1165 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1166 self.to_screen('[info] Video annotations are already present')
1169 self.to_screen('[info] Writing video annotations to: ' + annofn)
1170 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1171 annofile.write(info_dict['annotations'])
1172 except (KeyError, TypeError):
1173 self.report_warning('There are no annotations to write.')
1174 except (OSError, IOError):
1175 self.report_error('Cannot write annotations file: ' + annofn)
1178 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1179 self.params.get('writeautomaticsub')])
1181 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1182 # subtitles download errors are already managed as troubles in relevant IE
1183 # that way it will silently go on when used with unsupporting IE
1184 subtitles = info_dict['subtitles']
1185 sub_format = self.params.get('subtitlesformat', 'srt')
1186 for sub_lang in subtitles.keys():
1187 sub = subtitles[sub_lang]
1191 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1192 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1193 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1195 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1196 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1198 except (OSError, IOError):
1199 self.report_error('Cannot write subtitles file ' + sub_filename)
1202 if self.params.get('writeinfojson', False):
1203 infofn = os.path.splitext(filename)[0] + '.info.json'
1204 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1205 self.to_screen('[info] Video description metadata is already present')
1207 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1209 write_json_file(info_dict, infofn)
1210 except (OSError, IOError):
1211 self.report_error('Cannot write metadata to JSON file ' + infofn)
1214 self._write_thumbnails(info_dict, filename)
1216 if not self.params.get('skip_download', False):
1219 fd = get_suitable_downloader(info, self.params)(self, self.params)
1220 for ph in self._progress_hooks:
1221 fd.add_progress_hook(ph)
1222 if self.params.get('verbose'):
1223 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1224 return fd.download(name, info)
1225 if info_dict.get('requested_formats') is not None:
1228 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1229 if not merger._executable:
1231 self.report_warning('You have requested multiple '
1232 'formats but ffmpeg or avconv are not installed.'
1233 ' The formats won\'t be merged')
1235 postprocessors = [merger]
1236 for f in info_dict['requested_formats']:
1237 new_info = dict(info_dict)
1239 fname = self.prepare_filename(new_info)
1240 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1241 downloaded.append(fname)
1242 partial_success = dl(fname, new_info)
1243 success = success and partial_success
1244 info_dict['__postprocessors'] = postprocessors
1245 info_dict['__files_to_merge'] = downloaded
1247 # Just a single file
1248 success = dl(filename, info_dict)
1249 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1250 self.report_error('unable to download video data: %s' % str(err))
1252 except (OSError, IOError) as err:
1253 raise UnavailableVideoError(err)
1254 except (ContentTooShortError, ) as err:
1255 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1260 fixup_policy = self.params.get('fixup')
1261 if fixup_policy is None:
1262 fixup_policy = 'detect_or_warn'
1264 stretched_ratio = info_dict.get('stretched_ratio')
1265 if stretched_ratio is not None and stretched_ratio != 1:
1266 if fixup_policy == 'warn':
1267 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1268 info_dict['id'], stretched_ratio))
1269 elif fixup_policy == 'detect_or_warn':
1270 stretched_pp = FFmpegFixupStretchedPP(self)
1271 if stretched_pp.available:
1272 info_dict.setdefault('__postprocessors', [])
1273 info_dict['__postprocessors'].append(stretched_pp)
1275 self.report_warning(
1276 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1277 info_dict['id'], stretched_ratio))
1279 assert fixup_policy in ('ignore', 'never')
1281 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1282 if fixup_policy == 'warn':
1283 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1285 elif fixup_policy == 'detect_or_warn':
1286 fixup_pp = FFmpegFixupM4aPP(self)
1287 if fixup_pp.available:
1288 info_dict.setdefault('__postprocessors', [])
1289 info_dict['__postprocessors'].append(fixup_pp)
1291 self.report_warning(
1292 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1295 assert fixup_policy in ('ignore', 'never')
1298 self.post_process(filename, info_dict)
1299 except (PostProcessingError) as err:
1300 self.report_error('postprocessing: %s' % str(err))
1302 self.record_download_archive(info_dict)
1304 def download(self, url_list):
1305 """Download a given list of URLs."""
1306 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1307 if (len(url_list) > 1 and
1309 and self.params.get('max_downloads') != 1):
1310 raise SameFileError(outtmpl)
1312 for url in url_list:
1314 # It also downloads the videos
1315 res = self.extract_info(url)
1316 except UnavailableVideoError:
1317 self.report_error('unable to download video')
1318 except MaxDownloadsReached:
1319 self.to_screen('[info] Maximum number of downloaded files reached.')
1322 if self.params.get('dump_single_json', False):
1323 self.to_stdout(json.dumps(res))
1325 return self._download_retcode
1327 def download_with_info_file(self, info_filename):
1328 with io.open(info_filename, 'r', encoding='utf-8') as f:
1331 self.process_ie_result(info, download=True)
1332 except DownloadError:
1333 webpage_url = info.get('webpage_url')
1334 if webpage_url is not None:
1335 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1336 return self.download([webpage_url])
1339 return self._download_retcode
1341 def post_process(self, filename, ie_info):
1342 """Run all the postprocessors on the given file."""
1343 info = dict(ie_info)
1344 info['filepath'] = filename
1346 if ie_info.get('__postprocessors') is not None:
1347 pps_chain.extend(ie_info['__postprocessors'])
1348 pps_chain.extend(self._pps)
1349 for pp in pps_chain:
1351 old_filename = info['filepath']
1353 keep_video_wish, info = pp.run(info)
1354 if keep_video_wish is not None:
1356 keep_video = keep_video_wish
1357 elif keep_video is None:
1358 # No clear decision yet, let IE decide
1359 keep_video = keep_video_wish
1360 except PostProcessingError as e:
1361 self.report_error(e.msg)
1362 if keep_video is False and not self.params.get('keepvideo', False):
1364 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1365 os.remove(encodeFilename(old_filename))
1366 except (IOError, OSError):
1367 self.report_warning('Unable to remove downloaded video file')
1369 def _make_archive_id(self, info_dict):
1370 # Future-proof against any change in case
1371 # and backwards compatibility with prior versions
1372 extractor = info_dict.get('extractor_key')
1373 if extractor is None:
1374 if 'id' in info_dict:
1375 extractor = info_dict.get('ie_key') # key in a playlist
1376 if extractor is None:
1377 return None # Incomplete video information
1378 return extractor.lower() + ' ' + info_dict['id']
1380 def in_download_archive(self, info_dict):
1381 fn = self.params.get('download_archive')
1385 vid_id = self._make_archive_id(info_dict)
1387 return False # Incomplete video information
1390 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1391 for line in archive_file:
1392 if line.strip() == vid_id:
1394 except IOError as ioe:
1395 if ioe.errno != errno.ENOENT:
1399 def record_download_archive(self, info_dict):
1400 fn = self.params.get('download_archive')
1403 vid_id = self._make_archive_id(info_dict)
1405 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1406 archive_file.write(vid_id + '\n')
1409 def format_resolution(format, default='unknown'):
1410 if format.get('vcodec') == 'none':
1412 if format.get('resolution') is not None:
1413 return format['resolution']
1414 if format.get('height') is not None:
1415 if format.get('width') is not None:
1416 res = '%sx%s' % (format['width'], format['height'])
1418 res = '%sp' % format['height']
1419 elif format.get('width') is not None:
1420 res = '?x%d' % format['width']
1425 def _format_note(self, fdict):
1427 if fdict.get('ext') in ['f4f', 'f4m']:
1428 res += '(unsupported) '
1429 if fdict.get('format_note') is not None:
1430 res += fdict['format_note'] + ' '
1431 if fdict.get('tbr') is not None:
1432 res += '%4dk ' % fdict['tbr']
1433 if fdict.get('container') is not None:
1436 res += '%s container' % fdict['container']
1437 if (fdict.get('vcodec') is not None and
1438 fdict.get('vcodec') != 'none'):
1441 res += fdict['vcodec']
1442 if fdict.get('vbr') is not None:
1444 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1446 if fdict.get('vbr') is not None:
1447 res += '%4dk' % fdict['vbr']
1448 if fdict.get('fps') is not None:
1449 res += ', %sfps' % fdict['fps']
1450 if fdict.get('acodec') is not None:
1453 if fdict['acodec'] == 'none':
1456 res += '%-5s' % fdict['acodec']
1457 elif fdict.get('abr') is not None:
1461 if fdict.get('abr') is not None:
1462 res += '@%3dk' % fdict['abr']
1463 if fdict.get('asr') is not None:
1464 res += ' (%5dHz)' % fdict['asr']
1465 if fdict.get('filesize') is not None:
1468 res += format_bytes(fdict['filesize'])
1469 elif fdict.get('filesize_approx') is not None:
1472 res += '~' + format_bytes(fdict['filesize_approx'])
1475 def list_formats(self, info_dict):
1476 def line(format, idlen=20):
1477 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1478 format['format_id'],
1480 self.format_resolution(format),
1481 self._format_note(format),
1484 formats = info_dict.get('formats', [info_dict])
1485 idlen = max(len('format code'),
1486 max(len(f['format_id']) for f in formats))
1488 line(f, idlen) for f in formats
1489 if f.get('preference') is None or f['preference'] >= -1000]
1490 if len(formats) > 1:
1491 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1492 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1494 header_line = line({
1495 'format_id': 'format code', 'ext': 'extension',
1496 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1498 '[info] Available formats for %s:\n%s\n%s' %
1499 (info_dict['id'], header_line, '\n'.join(formats_s)))
1501 def list_thumbnails(self, info_dict):
1502 thumbnails = info_dict.get('thumbnails')
1504 tn_url = info_dict.get('thumbnail')
1506 thumbnails = [{'id': '0', 'url': tn_url}]
1509 '[info] No thumbnails present for %s' % info_dict['id'])
1513 '[info] Thumbnails for %s:' % info_dict['id'])
1514 self.to_screen(render_table(
1515 ['ID', 'width', 'height', 'URL'],
1516 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1518 def urlopen(self, req):
1519 """ Start an HTTP download """
1521 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1522 # always respected by websites, some tend to give out URLs with non percent-encoded
1523 # non-ASCII characters (see telemb.py, ard.py [#3412])
1524 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1525 # To work around aforementioned issue we will replace request's original URL with
1526 # percent-encoded one
1527 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1528 url = req if req_is_string else req.get_full_url()
1529 url_escaped = escape_url(url)
1531 # Substitute URL if any change after escaping
1532 if url != url_escaped:
1536 req = compat_urllib_request.Request(
1537 url_escaped, data=req.data, headers=req.headers,
1538 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1540 return self._opener.open(req, timeout=self._socket_timeout)
1542 def print_debug_header(self):
1543 if not self.params.get('verbose'):
1546 if type('') is not compat_str:
1547 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1548 self.report_warning(
1549 'Your Python is broken! Update to a newer and supported version')
1551 stdout_encoding = getattr(
1552 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1554 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1555 locale.getpreferredencoding(),
1556 sys.getfilesystemencoding(),
1558 self.get_encoding()))
1559 write_string(encoding_str, encoding=None)
1561 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1563 sp = subprocess.Popen(
1564 ['git', 'rev-parse', '--short', 'HEAD'],
1565 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1566 cwd=os.path.dirname(os.path.abspath(__file__)))
1567 out, err = sp.communicate()
1568 out = out.decode().strip()
1569 if re.match('[0-9a-f]+', out):
1570 self._write_string('[debug] Git HEAD: ' + out + '\n')
1576 self._write_string('[debug] Python version %s - %s\n' % (
1577 platform.python_version(), platform_name()))
1579 exe_versions = FFmpegPostProcessor.get_versions()
1580 exe_versions['rtmpdump'] = rtmpdump_version()
1581 exe_str = ', '.join(
1583 for exe, v in sorted(exe_versions.items())
1588 self._write_string('[debug] exe versions: %s\n' % exe_str)
1591 for handler in self._opener.handlers:
1592 if hasattr(handler, 'proxies'):
1593 proxy_map.update(handler.proxies)
1594 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1596 if self.params.get('call_home', False):
1597 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1598 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1599 latest_version = self.urlopen(
1600 'https://yt-dl.org/latest/version').read().decode('utf-8')
1601 if version_tuple(latest_version) > version_tuple(__version__):
1602 self.report_warning(
1603 'You are using an outdated version (newest version: %s)! '
1604 'See https://yt-dl.org/update if you need help updating.' %
1607 def _setup_opener(self):
1608 timeout_val = self.params.get('socket_timeout')
1609 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1611 opts_cookiefile = self.params.get('cookiefile')
1612 opts_proxy = self.params.get('proxy')
1614 if opts_cookiefile is None:
1615 self.cookiejar = compat_cookiejar.CookieJar()
1617 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1619 if os.access(opts_cookiefile, os.R_OK):
1620 self.cookiejar.load()
1622 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1624 if opts_proxy is not None:
1625 if opts_proxy == '':
1628 proxies = {'http': opts_proxy, 'https': opts_proxy}
1630 proxies = compat_urllib_request.getproxies()
1631 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1632 if 'http' in proxies and 'https' not in proxies:
1633 proxies['https'] = proxies['http']
1634 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1636 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1637 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1638 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1639 opener = compat_urllib_request.build_opener(
1640 https_handler, proxy_handler, cookie_processor, ydlh)
1641 # Delete the default user-agent header, which would otherwise apply in
1642 # cases where our custom HTTP handler doesn't come into play
1643 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1644 opener.addheaders = []
1645 self._opener = opener
1647 def encode(self, s):
1648 if isinstance(s, bytes):
1649 return s # Already encoded
1652 return s.encode(self.get_encoding())
1653 except UnicodeEncodeError as err:
1654 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1657 def get_encoding(self):
1658 encoding = self.params.get('encoding')
1659 if encoding is None:
1660 encoding = preferredencoding()
1663 def _write_thumbnails(self, info_dict, filename):
1664 if self.params.get('writethumbnail', False):
1665 thumbnails = info_dict.get('thumbnails')
1667 thumbnails = [thumbnails[-1]]
1668 elif self.params.get('write_all_thumbnails', False):
1669 thumbnails = info_dict.get('thumbnails')
1674 # No thumbnails present, so return immediately
1677 for t in thumbnails:
1678 thumb_ext = determine_ext(t['url'], 'jpg')
1679 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1680 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1681 thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1683 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1684 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1685 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1687 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1688 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1690 uf = self.urlopen(t['url'])
1691 with open(thumb_filename, 'wb') as thumbf:
1692 shutil.copyfileobj(uf, thumbf)
1693 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1694 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1695 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1696 self.report_warning('Unable to download thumbnail "%s": %s' %
1697 (t['url'], compat_str(err)))