youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_cookiejar,
  29     compat_expanduser,
  30     compat_http_client,
  31     compat_kwargs,
  32     compat_str,
  33     compat_urllib_error,
  34     compat_urllib_request,
  35 )
  36 from .utils import (
  37     escape_url,
  38     ContentTooShortError,
  39     date_from_str,
  40     DateRange,
  41     DEFAULT_OUTTMPL,
  42     determine_ext,
  43     DownloadError,
  44     encodeFilename,
  45     ExtractorError,
  46     format_bytes,
  47     formatSeconds,
  48     get_term_width,
  49     locked_file,
  50     make_HTTPS_handler,
  51     MaxDownloadsReached,
  52     PagedList,
  53     parse_filesize,
  54     PostProcessingError,
  55     platform_name,
  56     preferredencoding,
  57     SameFileError,
  58     sanitize_filename,
  59     std_headers,
  60     subtitles_filename,
  61     takewhile_inclusive,
  62     UnavailableVideoError,
  63     url_basename,
  64     version_tuple,
  65     write_json_file,
  66     write_string,
  67     YoutubeDLHandler,
  68     prepend_extension,
  69     args_to_str,
  70     age_restricted,
  71 )
  72 from .cache import Cache
  73 from .extractor import get_info_extractor, gen_extractors
  74 from .downloader import get_suitable_downloader
  75 from .downloader.rtmp import rtmpdump_version
  76 from .postprocessor import (
  77     FFmpegFixupM4aPP,
  78     FFmpegFixupStretchedPP,
  79     FFmpegMergerPP,
  80     FFmpegPostProcessor,
  81     get_postprocessor,
  82 )
  83 from .version import __version__
  84
  85
  86 class YoutubeDL(object):
  87     """YoutubeDL class.
  88
  89     YoutubeDL objects are the ones responsible for downloading the
  90     actual video file and writing it to disk if the user has requested
  91     it, among some other tasks. In most cases there should be one per
  92     program. As, given a video URL, the downloader doesn't know how to
  93     extract all the needed information, task that InfoExtractors do, it
  94     has to pass the URL to one of them.
  95
  96     For this, YoutubeDL objects have a method that allows
  97     InfoExtractors to be registered in a given order. When it is passed
  98     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  99     finds that reports being able to handle it. The InfoExtractor extracts
 100     all the information about the video or videos the URL refers to, and
 101     YoutubeDL processes the extracted information, possibly using a File
 102     Downloader to download the video.
 103
 104     YoutubeDL objects accept a lot of parameters. In order not to saturate
 105     the object constructor with arguments, it receives a dictionary of
 106     options instead. These options are available through the params
 107     attribute for the InfoExtractors to use. The YoutubeDL also
 108     registers itself as the downloader in charge for the InfoExtractors
 109     that are added to it, so this is a "mutual registration".
 110
 111     Available options:
 112
 113     username:          Username for authentication purposes.
 114     password:          Password for authentication purposes.
 115     videopassword:     Password for accessing a video.
 116     usenetrc:          Use netrc for authentication instead.
 117     verbose:           Print additional info to stdout.
 118     quiet:             Do not print messages to stdout.
 119     no_warnings:       Do not print out anything for warnings.
 120     forceurl:          Force printing final URL.
 121     forcetitle:        Force printing title.
 122     forceid:           Force printing ID.
 123     forcethumbnail:    Force printing thumbnail URL.
 124     forcedescription:  Force printing description.
 125     forcefilename:     Force printing final filename.
 126     forceduration:     Force printing duration.
 127     forcejson:         Force printing info_dict as JSON.
 128     dump_single_json:  Force printing the info_dict of the whole playlist
 129                        (or video) as a single JSON line.
 130     simulate:          Do not download the video files.
 131     format:            Video format code. See options.py for more information.
 132     format_limit:      Highest quality format to try.
 133     outtmpl:           Template for output names.
 134     restrictfilenames: Do not allow "&" and spaces in file names
 135     ignoreerrors:      Do not stop on download errors.
 136     nooverwrites:      Prevent overwriting files.
 137     playliststart:     Playlist item to start at.
 138     playlistend:       Playlist item to end at.
 139     playlistreverse:   Download playlist items in reverse order.
 140     matchtitle:        Download only matching titles.
 141     rejecttitle:       Reject downloads for matching titles.
 142     logger:            Log messages to a logging.Logger instance.
 143     logtostderr:       Log messages to stderr instead of stdout.
 144     writedescription:  Write the video description to a .description file
 145     writeinfojson:     Write the video description to a .info.json file
 146     writeannotations:  Write the video annotations to a .annotations.xml file
 147     writethumbnail:    Write the thumbnail image to a file
 148     writesubtitles:    Write the video subtitles to a file
 149     writeautomaticsub: Write the automatic subtitles to a file
 150     allsubtitles:      Downloads all the subtitles of the video
 151                        (requires writesubtitles or writeautomaticsub)
 152     listsubtitles:     Lists all available subtitles for the video
 153     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 154     subtitleslangs:    List of languages of the subtitles to download
 155     keepvideo:         Keep the video file after post-processing
 156     daterange:         A DateRange object, download only if the upload_date is in the range.
 157     skip_download:     Skip the actual download of the video file
 158     cachedir:          Location of the cache files in the filesystem.
 159                        False to disable filesystem cache.
 160     noplaylist:        Download single video instead of a playlist if in doubt.
 161     age_limit:         An integer representing the user's age in years.
 162                        Unsuitable videos for the given age are skipped.
 163     min_views:         An integer representing the minimum view count the video
 164                        must have in order to not be skipped.
 165                        Videos without view count information are always
 166                        downloaded. None for no limit.
 167     max_views:         An integer representing the maximum view count.
 168                        Videos that are more popular than that are not
 169                        downloaded.
 170                        Videos without view count information are always
 171                        downloaded. None for no limit.
 172     download_archive:  File name of a file where all downloads are recorded.
 173                        Videos already present in the file are not downloaded
 174                        again.
 175     cookiefile:        File name where cookies should be read from and dumped to.
 176     nocheckcertificate:Do not verify SSL certificates
 177     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 178                        At the moment, this is only supported by YouTube.
 179     proxy:             URL of the proxy server to use
 180     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 181     bidi_workaround:   Work around buggy terminals without bidirectional text
 182                        support, using fribidi
 183     debug_printtraffic:Print out sent and received HTTP traffic
 184     include_ads:       Download ads as well
 185     default_search:    Prepend this string if an input url is not valid.
 186                        'auto' for elaborate guessing
 187     encoding:          Use this encoding instead of the system-specified.
 188     extract_flat:      Do not resolve URLs, return the immediate result.
 189                        Pass in 'in_playlist' to only show this behavior for
 190                        playlist items.
 191     postprocessors:    A list of dictionaries, each with an entry
 192                        * key:  The name of the postprocessor. See
 193                                youtube_dl/postprocessor/__init__.py for a list.
 194                        as well as any further keyword arguments for the
 195                        postprocessor.
 196     progress_hooks:    A list of functions that get called on download
 197                        progress, with a dictionary with the entries
 198                        * filename: The final filename
 199                        * status: One of "downloading" and "finished"
 200
 201                        The dict may also have some of the following entries:
 202
 203                        * downloaded_bytes: Bytes on disk
 204                        * total_bytes: Size of the whole file, None if unknown
 205                        * tmpfilename: The filename we're currently writing to
 206                        * eta: The estimated time in seconds, None if unknown
 207                        * speed: The download speed in bytes/second, None if
 208                                 unknown
 209
 210                        Progress hooks are guaranteed to be called at least once
 211                        (with status "finished") if the download is successful.
 212     merge_output_format: Extension to use when merging formats.
 213     fixup:             Automatically correct known faults of the file.
 214                        One of:
 215                        - "never": do nothing
 216                        - "warn": only emit a warning
 217                        - "detect_or_warn": check whether we can do anything
 218                                            about it, warn otherwise (default)
 219     source_address:    (Experimental) Client-side IP address to bind to.
 220     call_home:         Boolean, true iff we are allowed to contact the
 221                        youtube-dl servers for debugging.
 222     sleep_interval:    Number of seconds to sleep before each download.
 223     external_downloader:  Executable of the external downloader to call.
 224
 225
 226     The following parameters are not used by YoutubeDL itself, they are used by
 227     the FileDownloader:
 228     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 229     noresizebuffer, retries, continuedl, noprogress, consoletitle
 230
 231     The following options are used by the post processors:
 232     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 233                        otherwise prefer avconv.
 234     exec_cmd:          Arbitrary command to run after downloading
 235     """
 236
    # Class-level placeholders; __init__ rebinds every one of these on the
    # instance.
    params = None
    # NOTE: these two lists are mutable objects shared at class level; they are
    # only safe because __init__ replaces them with fresh per-instance lists.
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 243
 244     def __init__(self, params=None, auto_init=True):
 245         """Create a FileDownloader object with the given options."""
 246         if params is None:
 247             params = {}
 248         self._ies = []
 249         self._ies_instances = {}
 250         self._pps = []
 251         self._progress_hooks = []
 252         self._download_retcode = 0
 253         self._num_downloads = 0
 254         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 255         self._err_file = sys.stderr
 256         self.params = params
 257         self.cache = Cache(self)
 258
 259         if params.get('bidi_workaround', False):
 260             try:
 261                 import pty
 262                 master, slave = pty.openpty()
 263                 width = get_term_width()
 264                 if width is None:
 265                     width_args = []
 266                 else:
 267                     width_args = ['-w', str(width)]
 268                 sp_kwargs = dict(
 269                     stdin=subprocess.PIPE,
 270                     stdout=slave,
 271                     stderr=self._err_file)
 272                 try:
 273                     self._output_process = subprocess.Popen(
 274                         ['bidiv'] + width_args, **sp_kwargs
 275                     )
 276                 except OSError:
 277                     self._output_process = subprocess.Popen(
 278                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 279                 self._output_channel = os.fdopen(master, 'rb')
 280             except OSError as ose:
 281                 if ose.errno == 2:
 282                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 283                 else:
 284                     raise
 285
 286         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 287                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 288                 and not params.get('restrictfilenames', False)):
 289             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 290             self.report_warning(
 291                 'Assuming --restrict-filenames since file system encoding '
 292                 'cannot encode all characters. '
 293                 'Set the LC_ALL environment variable to fix this.')
 294             self.params['restrictfilenames'] = True
 295
 296         if '%(stitle)s' in self.params.get('outtmpl', ''):
 297             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 298
 299         self._setup_opener()
 300
 301         if auto_init:
 302             self.print_debug_header()
 303             self.add_default_info_extractors()
 304
 305         for pp_def_raw in self.params.get('postprocessors', []):
 306             pp_class = get_postprocessor(pp_def_raw['key'])
 307             pp_def = dict(pp_def_raw)
 308             del pp_def['key']
 309             pp = pp_class(self, **compat_kwargs(pp_def))
 310             self.add_post_processor(pp)
 311
 312         for ph in self.params.get('progress_hooks', []):
 313             self.add_progress_hook(ph)
 314
 315     def warn_if_short_id(self, argv):
 316         # short YouTube ID starting with dash?
 317         idxs = [
 318             i for i, a in enumerate(argv)
 319             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 320         if idxs:
 321             correct_argv = (
 322                 ['youtube-dl'] +
 323                 [a for i, a in enumerate(argv) if i not in idxs] +
 324                 ['--'] + [argv[i] for i in idxs]
 325             )
 326             self.report_warning(
 327                 'Long argument string detected. '
 328                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 329                 args_to_str(correct_argv))
 330
 331     def add_info_extractor(self, ie):
 332         """Add an InfoExtractor object to the end of the list."""
 333         self._ies.append(ie)
 334         self._ies_instances[ie.ie_key()] = ie
 335         ie.set_downloader(self)
 336
 337     def get_info_extractor(self, ie_key):
 338         """
 339         Get an instance of an IE with name ie_key, it will try to get one from
 340         the _ies list, if there's no instance it will create a new one and add
 341         it to the extractor list.
 342         """
 343         ie = self._ies_instances.get(ie_key)
 344         if ie is None:
 345             ie = get_info_extractor(ie_key)()
 346             self.add_info_extractor(ie)
 347         return ie
 348
 349     def add_default_info_extractors(self):
 350         """
 351         Add the InfoExtractors returned by gen_extractors to the end of the list
 352         """
 353         for ie in gen_extractors():
 354             self.add_info_extractor(ie)
 355
 356     def add_post_processor(self, pp):
 357         """Add a PostProcessor object to the end of the chain."""
 358         self._pps.append(pp)
 359         pp.set_downloader(self)
 360
 361     def add_progress_hook(self, ph):
 362         """Add the progress hook (currently only for the file downloader)"""
 363         self._progress_hooks.append(ph)
 364
 365     def _bidi_workaround(self, message):
 366         if not hasattr(self, '_output_channel'):
 367             return message
 368
 369         assert hasattr(self, '_output_process')
 370         assert isinstance(message, compat_str)
 371         line_count = message.count('\n') + 1
 372         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 373         self._output_process.stdin.flush()
 374         res = ''.join(self._output_channel.readline().decode('utf-8')
 375                       for _ in range(line_count))
 376         return res[:-len('\n')]
 377
 378     def to_screen(self, message, skip_eol=False):
 379         """Print message to stdout if not in quiet mode."""
 380         return self.to_stdout(message, skip_eol, check_quiet=True)
 381
 382     def _write_string(self, s, out=None):
 383         write_string(s, out=out, encoding=self.params.get('encoding'))
 384
 385     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 386         """Print message to stdout if not in quiet mode."""
 387         if self.params.get('logger'):
 388             self.params['logger'].debug(message)
 389         elif not check_quiet or not self.params.get('quiet', False):
 390             message = self._bidi_workaround(message)
 391             terminator = ['\n', ''][skip_eol]
 392             output = message + terminator
 393
 394             self._write_string(output, self._screen_file)
 395
 396     def to_stderr(self, message):
 397         """Print message to stderr."""
 398         assert isinstance(message, compat_str)
 399         if self.params.get('logger'):
 400             self.params['logger'].error(message)
 401         else:
 402             message = self._bidi_workaround(message)
 403             output = message + '\n'
 404             self._write_string(output, self._err_file)
 405
 406     def to_console_title(self, message):
 407         if not self.params.get('consoletitle', False):
 408             return
 409         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 410             # c_wchar_p() might not be necessary if `message` is
 411             # already of type unicode()
 412             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 413         elif 'TERM' in os.environ:
 414             self._write_string('\033]0;%s\007' % message, self._screen_file)
 415
 416     def save_console_title(self):
 417         if not self.params.get('consoletitle', False):
 418             return
 419         if 'TERM' in os.environ:
 420             # Save the title on stack
 421             self._write_string('\033[22;0t', self._screen_file)
 422
 423     def restore_console_title(self):
 424         if not self.params.get('consoletitle', False):
 425             return
 426         if 'TERM' in os.environ:
 427             # Restore the title from stack
 428             self._write_string('\033[23;0t', self._screen_file)
 429
 430     def __enter__(self):
 431         self.save_console_title()
 432         return self
 433
 434     def __exit__(self, *args):
 435         self.restore_console_title()
 436
 437         if self.params.get('cookiefile') is not None:
 438             self.cookiejar.save()
 439
 440     def trouble(self, message=None, tb=None):
 441         """Determine action to take when a download problem appears.
 442
 443         Depending on if the downloader has been configured to ignore
 444         download errors or not, this method may throw an exception or
 445         not when errors are found, after printing the message.
 446
 447         tb, if given, is additional traceback information.
 448         """
 449         if message is not None:
 450             self.to_stderr(message)
 451         if self.params.get('verbose'):
 452             if tb is None:
 453                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 454                     tb = ''
 455                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 456                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 457                     tb += compat_str(traceback.format_exc())
 458                 else:
 459                     tb_data = traceback.format_list(traceback.extract_stack())
 460                     tb = ''.join(tb_data)
 461             self.to_stderr(tb)
 462         if not self.params.get('ignoreerrors', False):
 463             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 464                 exc_info = sys.exc_info()[1].exc_info
 465             else:
 466                 exc_info = sys.exc_info()
 467             raise DownloadError(message, exc_info)
 468         self._download_retcode = 1
 469
 470     def report_warning(self, message):
 471         '''
 472         Print the message to stderr, it will be prefixed with 'WARNING:'
 473         If stderr is a tty file the 'WARNING:' will be colored
 474         '''
 475         if self.params.get('logger') is not None:
 476             self.params['logger'].warning(message)
 477         else:
 478             if self.params.get('no_warnings'):
 479                 return
 480             if self._err_file.isatty() and os.name != 'nt':
 481                 _msg_header = '\033[0;33mWARNING:\033[0m'
 482             else:
 483                 _msg_header = 'WARNING:'
 484             warning_message = '%s %s' % (_msg_header, message)
 485             self.to_stderr(warning_message)
 486
 487     def report_error(self, message, tb=None):
 488         '''
 489         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 490         in red if stderr is a tty file.
 491         '''
 492         if self._err_file.isatty() and os.name != 'nt':
 493             _msg_header = '\033[0;31mERROR:\033[0m'
 494         else:
 495             _msg_header = 'ERROR:'
 496         error_message = '%s %s' % (_msg_header, message)
 497         self.trouble(error_message, tb)
 498
 499     def report_file_already_downloaded(self, file_name):
 500         """Report file has already been fully downloaded."""
 501         try:
 502             self.to_screen('[download] %s has already been downloaded' % file_name)
 503         except UnicodeEncodeError:
 504             self.to_screen('[download] The file has already been downloaded')
 505
 506     def prepare_filename(self, info_dict):
 507         """Generate the output filename."""
 508         try:
 509             template_dict = dict(info_dict)
 510
 511             template_dict['epoch'] = int(time.time())
 512             autonumber_size = self.params.get('autonumber_size')
 513             if autonumber_size is None:
 514                 autonumber_size = 5
 515             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 516             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 517             if template_dict.get('playlist_index') is not None:
 518                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 519             if template_dict.get('resolution') is None:
 520                 if template_dict.get('width') and template_dict.get('height'):
 521                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 522                 elif template_dict.get('height'):
 523                     template_dict['resolution'] = '%sp' % template_dict['height']
 524                 elif template_dict.get('width'):
 525                     template_dict['resolution'] = '?x%d' % template_dict['width']
 526
 527             sanitize = lambda k, v: sanitize_filename(
 528                 compat_str(v),
 529                 restricted=self.params.get('restrictfilenames'),
 530                 is_id=(k == 'id'))
 531             template_dict = dict((k, sanitize(k, v))
 532                                  for k, v in template_dict.items()
 533                                  if v is not None)
 534             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 535
 536             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 537             tmpl = compat_expanduser(outtmpl)
 538             filename = tmpl % template_dict
 539             return filename
 540         except ValueError as err:
 541             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 542             return None
 543
 544     def _match_entry(self, info_dict):
 545         """ Returns None iff the file should be downloaded """
 546
 547         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 548         if 'title' in info_dict:
 549             # This can happen when we're just evaluating the playlist
 550             title = info_dict['title']
 551             matchtitle = self.params.get('matchtitle', False)
 552             if matchtitle:
 553                 if not re.search(matchtitle, title, re.IGNORECASE):
 554                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 555             rejecttitle = self.params.get('rejecttitle', False)
 556             if rejecttitle:
 557                 if re.search(rejecttitle, title, re.IGNORECASE):
 558                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 559         date = info_dict.get('upload_date', None)
 560         if date is not None:
 561             dateRange = self.params.get('daterange', DateRange())
 562             if date not in dateRange:
 563                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 564         view_count = info_dict.get('view_count', None)
 565         if view_count is not None:
 566             min_views = self.params.get('min_views')
 567             if min_views is not None and view_count < min_views:
 568                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 569             max_views = self.params.get('max_views')
 570             if max_views is not None and view_count > max_views:
 571                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 572         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 573             return 'Skipping "%s" because it is age restricted' % title
 574         if self.in_download_archive(info_dict):
 575             return '%s has already been recorded in archive' % video_title
 576         return None
 577
 578     @staticmethod
 579     def add_extra_info(info_dict, extra_info):
 580         '''Set the keys from extra_info in info dict if they are missing'''
 581         for key, value in extra_info.items():
 582             info_dict.setdefault(key, value)
 583
 584     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 585                      process=True):
 586         '''
 587         Returns a list with a dictionary for each video we find.
 588         If 'download', also downloads the videos.
 589         extra_info is a dict containing the extra values to add to each result
 590          '''
 591
 592         if ie_key:
 593             ies = [self.get_info_extractor(ie_key)]
 594         else:
 595             ies = self._ies
 596
 597         for ie in ies:
 598             if not ie.suitable(url):
 599                 continue
 600
 601             if not ie.working():
 602                 self.report_warning('The program functionality for this site has been marked as broken, '
 603                                     'and will probably not work.')
 604
 605             try:
 606                 ie_result = ie.extract(url)
 607                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 608                     break
 609                 if isinstance(ie_result, list):
 610                     # Backwards compatibility: old IE result format
 611                     ie_result = {
 612                         '_type': 'compat_list',
 613                         'entries': ie_result,
 614                     }
 615                 self.add_default_extra_info(ie_result, ie, url)
 616                 if process:
 617                     return self.process_ie_result(ie_result, download, extra_info)
 618                 else:
 619                     return ie_result
 620             except ExtractorError as de:  # An error we somewhat expected
 621                 self.report_error(compat_str(de), de.format_traceback())
 622                 break
 623             except MaxDownloadsReached:
 624                 raise
 625             except Exception as e:
 626                 if self.params.get('ignoreerrors', False):
 627                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 628                     break
 629                 else:
 630                     raise
 631         else:
 632             self.report_error('no suitable InfoExtractor for URL %s' % url)
 633
 634     def add_default_extra_info(self, ie_result, ie, url):
 635         self.add_extra_info(ie_result, {
 636             'extractor': ie.IE_NAME,
 637             'webpage_url': url,
 638             'webpage_url_basename': url_basename(url),
 639             'extractor_key': ie.ie_key(),
 640         })
 641
 642     def process_ie_result(self, ie_result, download=True, extra_info={}):
 643         """
 644         Take the result of the ie(may be modified) and resolve all unresolved
 645         references (URLs, playlist items).
 646
 647         It will also download the videos if 'download'.
 648         Returns the resolved ie_result.
 649         """
 650
 651         result_type = ie_result.get('_type', 'video')
 652
 653         if result_type in ('url', 'url_transparent'):
 654             extract_flat = self.params.get('extract_flat', False)
 655             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 656                     extract_flat is True):
 657                 if self.params.get('forcejson', False):
 658                     self.to_stdout(json.dumps(ie_result))
 659                 return ie_result
 660
 661         if result_type == 'video':
 662             self.add_extra_info(ie_result, extra_info)
 663             return self.process_video_result(ie_result, download=download)
 664         elif result_type == 'url':
 665             # We have to add extra_info to the results because it may be
 666             # contained in a playlist
 667             return self.extract_info(ie_result['url'],
 668                                      download,
 669                                      ie_key=ie_result.get('ie_key'),
 670                                      extra_info=extra_info)
 671         elif result_type == 'url_transparent':
 672             # Use the information from the embedding page
 673             info = self.extract_info(
 674                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 675                 extra_info=extra_info, download=False, process=False)
 676
 677             force_properties = dict(
 678                 (k, v) for k, v in ie_result.items() if v is not None)
 679             for f in ('_type', 'url'):
 680                 if f in force_properties:
 681                     del force_properties[f]
 682             new_result = info.copy()
 683             new_result.update(force_properties)
 684
 685             assert new_result.get('_type') != 'url_transparent'
 686
 687             return self.process_ie_result(
 688                 new_result, download=download, extra_info=extra_info)
 689         elif result_type == 'playlist' or result_type == 'multi_video':
 690             # We process each entry in the playlist
 691             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 692             self.to_screen('[download] Downloading playlist: %s' % playlist)
 693
 694             playlist_results = []
 695
 696             playliststart = self.params.get('playliststart', 1) - 1
 697             playlistend = self.params.get('playlistend', None)
 698             # For backwards compatibility, interpret -1 as whole list
 699             if playlistend == -1:
 700                 playlistend = None
 701
 702             ie_entries = ie_result['entries']
 703             if isinstance(ie_entries, list):
 704                 n_all_entries = len(ie_entries)
 705                 entries = ie_entries[playliststart:playlistend]
 706                 n_entries = len(entries)
 707                 self.to_screen(
 708                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 709                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 710             elif isinstance(ie_entries, PagedList):
 711                 entries = ie_entries.getslice(
 712                     playliststart, playlistend)
 713                 n_entries = len(entries)
 714                 self.to_screen(
 715                     "[%s] playlist %s: Downloading %d videos" %
 716                     (ie_result['extractor'], playlist, n_entries))
 717             else:  # iterable
 718                 entries = list(itertools.islice(
 719                     ie_entries, playliststart, playlistend))
 720                 n_entries = len(entries)
 721                 self.to_screen(
 722                     "[%s] playlist %s: Downloading %d videos" %
 723                     (ie_result['extractor'], playlist, n_entries))
 724
 725             if self.params.get('playlistreverse', False):
 726                 entries = entries[::-1]
 727
 728             for i, entry in enumerate(entries, 1):
 729                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 730                 extra = {
 731                     'n_entries': n_entries,
 732                     'playlist': playlist,
 733                     'playlist_id': ie_result.get('id'),
 734                     'playlist_title': ie_result.get('title'),
 735                     'playlist_index': i + playliststart,
 736                     'extractor': ie_result['extractor'],
 737                     'webpage_url': ie_result['webpage_url'],
 738                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 739                     'extractor_key': ie_result['extractor_key'],
 740                 }
 741
 742                 reason = self._match_entry(entry)
 743                 if reason is not None:
 744                     self.to_screen('[download] ' + reason)
 745                     continue
 746
 747                 entry_result = self.process_ie_result(entry,
 748                                                       download=download,
 749                                                       extra_info=extra)
 750                 playlist_results.append(entry_result)
 751             ie_result['entries'] = playlist_results
 752             return ie_result
 753         elif result_type == 'compat_list':
 754             self.report_warning(
 755                 'Extractor %s returned a compat_list result. '
 756                 'It needs to be updated.' % ie_result.get('extractor'))
 757
 758             def _fixup(r):
 759                 self.add_extra_info(
 760                     r,
 761                     {
 762                         'extractor': ie_result['extractor'],
 763                         'webpage_url': ie_result['webpage_url'],
 764                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 765                         'extractor_key': ie_result['extractor_key'],
 766                     }
 767                 )
 768                 return r
 769             ie_result['entries'] = [
 770                 self.process_ie_result(_fixup(r), download, extra_info)
 771                 for r in ie_result['entries']
 772             ]
 773             return ie_result
 774         else:
 775             raise Exception('Invalid result type: %s' % result_type)
 776
 777     def _apply_format_filter(self, format_spec, available_formats):
 778         " Returns a tuple of the remaining format_spec and filtered formats "
 779
 780         OPERATORS = {
 781             '<': operator.lt,
 782             '<=': operator.le,
 783             '>': operator.gt,
 784             '>=': operator.ge,
 785             '=': operator.eq,
 786             '!=': operator.ne,
 787         }
 788         operator_rex = re.compile(r'''(?x)\s*\[
 789             (?P<key>width|height|tbr|abr|vbr|filesize)
 790             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 791             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 792             \]$
 793             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 794         m = operator_rex.search(format_spec)
 795         if not m:
 796             raise ValueError('Invalid format specification %r' % format_spec)
 797
 798         try:
 799             comparison_value = int(m.group('value'))
 800         except ValueError:
 801             comparison_value = parse_filesize(m.group('value'))
 802             if comparison_value is None:
 803                 comparison_value = parse_filesize(m.group('value') + 'B')
 804             if comparison_value is None:
 805                 raise ValueError(
 806                     'Invalid value %r in format specification %r' % (
 807                         m.group('value'), format_spec))
 808         op = OPERATORS[m.group('op')]
 809
 810         def _filter(f):
 811             actual_value = f.get(m.group('key'))
 812             if actual_value is None:
 813                 return m.group('none_inclusive')
 814             return op(actual_value, comparison_value)
 815         new_formats = [f for f in available_formats if _filter(f)]
 816
 817         new_format_spec = format_spec[:-len(m.group(0))]
 818         if not new_format_spec:
 819             new_format_spec = 'best'
 820
 821         return (new_format_spec, new_formats)
 822
 823     def select_format(self, format_spec, available_formats):
 824         while format_spec.endswith(']'):
 825             format_spec, available_formats = self._apply_format_filter(
 826                 format_spec, available_formats)
 827         if not available_formats:
 828             return None
 829
 830         if format_spec == 'best' or format_spec is None:
 831             return available_formats[-1]
 832         elif format_spec == 'worst':
 833             return available_formats[0]
 834         elif format_spec == 'bestaudio':
 835             audio_formats = [
 836                 f for f in available_formats
 837                 if f.get('vcodec') == 'none']
 838             if audio_formats:
 839                 return audio_formats[-1]
 840         elif format_spec == 'worstaudio':
 841             audio_formats = [
 842                 f for f in available_formats
 843                 if f.get('vcodec') == 'none']
 844             if audio_formats:
 845                 return audio_formats[0]
 846         elif format_spec == 'bestvideo':
 847             video_formats = [
 848                 f for f in available_formats
 849                 if f.get('acodec') == 'none']
 850             if video_formats:
 851                 return video_formats[-1]
 852         elif format_spec == 'worstvideo':
 853             video_formats = [
 854                 f for f in available_formats
 855                 if f.get('acodec') == 'none']
 856             if video_formats:
 857                 return video_formats[0]
 858         else:
 859             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 860             if format_spec in extensions:
 861                 filter_f = lambda f: f['ext'] == format_spec
 862             else:
 863                 filter_f = lambda f: f['format_id'] == format_spec
 864             matches = list(filter(filter_f, available_formats))
 865             if matches:
 866                 return matches[-1]
 867         return None
 868
 869     def _calc_headers(self, info_dict):
 870         res = std_headers.copy()
 871
 872         add_headers = info_dict.get('http_headers')
 873         if add_headers:
 874             res.update(add_headers)
 875
 876         cookies = self._calc_cookies(info_dict)
 877         if cookies:
 878             res['Cookie'] = cookies
 879
 880         return res
 881
 882     def _calc_cookies(self, info_dict):
 883         class _PseudoRequest(object):
 884             def __init__(self, url):
 885                 self.url = url
 886                 self.headers = {}
 887                 self.unverifiable = False
 888
 889             def add_unredirected_header(self, k, v):
 890                 self.headers[k] = v
 891
 892             def get_full_url(self):
 893                 return self.url
 894
 895             def has_header(self, h):
 896                 return h in self.headers
 897
 898         pr = _PseudoRequest(info_dict['url'])
 899         self.cookiejar.add_cookie_header(pr)
 900         return pr.headers.get('Cookie')
 901
 902     def process_video_result(self, info_dict, download=True):
 903         assert info_dict.get('_type', 'video') == 'video'
 904
 905         if 'id' not in info_dict:
 906             raise ExtractorError('Missing "id" field in extractor result')
 907         if 'title' not in info_dict:
 908             raise ExtractorError('Missing "title" field in extractor result')
 909
 910         if 'playlist' not in info_dict:
 911             # It isn't part of a playlist
 912             info_dict['playlist'] = None
 913             info_dict['playlist_index'] = None
 914
 915         thumbnails = info_dict.get('thumbnails')
 916         if thumbnails:
 917             thumbnails.sort(key=lambda t: (
 918                 t.get('width'), t.get('height'), t.get('url')))
 919             for t in thumbnails:
 920                 if 'width' in t and 'height' in t:
 921                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 922
 923         if thumbnails and 'thumbnail' not in info_dict:
 924             info_dict['thumbnail'] = thumbnails[-1]['url']
 925
 926         if 'display_id' not in info_dict and 'id' in info_dict:
 927             info_dict['display_id'] = info_dict['id']
 928
 929         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 930             # Working around negative timestamps in Windows
 931             # (see http://bugs.python.org/issue1646728)
 932             if info_dict['timestamp'] < 0 and os.name == 'nt':
 933                 info_dict['timestamp'] = 0
 934             upload_date = datetime.datetime.utcfromtimestamp(
 935                 info_dict['timestamp'])
 936             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 937
 938         # This extractors handle format selection themselves
 939         if info_dict['extractor'] in ['Youku']:
 940             if download:
 941                 self.process_info(info_dict)
 942             return info_dict
 943
 944         # We now pick which formats have to be downloaded
 945         if info_dict.get('formats') is None:
 946             # There's only one format available
 947             formats = [info_dict]
 948         else:
 949             formats = info_dict['formats']
 950
 951         if not formats:
 952             raise ExtractorError('No video formats found!')
 953
 954         # We check that all the formats have the format and format_id fields
 955         for i, format in enumerate(formats):
 956             if 'url' not in format:
 957                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 958
 959             if format.get('format_id') is None:
 960                 format['format_id'] = compat_str(i)
 961             if format.get('format') is None:
 962                 format['format'] = '{id} - {res}{note}'.format(
 963                     id=format['format_id'],
 964                     res=self.format_resolution(format),
 965                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 966                 )
 967             # Automatically determine file extension if missing
 968             if 'ext' not in format:
 969                 format['ext'] = determine_ext(format['url']).lower()
 970             # Add HTTP headers, so that external programs can use them from the
 971             # json output
 972             full_format_info = info_dict.copy()
 973             full_format_info.update(format)
 974             format['http_headers'] = self._calc_headers(full_format_info)
 975
 976         format_limit = self.params.get('format_limit', None)
 977         if format_limit:
 978             formats = list(takewhile_inclusive(
 979                 lambda f: f['format_id'] != format_limit, formats
 980             ))
 981
 982         # TODO Central sorting goes here
 983
 984         if formats[0] is not info_dict:
 985             # only set the 'formats' fields if the original info_dict list them
 986             # otherwise we end up with a circular reference, the first (and unique)
 987             # element in the 'formats' field in info_dict is info_dict itself,
 988             # wich can't be exported to json
 989             info_dict['formats'] = formats
 990         if self.params.get('listformats', None):
 991             self.list_formats(info_dict)
 992             return
 993
 994         req_format = self.params.get('format')
 995         if req_format is None:
 996             req_format = 'best'
 997         formats_to_download = []
 998         # The -1 is for supporting YoutubeIE
 999         if req_format in ('-1', 'all'):
1000             formats_to_download = formats
1001         else:
1002             for rfstr in req_format.split(','):
1003                 # We can accept formats requested in the format: 34/5/best, we pick
1004                 # the first that is available, starting from left
1005                 req_formats = rfstr.split('/')
1006                 for rf in req_formats:
1007                     if re.match(r'.+?\+.+?', rf) is not None:
1008                         # Two formats have been requested like '137+139'
1009                         format_1, format_2 = rf.split('+')
1010                         formats_info = (self.select_format(format_1, formats),
1011                                         self.select_format(format_2, formats))
1012                         if all(formats_info):
1013                             # The first format must contain the video and the
1014                             # second the audio
1015                             if formats_info[0].get('vcodec') == 'none':
1016                                 self.report_error('The first format must '
1017                                                   'contain the video, try using '
1018                                                   '"-f %s+%s"' % (format_2, format_1))
1019                                 return
1020                             output_ext = (
1021                                 formats_info[0]['ext']
1022                                 if self.params.get('merge_output_format') is None
1023                                 else self.params['merge_output_format'])
1024                             selected_format = {
1025                                 'requested_formats': formats_info,
1026                                 'format': rf,
1027                                 'ext': formats_info[0]['ext'],
1028                                 'width': formats_info[0].get('width'),
1029                                 'height': formats_info[0].get('height'),
1030                                 'resolution': formats_info[0].get('resolution'),
1031                                 'fps': formats_info[0].get('fps'),
1032                                 'vcodec': formats_info[0].get('vcodec'),
1033                                 'vbr': formats_info[0].get('vbr'),
1034                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1035                                 'acodec': formats_info[1].get('acodec'),
1036                                 'abr': formats_info[1].get('abr'),
1037                                 'ext': output_ext,
1038                             }
1039                         else:
1040                             selected_format = None
1041                     else:
1042                         selected_format = self.select_format(rf, formats)
1043                     if selected_format is not None:
1044                         formats_to_download.append(selected_format)
1045                         break
1046         if not formats_to_download:
1047             raise ExtractorError('requested format not available',
1048                                  expected=True)
1049
1050         if download:
1051             if len(formats_to_download) > 1:
1052                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1053             for format in formats_to_download:
1054                 new_info = dict(info_dict)
1055                 new_info.update(format)
1056                 self.process_info(new_info)
1057         # We update the info dict with the best quality format (backwards compatibility)
1058         info_dict.update(formats_to_download[-1])
1059         return info_dict
1060
1061     def process_info(self, info_dict):
1062         """Process a single resolved IE result."""
1063
1064         assert info_dict.get('_type', 'video') == 'video'
1065
1066         max_downloads = self.params.get('max_downloads')
1067         if max_downloads is not None:
1068             if self._num_downloads >= int(max_downloads):
1069                 raise MaxDownloadsReached()
1070
1071         info_dict['fulltitle'] = info_dict['title']
1072         if len(info_dict['title']) > 200:
1073             info_dict['title'] = info_dict['title'][:197] + '...'
1074
1075         # Keep for backwards compatibility
1076         info_dict['stitle'] = info_dict['title']
1077
1078         if 'format' not in info_dict:
1079             info_dict['format'] = info_dict['ext']
1080
1081         reason = self._match_entry(info_dict)
1082         if reason is not None:
1083             self.to_screen('[download] ' + reason)
1084             return
1085
1086         self._num_downloads += 1
1087
1088         filename = self.prepare_filename(info_dict)
1089
1090         # Forced printings
1091         if self.params.get('forcetitle', False):
1092             self.to_stdout(info_dict['fulltitle'])
1093         if self.params.get('forceid', False):
1094             self.to_stdout(info_dict['id'])
1095         if self.params.get('forceurl', False):
1096             if info_dict.get('requested_formats') is not None:
1097                 for f in info_dict['requested_formats']:
1098                     self.to_stdout(f['url'] + f.get('play_path', ''))
1099             else:
1100                 # For RTMP URLs, also include the playpath
1101                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1102         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1103             self.to_stdout(info_dict['thumbnail'])
1104         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1105             self.to_stdout(info_dict['description'])
1106         if self.params.get('forcefilename', False) and filename is not None:
1107             self.to_stdout(filename)
1108         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1109             self.to_stdout(formatSeconds(info_dict['duration']))
1110         if self.params.get('forceformat', False):
1111             self.to_stdout(info_dict['format'])
1112         if self.params.get('forcejson', False):
1113             info_dict['_filename'] = filename
1114             self.to_stdout(json.dumps(info_dict))
1115         if self.params.get('dump_single_json', False):
1116             info_dict['_filename'] = filename
1117
1118         # Do nothing else if in simulate mode
1119         if self.params.get('simulate', False):
1120             return
1121
1122         if filename is None:
1123             return
1124
1125         try:
1126             dn = os.path.dirname(encodeFilename(filename))
1127             if dn and not os.path.exists(dn):
1128                 os.makedirs(dn)
1129         except (OSError, IOError) as err:
1130             self.report_error('unable to create directory ' + compat_str(err))
1131             return
1132
1133         if self.params.get('writedescription', False):
1134             descfn = filename + '.description'
1135             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1136                 self.to_screen('[info] Video description is already present')
1137             elif info_dict.get('description') is None:
1138                 self.report_warning('There\'s no description to write.')
1139             else:
1140                 try:
1141                     self.to_screen('[info] Writing video description to: ' + descfn)
1142                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1143                         descfile.write(info_dict['description'])
1144                 except (OSError, IOError):
1145                     self.report_error('Cannot write description file ' + descfn)
1146                     return
1147
1148         if self.params.get('writeannotations', False):
1149             annofn = filename + '.annotations.xml'
1150             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1151                 self.to_screen('[info] Video annotations are already present')
1152             else:
1153                 try:
1154                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1155                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1156                         annofile.write(info_dict['annotations'])
1157                 except (KeyError, TypeError):
1158                     self.report_warning('There are no annotations to write.')
1159                 except (OSError, IOError):
1160                     self.report_error('Cannot write annotations file: ' + annofn)
1161                     return
1162
1163         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1164                                        self.params.get('writeautomaticsub')])
1165
1166         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1167             # subtitles download errors are already managed as troubles in relevant IE
1168             # that way it will silently go on when used with unsupporting IE
1169             subtitles = info_dict['subtitles']
1170             sub_format = self.params.get('subtitlesformat', 'srt')
1171             for sub_lang in subtitles.keys():
1172                 sub = subtitles[sub_lang]
1173                 if sub is None:
1174                     continue
1175                 try:
1176                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1177                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1178                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1179                     else:
1180                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1181                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1182                             subfile.write(sub)
1183                 except (OSError, IOError):
1184                     self.report_error('Cannot write subtitles file ' + sub_filename)
1185                     return
1186
1187         if self.params.get('writeinfojson', False):
1188             infofn = os.path.splitext(filename)[0] + '.info.json'
1189             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1190                 self.to_screen('[info] Video description metadata is already present')
1191             else:
1192                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1193                 try:
1194                     write_json_file(info_dict, infofn)
1195                 except (OSError, IOError):
1196                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1197                     return
1198
1199         if self.params.get('writethumbnail', False):
1200             if info_dict.get('thumbnail') is not None:
1201                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1202                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1203                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1204                     self.to_screen('[%s] %s: Thumbnail is already present' %
1205                                    (info_dict['extractor'], info_dict['id']))
1206                 else:
1207                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1208                                    (info_dict['extractor'], info_dict['id']))
1209                     try:
1210                         uf = self.urlopen(info_dict['thumbnail'])
1211                         with open(thumb_filename, 'wb') as thumbf:
1212                             shutil.copyfileobj(uf, thumbf)
1213                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1214                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1215                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1216                         self.report_warning('Unable to download thumbnail "%s": %s' %
1217                                             (info_dict['thumbnail'], compat_str(err)))
1218
1219         if not self.params.get('skip_download', False):
1220             try:
1221                 def dl(name, info):
1222                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1223                     for ph in self._progress_hooks:
1224                         fd.add_progress_hook(ph)
1225                     if self.params.get('verbose'):
1226                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1227                     return fd.download(name, info)
1228                 if info_dict.get('requested_formats') is not None:
1229                     downloaded = []
1230                     success = True
1231                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1232                     if not merger._executable:
1233                         postprocessors = []
1234                         self.report_warning('You have requested multiple '
1235                                             'formats but ffmpeg or avconv are not installed.'
1236                                             ' The formats won\'t be merged')
1237                     else:
1238                         postprocessors = [merger]
1239                     for f in info_dict['requested_formats']:
1240                         new_info = dict(info_dict)
1241                         new_info.update(f)
1242                         fname = self.prepare_filename(new_info)
1243                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1244                         downloaded.append(fname)
1245                         partial_success = dl(fname, new_info)
1246                         success = success and partial_success
1247                     info_dict['__postprocessors'] = postprocessors
1248                     info_dict['__files_to_merge'] = downloaded
1249                 else:
1250                     # Just a single file
1251                     success = dl(filename, info_dict)
1252             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1253                 self.report_error('unable to download video data: %s' % str(err))
1254                 return
1255             except (OSError, IOError) as err:
1256                 raise UnavailableVideoError(err)
1257             except (ContentTooShortError, ) as err:
1258                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1259                 return
1260
1261             if success:
1262                 # Fixup content
1263                 fixup_policy = self.params.get('fixup')
1264                 if fixup_policy is None:
1265                     fixup_policy = 'detect_or_warn'
1266
1267                 stretched_ratio = info_dict.get('stretched_ratio')
1268                 if stretched_ratio is not None and stretched_ratio != 1:
1269                     if fixup_policy == 'warn':
1270                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1271                             info_dict['id'], stretched_ratio))
1272                     elif fixup_policy == 'detect_or_warn':
1273                         stretched_pp = FFmpegFixupStretchedPP(self)
1274                         if stretched_pp.available:
1275                             info_dict.setdefault('__postprocessors', [])
1276                             info_dict['__postprocessors'].append(stretched_pp)
1277                         else:
1278                             self.report_warning(
1279                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1280                                     info_dict['id'], stretched_ratio))
1281                     else:
1282                         assert fixup_policy in ('ignore', 'never')
1283
1284                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1285                     if fixup_policy == 'warn':
1286                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1287                             info_dict['id']))
1288                     elif fixup_policy == 'detect_or_warn':
1289                         fixup_pp = FFmpegFixupM4aPP(self)
1290                         if fixup_pp.available:
1291                             info_dict.setdefault('__postprocessors', [])
1292                             info_dict['__postprocessors'].append(fixup_pp)
1293                         else:
1294                             self.report_warning(
1295                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1296                                     info_dict['id']))
1297                     else:
1298                         assert fixup_policy in ('ignore', 'never')
1299
1300                 try:
1301                     self.post_process(filename, info_dict)
1302                 except (PostProcessingError) as err:
1303                     self.report_error('postprocessing: %s' % str(err))
1304                     return
1305                 self.record_download_archive(info_dict)
1306
1307     def download(self, url_list):
1308         """Download a given list of URLs."""
1309         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1310         if (len(url_list) > 1 and
1311                 '%' not in outtmpl
1312                 and self.params.get('max_downloads') != 1):
1313             raise SameFileError(outtmpl)
1314
1315         for url in url_list:
1316             try:
1317                 # It also downloads the videos
1318                 res = self.extract_info(url)
1319             except UnavailableVideoError:
1320                 self.report_error('unable to download video')
1321             except MaxDownloadsReached:
1322                 self.to_screen('[info] Maximum number of downloaded files reached.')
1323                 raise
1324             else:
1325                 if self.params.get('dump_single_json', False):
1326                     self.to_stdout(json.dumps(res))
1327
1328         return self._download_retcode
1329
1330     def download_with_info_file(self, info_filename):
1331         with io.open(info_filename, 'r', encoding='utf-8') as f:
1332             info = json.load(f)
1333         try:
1334             self.process_ie_result(info, download=True)
1335         except DownloadError:
1336             webpage_url = info.get('webpage_url')
1337             if webpage_url is not None:
1338                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1339                 return self.download([webpage_url])
1340             else:
1341                 raise
1342         return self._download_retcode
1343
1344     def post_process(self, filename, ie_info):
1345         """Run all the postprocessors on the given file."""
1346         info = dict(ie_info)
1347         info['filepath'] = filename
1348         pps_chain = []
1349         if ie_info.get('__postprocessors') is not None:
1350             pps_chain.extend(ie_info['__postprocessors'])
1351         pps_chain.extend(self._pps)
1352         for pp in pps_chain:
1353             keep_video = None
1354             old_filename = info['filepath']
1355             try:
1356                 keep_video_wish, info = pp.run(info)
1357                 if keep_video_wish is not None:
1358                     if keep_video_wish:
1359                         keep_video = keep_video_wish
1360                     elif keep_video is None:
1361                         # No clear decision yet, let IE decide
1362                         keep_video = keep_video_wish
1363             except PostProcessingError as e:
1364                 self.report_error(e.msg)
1365             if keep_video is False and not self.params.get('keepvideo', False):
1366                 try:
1367                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1368                     os.remove(encodeFilename(old_filename))
1369                 except (IOError, OSError):
1370                     self.report_warning('Unable to remove downloaded video file')
1371
1372     def _make_archive_id(self, info_dict):
1373         # Future-proof against any change in case
1374         # and backwards compatibility with prior versions
1375         extractor = info_dict.get('extractor_key')
1376         if extractor is None:
1377             if 'id' in info_dict:
1378                 extractor = info_dict.get('ie_key')  # key in a playlist
1379         if extractor is None:
1380             return None  # Incomplete video information
1381         return extractor.lower() + ' ' + info_dict['id']
1382
1383     def in_download_archive(self, info_dict):
1384         fn = self.params.get('download_archive')
1385         if fn is None:
1386             return False
1387
1388         vid_id = self._make_archive_id(info_dict)
1389         if vid_id is None:
1390             return False  # Incomplete video information
1391
1392         try:
1393             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1394                 for line in archive_file:
1395                     if line.strip() == vid_id:
1396                         return True
1397         except IOError as ioe:
1398             if ioe.errno != errno.ENOENT:
1399                 raise
1400         return False
1401
1402     def record_download_archive(self, info_dict):
1403         fn = self.params.get('download_archive')
1404         if fn is None:
1405             return
1406         vid_id = self._make_archive_id(info_dict)
1407         assert vid_id
1408         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1409             archive_file.write(vid_id + '\n')
1410
1411     @staticmethod
1412     def format_resolution(format, default='unknown'):
1413         if format.get('vcodec') == 'none':
1414             return 'audio only'
1415         if format.get('resolution') is not None:
1416             return format['resolution']
1417         if format.get('height') is not None:
1418             if format.get('width') is not None:
1419                 res = '%sx%s' % (format['width'], format['height'])
1420             else:
1421                 res = '%sp' % format['height']
1422         elif format.get('width') is not None:
1423             res = '?x%d' % format['width']
1424         else:
1425             res = default
1426         return res
1427
1428     def _format_note(self, fdict):
1429         res = ''
1430         if fdict.get('ext') in ['f4f', 'f4m']:
1431             res += '(unsupported) '
1432         if fdict.get('format_note') is not None:
1433             res += fdict['format_note'] + ' '
1434         if fdict.get('tbr') is not None:
1435             res += '%4dk ' % fdict['tbr']
1436         if fdict.get('container') is not None:
1437             if res:
1438                 res += ', '
1439             res += '%s container' % fdict['container']
1440         if (fdict.get('vcodec') is not None and
1441                 fdict.get('vcodec') != 'none'):
1442             if res:
1443                 res += ', '
1444             res += fdict['vcodec']
1445             if fdict.get('vbr') is not None:
1446                 res += '@'
1447         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1448             res += 'video@'
1449         if fdict.get('vbr') is not None:
1450             res += '%4dk' % fdict['vbr']
1451         if fdict.get('fps') is not None:
1452             res += ', %sfps' % fdict['fps']
1453         if fdict.get('acodec') is not None:
1454             if res:
1455                 res += ', '
1456             if fdict['acodec'] == 'none':
1457                 res += 'video only'
1458             else:
1459                 res += '%-5s' % fdict['acodec']
1460         elif fdict.get('abr') is not None:
1461             if res:
1462                 res += ', '
1463             res += 'audio'
1464         if fdict.get('abr') is not None:
1465             res += '@%3dk' % fdict['abr']
1466         if fdict.get('asr') is not None:
1467             res += ' (%5dHz)' % fdict['asr']
1468         if fdict.get('filesize') is not None:
1469             if res:
1470                 res += ', '
1471             res += format_bytes(fdict['filesize'])
1472         elif fdict.get('filesize_approx') is not None:
1473             if res:
1474                 res += ', '
1475             res += '~' + format_bytes(fdict['filesize_approx'])
1476         return res
1477
1478     def list_formats(self, info_dict):
1479         def line(format, idlen=20):
1480             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1481                 format['format_id'],
1482                 format['ext'],
1483                 self.format_resolution(format),
1484                 self._format_note(format),
1485             ))
1486
1487         formats = info_dict.get('formats', [info_dict])
1488         idlen = max(len('format code'),
1489                     max(len(f['format_id']) for f in formats))
1490         formats_s = [
1491             line(f, idlen) for f in formats
1492             if f.get('preference') is None or f['preference'] >= -1000]
1493         if len(formats) > 1:
1494             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1495             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1496
1497         header_line = line({
1498             'format_id': 'format code', 'ext': 'extension',
1499             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1500         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1501                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1502
1503     def urlopen(self, req):
1504         """ Start an HTTP download """
1505
1506         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1507         # always respected by websites, some tend to give out URLs with non percent-encoded
1508         # non-ASCII characters (see telemb.py, ard.py [#3412])
1509         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1510         # To work around aforementioned issue we will replace request's original URL with
1511         # percent-encoded one
1512         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1513         url = req if req_is_string else req.get_full_url()
1514         url_escaped = escape_url(url)
1515
1516         # Substitute URL if any change after escaping
1517         if url != url_escaped:
1518             if req_is_string:
1519                 req = url_escaped
1520             else:
1521                 req = compat_urllib_request.Request(
1522                     url_escaped, data=req.data, headers=req.headers,
1523                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1524
1525         return self._opener.open(req, timeout=self._socket_timeout)
1526
1527     def print_debug_header(self):
1528         if not self.params.get('verbose'):
1529             return
1530
1531         if type('') is not compat_str:
1532             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1533             self.report_warning(
1534                 'Your Python is broken! Update to a newer and supported version')
1535
1536         stdout_encoding = getattr(
1537             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1538         encoding_str = (
1539             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1540                 locale.getpreferredencoding(),
1541                 sys.getfilesystemencoding(),
1542                 stdout_encoding,
1543                 self.get_encoding()))
1544         write_string(encoding_str, encoding=None)
1545
1546         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1547         try:
1548             sp = subprocess.Popen(
1549                 ['git', 'rev-parse', '--short', 'HEAD'],
1550                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1551                 cwd=os.path.dirname(os.path.abspath(__file__)))
1552             out, err = sp.communicate()
1553             out = out.decode().strip()
1554             if re.match('[0-9a-f]+', out):
1555                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1556         except:
1557             try:
1558                 sys.exc_clear()
1559             except:
1560                 pass
1561         self._write_string('[debug] Python version %s - %s\n' % (
1562             platform.python_version(), platform_name()))
1563
1564         exe_versions = FFmpegPostProcessor.get_versions()
1565         exe_versions['rtmpdump'] = rtmpdump_version()
1566         exe_str = ', '.join(
1567             '%s %s' % (exe, v)
1568             for exe, v in sorted(exe_versions.items())
1569             if v
1570         )
1571         if not exe_str:
1572             exe_str = 'none'
1573         self._write_string('[debug] exe versions: %s\n' % exe_str)
1574
1575         proxy_map = {}
1576         for handler in self._opener.handlers:
1577             if hasattr(handler, 'proxies'):
1578                 proxy_map.update(handler.proxies)
1579         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1580
1581         if self.params.get('call_home', False):
1582             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1583             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1584             latest_version = self.urlopen(
1585                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1586             if version_tuple(latest_version) > version_tuple(__version__):
1587                 self.report_warning(
1588                     'You are using an outdated version (newest version: %s)! '
1589                     'See https://yt-dl.org/update if you need help updating.' %
1590                     latest_version)
1591
1592     def _setup_opener(self):
1593         timeout_val = self.params.get('socket_timeout')
1594         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1595
1596         opts_cookiefile = self.params.get('cookiefile')
1597         opts_proxy = self.params.get('proxy')
1598
1599         if opts_cookiefile is None:
1600             self.cookiejar = compat_cookiejar.CookieJar()
1601         else:
1602             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1603                 opts_cookiefile)
1604             if os.access(opts_cookiefile, os.R_OK):
1605                 self.cookiejar.load()
1606
1607         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1608             self.cookiejar)
1609         if opts_proxy is not None:
1610             if opts_proxy == '':
1611                 proxies = {}
1612             else:
1613                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1614         else:
1615             proxies = compat_urllib_request.getproxies()
1616             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1617             if 'http' in proxies and 'https' not in proxies:
1618                 proxies['https'] = proxies['http']
1619         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1620
1621         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1622         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1623         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1624         opener = compat_urllib_request.build_opener(
1625             https_handler, proxy_handler, cookie_processor, ydlh)
1626         # Delete the default user-agent header, which would otherwise apply in
1627         # cases where our custom HTTP handler doesn't come into play
1628         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1629         opener.addheaders = []
1630         self._opener = opener
1631
1632     def encode(self, s):
1633         if isinstance(s, bytes):
1634             return s  # Already encoded
1635
1636         try:
1637             return s.encode(self.get_encoding())
1638         except UnicodeEncodeError as err:
1639             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1640             raise
1641
1642     def get_encoding(self):
1643         encoding = self.params.get('encoding')
1644         if encoding is None:
1645             encoding = preferredencoding()
1646         return encoding