YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_basestring,
  29     compat_cookiejar,
  30     compat_expanduser,
  31     compat_http_client,
  32     compat_kwargs,
  33     compat_str,
  34     compat_urllib_error,
  35     compat_urllib_request,
  36 )
  37 from .utils import (
  38     escape_url,
  39     ContentTooShortError,
  40     date_from_str,
  41     DateRange,
  42     DEFAULT_OUTTMPL,
  43     determine_ext,
  44     DownloadError,
  45     encodeFilename,
  46     ExtractorError,
  47     format_bytes,
  48     formatSeconds,
  49     get_term_width,
  50     locked_file,
  51     make_HTTPS_handler,
  52     MaxDownloadsReached,
  53     PagedList,
  54     parse_filesize,
  55     PostProcessingError,
  56     platform_name,
  57     preferredencoding,
  58     render_table,
  59     SameFileError,
  60     sanitize_filename,
  61     std_headers,
  62     subtitles_filename,
  63     takewhile_inclusive,
  64     UnavailableVideoError,
  65     url_basename,
  66     version_tuple,
  67     write_json_file,
  68     write_string,
  69     YoutubeDLHandler,
  70     prepend_extension,
  71     args_to_str,
  72     age_restricted,
  73 )
  74 from .cache import Cache
  75 from .extractor import get_info_extractor, gen_extractors
  76 from .downloader import get_suitable_downloader
  77 from .downloader.rtmp import rtmpdump_version
  78 from .postprocessor import (
  79     FFmpegFixupM4aPP,
  80     FFmpegFixupStretchedPP,
  81     FFmpegMergerPP,
  82     FFmpegPostProcessor,
  83     get_postprocessor,
  84 )
  85 from .version import __version__
  86
  87
  88 class YoutubeDL(object):
  89     """YoutubeDL class.
  90
    YoutubeDL objects are the ones responsible for downloading the
  92     actual video file and writing it to disk if the user has requested
  93     it, among some other tasks. In most cases there should be one per
  94     program. As, given a video URL, the downloader doesn't know how to
  95     extract all the needed information, task that InfoExtractors do, it
  96     has to pass the URL to one of them.
  97
  98     For this, YoutubeDL objects have a method that allows
  99     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
 101     finds that reports being able to handle it. The InfoExtractor extracts
 102     all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
 104     Downloader to download the video.
 105
 106     YoutubeDL objects accept a lot of parameters. In order not to saturate
 107     the object constructor with arguments, it receives a dictionary of
 108     options instead. These options are available through the params
 109     attribute for the InfoExtractors to use. The YoutubeDL also
 110     registers itself as the downloader in charge for the InfoExtractors
 111     that are added to it, so this is a "mutual registration".
 112
 113     Available options:
 114
 115     username:          Username for authentication purposes.
 116     password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
 118     usenetrc:          Use netrc for authentication instead.
 119     verbose:           Print additional info to stdout.
 120     quiet:             Do not print messages to stdout.
 121     no_warnings:       Do not print out anything for warnings.
 122     forceurl:          Force printing final URL.
 123     forcetitle:        Force printing title.
 124     forceid:           Force printing ID.
 125     forcethumbnail:    Force printing thumbnail URL.
 126     forcedescription:  Force printing description.
 127     forcefilename:     Force printing final filename.
 128     forceduration:     Force printing duration.
 129     forcejson:         Force printing info_dict as JSON.
 130     dump_single_json:  Force printing the info_dict of the whole playlist
 131                        (or video) as a single JSON line.
 132     simulate:          Do not download the video files.
 133     format:            Video format code. See options.py for more information.
 134     format_limit:      Highest quality format to try.
 135     outtmpl:           Template for output names.
 136     restrictfilenames: Do not allow "&" and spaces in file names
 137     ignoreerrors:      Do not stop on download errors.
 138     nooverwrites:      Prevent overwriting files.
 139     playliststart:     Playlist item to start at.
 140     playlistend:       Playlist item to end at.
 141     playlist_items:    Specific indices of playlist to download.
 142     playlistreverse:   Download playlist items in reverse order.
 143     matchtitle:        Download only matching titles.
 144     rejecttitle:       Reject downloads for matching titles.
 145     logger:            Log messages to a logging.Logger instance.
 146     logtostderr:       Log messages to stderr instead of stdout.
 147     writedescription:  Write the video description to a .description file
 148     writeinfojson:     Write the video description to a .info.json file
 149     writeannotations:  Write the video annotations to a .annotations.xml file
 150     writethumbnail:    Write the thumbnail image to a file
 151     write_all_thumbnails:  Write all thumbnail formats to files
 152     writesubtitles:    Write the video subtitles to a file
 153     writeautomaticsub: Write the automatic subtitles to a file
 154     allsubtitles:      Downloads all the subtitles of the video
 155                        (requires writesubtitles or writeautomaticsub)
 156     listsubtitles:     Lists all available subtitles for the video
 157     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 158     subtitleslangs:    List of languages of the subtitles to download
 159     keepvideo:         Keep the video file after post-processing
 160     daterange:         A DateRange object, download only if the upload_date is in the range.
 161     skip_download:     Skip the actual download of the video file
 162     cachedir:          Location of the cache files in the filesystem.
 163                        False to disable filesystem cache.
 164     noplaylist:        Download single video instead of a playlist if in doubt.
 165     age_limit:         An integer representing the user's age in years.
 166                        Unsuitable videos for the given age are skipped.
 167     min_views:         An integer representing the minimum view count the video
 168                        must have in order to not be skipped.
 169                        Videos without view count information are always
 170                        downloaded. None for no limit.
 171     max_views:         An integer representing the maximum view count.
 172                        Videos that are more popular than that are not
 173                        downloaded.
 174                        Videos without view count information are always
 175                        downloaded. None for no limit.
 176     download_archive:  File name of a file where all downloads are recorded.
 177                        Videos already present in the file are not downloaded
 178                        again.
 179     cookiefile:        File name where cookies should be read from and dumped to.
 180     nocheckcertificate:Do not verify SSL certificates
 181     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 182                        At the moment, this is only supported by YouTube.
 183     proxy:             URL of the proxy server to use
 184     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 185     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 187     debug_printtraffic:Print out sent and received HTTP traffic
 188     include_ads:       Download ads as well
 189     default_search:    Prepend this string if an input url is not valid.
 190                        'auto' for elaborate guessing
 191     encoding:          Use this encoding instead of the system-specified.
 192     extract_flat:      Do not resolve URLs, return the immediate result.
 193                        Pass in 'in_playlist' to only show this behavior for
 194                        playlist items.
 195     postprocessors:    A list of dictionaries, each with an entry
 196                        * key:  The name of the postprocessor. See
 197                                youtube_dl/postprocessor/__init__.py for a list.
 198                        as well as any further keyword arguments for the
 199                        postprocessor.
 200     progress_hooks:    A list of functions that get called on download
 201                        progress, with a dictionary with the entries
 202                        * status: One of "downloading" and "finished".
 203                                  Check this first and ignore unknown values.
 204
 205                        If status is one of "downloading" or "finished", the
 206                        following properties may also be present:
 207                        * filename: The final filename (always present)
 208                        * downloaded_bytes: Bytes on disk
 209                        * total_bytes: Size of the whole file, None if unknown
 210                        * tmpfilename: The filename we're currently writing to
 211                        * eta: The estimated time in seconds, None if unknown
 212                        * speed: The download speed in bytes/second, None if
 213                                 unknown
 214
 215                        Progress hooks are guaranteed to be called at least once
 216                        (with status "finished") if the download is successful.
 217     merge_output_format: Extension to use when merging formats.
 218     fixup:             Automatically correct known faults of the file.
 219                        One of:
 220                        - "never": do nothing
 221                        - "warn": only emit a warning
 222                        - "detect_or_warn": check whether we can do anything
 223                                            about it, warn otherwise (default)
 224     source_address:    (Experimental) Client-side IP address to bind to.
 225     call_home:         Boolean, true iff we are allowed to contact the
 226                        youtube-dl servers for debugging.
 227     sleep_interval:    Number of seconds to sleep before each download.
 228     external_downloader:  Executable of the external downloader to call.
 229     listformats:       Print an overview of available video formats and exit.
 230     list_thumbnails:   Print a table of all thumbnails and exit.
 231     match_filter:      A function that gets called with the info_dict of
 232                        every video.
 233                        If it returns a message, the video is ignored.
 234                        If it returns None, the video is downloaded.
 235                        match_filter_func in utils.py is one example for this.
 236     no_color:          Do not emit color codes in output.
 237
 238
 239     The following parameters are not used by YoutubeDL itself, they are used by
 240     the FileDownloader:
 241     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 242     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 243     xattr_set_filesize.
 244
 245     The following options are used by the post processors:
 246     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 247                        otherwise prefer avconv.
 248     exec_cmd:          Arbitrary command to run after downloading
 249     """
 250
    # Class-level defaults for the per-instance state. __init__ rebinds every
    # one of these names on the instance it creates, so the mutable lists
    # below only act as placeholders.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 257
 258     def __init__(self, params=None, auto_init=True):
 259         """Create a FileDownloader object with the given options."""
 260         if params is None:
 261             params = {}
 262         self._ies = []
 263         self._ies_instances = {}
 264         self._pps = []
 265         self._progress_hooks = []
 266         self._download_retcode = 0
 267         self._num_downloads = 0
 268         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 269         self._err_file = sys.stderr
 270         self.params = params
 271         self.cache = Cache(self)
 272
 273         if params.get('bidi_workaround', False):
 274             try:
 275                 import pty
 276                 master, slave = pty.openpty()
 277                 width = get_term_width()
 278                 if width is None:
 279                     width_args = []
 280                 else:
 281                     width_args = ['-w', str(width)]
 282                 sp_kwargs = dict(
 283                     stdin=subprocess.PIPE,
 284                     stdout=slave,
 285                     stderr=self._err_file)
 286                 try:
 287                     self._output_process = subprocess.Popen(
 288                         ['bidiv'] + width_args, **sp_kwargs
 289                     )
 290                 except OSError:
 291                     self._output_process = subprocess.Popen(
 292                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 293                 self._output_channel = os.fdopen(master, 'rb')
 294             except OSError as ose:
 295                 if ose.errno == 2:
 296                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 297                 else:
 298                     raise
 299
 300         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 301                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 302                 and not params.get('restrictfilenames', False)):
 303             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 304             self.report_warning(
 305                 'Assuming --restrict-filenames since file system encoding '
 306                 'cannot encode all characters. '
 307                 'Set the LC_ALL environment variable to fix this.')
 308             self.params['restrictfilenames'] = True
 309
 310         if '%(stitle)s' in self.params.get('outtmpl', ''):
 311             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 312
 313         self._setup_opener()
 314
 315         if auto_init:
 316             self.print_debug_header()
 317             self.add_default_info_extractors()
 318
 319         for pp_def_raw in self.params.get('postprocessors', []):
 320             pp_class = get_postprocessor(pp_def_raw['key'])
 321             pp_def = dict(pp_def_raw)
 322             del pp_def['key']
 323             pp = pp_class(self, **compat_kwargs(pp_def))
 324             self.add_post_processor(pp)
 325
 326         for ph in self.params.get('progress_hooks', []):
 327             self.add_progress_hook(ph)
 328
 329     def warn_if_short_id(self, argv):
 330         # short YouTube ID starting with dash?
 331         idxs = [
 332             i for i, a in enumerate(argv)
 333             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 334         if idxs:
 335             correct_argv = (
 336                 ['youtube-dl'] +
 337                 [a for i, a in enumerate(argv) if i not in idxs] +
 338                 ['--'] + [argv[i] for i in idxs]
 339             )
 340             self.report_warning(
 341                 'Long argument string detected. '
 342                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 343                 args_to_str(correct_argv))
 344
 345     def add_info_extractor(self, ie):
 346         """Add an InfoExtractor object to the end of the list."""
 347         self._ies.append(ie)
 348         self._ies_instances[ie.ie_key()] = ie
 349         ie.set_downloader(self)
 350
 351     def get_info_extractor(self, ie_key):
 352         """
 353         Get an instance of an IE with name ie_key, it will try to get one from
 354         the _ies list, if there's no instance it will create a new one and add
 355         it to the extractor list.
 356         """
 357         ie = self._ies_instances.get(ie_key)
 358         if ie is None:
 359             ie = get_info_extractor(ie_key)()
 360             self.add_info_extractor(ie)
 361         return ie
 362
 363     def add_default_info_extractors(self):
 364         """
 365         Add the InfoExtractors returned by gen_extractors to the end of the list
 366         """
 367         for ie in gen_extractors():
 368             self.add_info_extractor(ie)
 369
 370     def add_post_processor(self, pp):
 371         """Add a PostProcessor object to the end of the chain."""
 372         self._pps.append(pp)
 373         pp.set_downloader(self)
 374
 375     def add_progress_hook(self, ph):
 376         """Add the progress hook (currently only for the file downloader)"""
 377         self._progress_hooks.append(ph)
 378
 379     def _bidi_workaround(self, message):
 380         if not hasattr(self, '_output_channel'):
 381             return message
 382
 383         assert hasattr(self, '_output_process')
 384         assert isinstance(message, compat_str)
 385         line_count = message.count('\n') + 1
 386         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 387         self._output_process.stdin.flush()
 388         res = ''.join(self._output_channel.readline().decode('utf-8')
 389                       for _ in range(line_count))
 390         return res[:-len('\n')]
 391
 392     def to_screen(self, message, skip_eol=False):
 393         """Print message to stdout if not in quiet mode."""
 394         return self.to_stdout(message, skip_eol, check_quiet=True)
 395
 396     def _write_string(self, s, out=None):
 397         write_string(s, out=out, encoding=self.params.get('encoding'))
 398
 399     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 400         """Print message to stdout if not in quiet mode."""
 401         if self.params.get('logger'):
 402             self.params['logger'].debug(message)
 403         elif not check_quiet or not self.params.get('quiet', False):
 404             message = self._bidi_workaround(message)
 405             terminator = ['\n', ''][skip_eol]
 406             output = message + terminator
 407
 408             self._write_string(output, self._screen_file)
 409
 410     def to_stderr(self, message):
 411         """Print message to stderr."""
 412         assert isinstance(message, compat_str)
 413         if self.params.get('logger'):
 414             self.params['logger'].error(message)
 415         else:
 416             message = self._bidi_workaround(message)
 417             output = message + '\n'
 418             self._write_string(output, self._err_file)
 419
 420     def to_console_title(self, message):
 421         if not self.params.get('consoletitle', False):
 422             return
 423         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 424             # c_wchar_p() might not be necessary if `message` is
 425             # already of type unicode()
 426             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 427         elif 'TERM' in os.environ:
 428             self._write_string('\033]0;%s\007' % message, self._screen_file)
 429
 430     def save_console_title(self):
 431         if not self.params.get('consoletitle', False):
 432             return
 433         if 'TERM' in os.environ:
 434             # Save the title on stack
 435             self._write_string('\033[22;0t', self._screen_file)
 436
 437     def restore_console_title(self):
 438         if not self.params.get('consoletitle', False):
 439             return
 440         if 'TERM' in os.environ:
 441             # Restore the title from stack
 442             self._write_string('\033[23;0t', self._screen_file)
 443
 444     def __enter__(self):
 445         self.save_console_title()
 446         return self
 447
 448     def __exit__(self, *args):
 449         self.restore_console_title()
 450
 451         if self.params.get('cookiefile') is not None:
 452             self.cookiejar.save()
 453
 454     def trouble(self, message=None, tb=None):
 455         """Determine action to take when a download problem appears.
 456
 457         Depending on if the downloader has been configured to ignore
 458         download errors or not, this method may throw an exception or
 459         not when errors are found, after printing the message.
 460
 461         tb, if given, is additional traceback information.
 462         """
 463         if message is not None:
 464             self.to_stderr(message)
 465         if self.params.get('verbose'):
 466             if tb is None:
 467                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 468                     tb = ''
 469                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 470                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 471                     tb += compat_str(traceback.format_exc())
 472                 else:
 473                     tb_data = traceback.format_list(traceback.extract_stack())
 474                     tb = ''.join(tb_data)
 475             self.to_stderr(tb)
 476         if not self.params.get('ignoreerrors', False):
 477             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 478                 exc_info = sys.exc_info()[1].exc_info
 479             else:
 480                 exc_info = sys.exc_info()
 481             raise DownloadError(message, exc_info)
 482         self._download_retcode = 1
 483
 484     def report_warning(self, message):
 485         '''
 486         Print the message to stderr, it will be prefixed with 'WARNING:'
 487         If stderr is a tty file the 'WARNING:' will be colored
 488         '''
 489         if self.params.get('logger') is not None:
 490             self.params['logger'].warning(message)
 491         else:
 492             if self.params.get('no_warnings'):
 493                 return
 494             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 495                 _msg_header = '\033[0;33mWARNING:\033[0m'
 496             else:
 497                 _msg_header = 'WARNING:'
 498             warning_message = '%s %s' % (_msg_header, message)
 499             self.to_stderr(warning_message)
 500
 501     def report_error(self, message, tb=None):
 502         '''
 503         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 504         in red if stderr is a tty file.
 505         '''
 506         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 507             _msg_header = '\033[0;31mERROR:\033[0m'
 508         else:
 509             _msg_header = 'ERROR:'
 510         error_message = '%s %s' % (_msg_header, message)
 511         self.trouble(error_message, tb)
 512
 513     def report_file_already_downloaded(self, file_name):
 514         """Report file has already been fully downloaded."""
 515         try:
 516             self.to_screen('[download] %s has already been downloaded' % file_name)
 517         except UnicodeEncodeError:
 518             self.to_screen('[download] The file has already been downloaded')
 519
 520     def prepare_filename(self, info_dict):
 521         """Generate the output filename."""
 522         try:
 523             template_dict = dict(info_dict)
 524
 525             template_dict['epoch'] = int(time.time())
 526             autonumber_size = self.params.get('autonumber_size')
 527             if autonumber_size is None:
 528                 autonumber_size = 5
 529             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 530             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 531             if template_dict.get('playlist_index') is not None:
 532                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 533             if template_dict.get('resolution') is None:
 534                 if template_dict.get('width') and template_dict.get('height'):
 535                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 536                 elif template_dict.get('height'):
 537                     template_dict['resolution'] = '%sp' % template_dict['height']
 538                 elif template_dict.get('width'):
 539                     template_dict['resolution'] = '?x%d' % template_dict['width']
 540
 541             sanitize = lambda k, v: sanitize_filename(
 542                 compat_str(v),
 543                 restricted=self.params.get('restrictfilenames'),
 544                 is_id=(k == 'id'))
 545             template_dict = dict((k, sanitize(k, v))
 546                                  for k, v in template_dict.items()
 547                                  if v is not None)
 548             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 549
 550             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 551             tmpl = compat_expanduser(outtmpl)
 552             filename = tmpl % template_dict
 553             # Temporary fix for #4787
 554             # 'Treat' all problem characters by passing filename through preferredencoding
 555             # to workaround encoding issues with subprocess on python2 @ Windows
 556             if sys.version_info < (3, 0) and sys.platform == 'win32':
 557                 filename = encodeFilename(filename, True).decode(preferredencoding())
 558             return filename
 559         except ValueError as err:
 560             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 561             return None
 562
 563     def _match_entry(self, info_dict, incomplete):
 564         """ Returns None iff the file should be downloaded """
 565
 566         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 567         if 'title' in info_dict:
 568             # This can happen when we're just evaluating the playlist
 569             title = info_dict['title']
 570             matchtitle = self.params.get('matchtitle', False)
 571             if matchtitle:
 572                 if not re.search(matchtitle, title, re.IGNORECASE):
 573                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 574             rejecttitle = self.params.get('rejecttitle', False)
 575             if rejecttitle:
 576                 if re.search(rejecttitle, title, re.IGNORECASE):
 577                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 578         date = info_dict.get('upload_date', None)
 579         if date is not None:
 580             dateRange = self.params.get('daterange', DateRange())
 581             if date not in dateRange:
 582                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 583         view_count = info_dict.get('view_count', None)
 584         if view_count is not None:
 585             min_views = self.params.get('min_views')
 586             if min_views is not None and view_count < min_views:
 587                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 588             max_views = self.params.get('max_views')
 589             if max_views is not None and view_count > max_views:
 590                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 591         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 592             return 'Skipping "%s" because it is age restricted' % video_title
 593         if self.in_download_archive(info_dict):
 594             return '%s has already been recorded in archive' % video_title
 595
 596         if not incomplete:
 597             match_filter = self.params.get('match_filter')
 598             if match_filter is not None:
 599                 ret = match_filter(info_dict)
 600                 if ret is not None:
 601                     return ret
 602
 603         return None
 604
 605     @staticmethod
 606     def add_extra_info(info_dict, extra_info):
 607         '''Set the keys from extra_info in info dict if they are missing'''
 608         for key, value in extra_info.items():
 609             info_dict.setdefault(key, value)
 610
 611     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 612                      process=True):
 613         '''
 614         Returns a list with a dictionary for each video we find.
 615         If 'download', also downloads the videos.
 616         extra_info is a dict containing the extra values to add to each result
 617          '''
 618
 619         if ie_key:
 620             ies = [self.get_info_extractor(ie_key)]
 621         else:
 622             ies = self._ies
 623
 624         for ie in ies:
 625             if not ie.suitable(url):
 626                 continue
 627
 628             if not ie.working():
 629                 self.report_warning('The program functionality for this site has been marked as broken, '
 630                                     'and will probably not work.')
 631
 632             try:
 633                 ie_result = ie.extract(url)
 634                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 635                     break
 636                 if isinstance(ie_result, list):
 637                     # Backwards compatibility: old IE result format
 638                     ie_result = {
 639                         '_type': 'compat_list',
 640                         'entries': ie_result,
 641                     }
 642                 self.add_default_extra_info(ie_result, ie, url)
 643                 if process:
 644                     return self.process_ie_result(ie_result, download, extra_info)
 645                 else:
 646                     return ie_result
 647             except ExtractorError as de:  # An error we somewhat expected
 648                 self.report_error(compat_str(de), de.format_traceback())
 649                 break
 650             except MaxDownloadsReached:
 651                 raise
 652             except Exception as e:
 653                 if self.params.get('ignoreerrors', False):
 654                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 655                     break
 656                 else:
 657                     raise
 658         else:
 659             self.report_error('no suitable InfoExtractor for URL %s' % url)
 660
 661     def add_default_extra_info(self, ie_result, ie, url):
 662         self.add_extra_info(ie_result, {
 663             'extractor': ie.IE_NAME,
 664             'webpage_url': url,
 665             'webpage_url_basename': url_basename(url),
 666             'extractor_key': ie.ie_key(),
 667         })
 668
 669     def process_ie_result(self, ie_result, download=True, extra_info={}):
 670         """
 671         Take the result of the ie(may be modified) and resolve all unresolved
 672         references (URLs, playlist items).
 673
 674         It will also download the videos if 'download'.
 675         Returns the resolved ie_result.
 676         """
 677
 678         result_type = ie_result.get('_type', 'video')
 679
 680         if result_type in ('url', 'url_transparent'):
 681             extract_flat = self.params.get('extract_flat', False)
 682             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 683                     extract_flat is True):
 684                 if self.params.get('forcejson', False):
 685                     self.to_stdout(json.dumps(ie_result))
 686                 return ie_result
 687
 688         if result_type == 'video':
 689             self.add_extra_info(ie_result, extra_info)
 690             return self.process_video_result(ie_result, download=download)
 691         elif result_type == 'url':
 692             # We have to add extra_info to the results because it may be
 693             # contained in a playlist
 694             return self.extract_info(ie_result['url'],
 695                                      download,
 696                                      ie_key=ie_result.get('ie_key'),
 697                                      extra_info=extra_info)
 698         elif result_type == 'url_transparent':
 699             # Use the information from the embedding page
 700             info = self.extract_info(
 701                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 702                 extra_info=extra_info, download=False, process=False)
 703
 704             force_properties = dict(
 705                 (k, v) for k, v in ie_result.items() if v is not None)
 706             for f in ('_type', 'url'):
 707                 if f in force_properties:
 708                     del force_properties[f]
 709             new_result = info.copy()
 710             new_result.update(force_properties)
 711
 712             assert new_result.get('_type') != 'url_transparent'
 713
 714             return self.process_ie_result(
 715                 new_result, download=download, extra_info=extra_info)
 716         elif result_type == 'playlist' or result_type == 'multi_video':
 717             # We process each entry in the playlist
 718             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 719             self.to_screen('[download] Downloading playlist: %s' % playlist)
 720
 721             playlist_results = []
 722
 723             playliststart = self.params.get('playliststart', 1) - 1
 724             playlistend = self.params.get('playlistend', None)
 725             # For backwards compatibility, interpret -1 as whole list
 726             if playlistend == -1:
 727                 playlistend = None
 728
 729             playlistitems_str = self.params.get('playlist_items', None)
 730             playlistitems = None
 731             if playlistitems_str is not None:
 732                 def iter_playlistitems(format):
 733                     for string_segment in format.split(','):
 734                         if '-' in string_segment:
 735                             start, end = string_segment.split('-')
 736                             for item in range(int(start), int(end) + 1):
 737                                 yield int(item)
 738                         else:
 739                             yield int(string_segment)
 740                 playlistitems = iter_playlistitems(playlistitems_str)
 741
 742             ie_entries = ie_result['entries']
 743             if isinstance(ie_entries, list):
 744                 n_all_entries = len(ie_entries)
 745                 if playlistitems:
 746                     entries = [ie_entries[i - 1] for i in playlistitems]
 747                 else:
 748                     entries = ie_entries[playliststart:playlistend]
 749                 n_entries = len(entries)
 750                 self.to_screen(
 751                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 752                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 753             elif isinstance(ie_entries, PagedList):
 754                 if playlistitems:
 755                     entries = []
 756                     for item in playlistitems:
 757                         entries.extend(ie_entries.getslice(
 758                             item - 1, item
 759                         ))
 760                 else:
 761                     entries = ie_entries.getslice(
 762                         playliststart, playlistend)
 763                 n_entries = len(entries)
 764                 self.to_screen(
 765                     "[%s] playlist %s: Downloading %d videos" %
 766                     (ie_result['extractor'], playlist, n_entries))
 767             else:  # iterable
 768                 if playlistitems:
 769                     entry_list = list(ie_entries)
 770                     entries = [entry_list[i - 1] for i in playlistitems]
 771                 else:
 772                     entries = list(itertools.islice(
 773                         ie_entries, playliststart, playlistend))
 774                 n_entries = len(entries)
 775                 self.to_screen(
 776                     "[%s] playlist %s: Downloading %d videos" %
 777                     (ie_result['extractor'], playlist, n_entries))
 778
 779             if self.params.get('playlistreverse', False):
 780                 entries = entries[::-1]
 781
 782             for i, entry in enumerate(entries, 1):
 783                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 784                 extra = {
 785                     'n_entries': n_entries,
 786                     'playlist': playlist,
 787                     'playlist_id': ie_result.get('id'),
 788                     'playlist_title': ie_result.get('title'),
 789                     'playlist_index': i + playliststart,
 790                     'extractor': ie_result['extractor'],
 791                     'webpage_url': ie_result['webpage_url'],
 792                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 793                     'extractor_key': ie_result['extractor_key'],
 794                 }
 795
 796                 reason = self._match_entry(entry, incomplete=True)
 797                 if reason is not None:
 798                     self.to_screen('[download] ' + reason)
 799                     continue
 800
 801                 entry_result = self.process_ie_result(entry,
 802                                                       download=download,
 803                                                       extra_info=extra)
 804                 playlist_results.append(entry_result)
 805             ie_result['entries'] = playlist_results
 806             return ie_result
 807         elif result_type == 'compat_list':
 808             self.report_warning(
 809                 'Extractor %s returned a compat_list result. '
 810                 'It needs to be updated.' % ie_result.get('extractor'))
 811
 812             def _fixup(r):
 813                 self.add_extra_info(
 814                     r,
 815                     {
 816                         'extractor': ie_result['extractor'],
 817                         'webpage_url': ie_result['webpage_url'],
 818                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 819                         'extractor_key': ie_result['extractor_key'],
 820                     }
 821                 )
 822                 return r
 823             ie_result['entries'] = [
 824                 self.process_ie_result(_fixup(r), download, extra_info)
 825                 for r in ie_result['entries']
 826             ]
 827             return ie_result
 828         else:
 829             raise Exception('Invalid result type: %s' % result_type)
 830
 831     def _apply_format_filter(self, format_spec, available_formats):
 832         " Returns a tuple of the remaining format_spec and filtered formats "
 833
 834         OPERATORS = {
 835             '<': operator.lt,
 836             '<=': operator.le,
 837             '>': operator.gt,
 838             '>=': operator.ge,
 839             '=': operator.eq,
 840             '!=': operator.ne,
 841         }
 842         operator_rex = re.compile(r'''(?x)\s*\[
 843             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 844             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 845             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 846             \]$
 847             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 848         m = operator_rex.search(format_spec)
 849         if m:
 850             try:
 851                 comparison_value = int(m.group('value'))
 852             except ValueError:
 853                 comparison_value = parse_filesize(m.group('value'))
 854                 if comparison_value is None:
 855                     comparison_value = parse_filesize(m.group('value') + 'B')
 856                 if comparison_value is None:
 857                     raise ValueError(
 858                         'Invalid value %r in format specification %r' % (
 859                             m.group('value'), format_spec))
 860             op = OPERATORS[m.group('op')]
 861
 862         if not m:
 863             STR_OPERATORS = {
 864                 '=': operator.eq,
 865                 '!=': operator.ne,
 866             }
 867             str_operator_rex = re.compile(r'''(?x)\s*\[
 868                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 869                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 870                 \s*(?P<value>[a-zA-Z0-9_-]+)
 871                 \s*\]$
 872                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 873             m = str_operator_rex.search(format_spec)
 874             if m:
 875                 comparison_value = m.group('value')
 876                 op = STR_OPERATORS[m.group('op')]
 877
 878         if not m:
 879             raise ValueError('Invalid format specification %r' % format_spec)
 880
 881         def _filter(f):
 882             actual_value = f.get(m.group('key'))
 883             if actual_value is None:
 884                 return m.group('none_inclusive')
 885             return op(actual_value, comparison_value)
 886         new_formats = [f for f in available_formats if _filter(f)]
 887
 888         new_format_spec = format_spec[:-len(m.group(0))]
 889         if not new_format_spec:
 890             new_format_spec = 'best'
 891
 892         return (new_format_spec, new_formats)
 893
 894     def select_format(self, format_spec, available_formats):
 895         while format_spec.endswith(']'):
 896             format_spec, available_formats = self._apply_format_filter(
 897                 format_spec, available_formats)
 898         if not available_formats:
 899             return None
 900
 901         if format_spec == 'best' or format_spec is None:
 902             return available_formats[-1]
 903         elif format_spec == 'worst':
 904             return available_formats[0]
 905         elif format_spec == 'bestaudio':
 906             audio_formats = [
 907                 f for f in available_formats
 908                 if f.get('vcodec') == 'none']
 909             if audio_formats:
 910                 return audio_formats[-1]
 911         elif format_spec == 'worstaudio':
 912             audio_formats = [
 913                 f for f in available_formats
 914                 if f.get('vcodec') == 'none']
 915             if audio_formats:
 916                 return audio_formats[0]
 917         elif format_spec == 'bestvideo':
 918             video_formats = [
 919                 f for f in available_formats
 920                 if f.get('acodec') == 'none']
 921             if video_formats:
 922                 return video_formats[-1]
 923         elif format_spec == 'worstvideo':
 924             video_formats = [
 925                 f for f in available_formats
 926                 if f.get('acodec') == 'none']
 927             if video_formats:
 928                 return video_formats[0]
 929         else:
 930             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 931             if format_spec in extensions:
 932                 filter_f = lambda f: f['ext'] == format_spec
 933             else:
 934                 filter_f = lambda f: f['format_id'] == format_spec
 935             matches = list(filter(filter_f, available_formats))
 936             if matches:
 937                 return matches[-1]
 938         return None
 939
 940     def _calc_headers(self, info_dict):
 941         res = std_headers.copy()
 942
 943         add_headers = info_dict.get('http_headers')
 944         if add_headers:
 945             res.update(add_headers)
 946
 947         cookies = self._calc_cookies(info_dict)
 948         if cookies:
 949             res['Cookie'] = cookies
 950
 951         return res
 952
 953     def _calc_cookies(self, info_dict):
 954         class _PseudoRequest(object):
 955             def __init__(self, url):
 956                 self.url = url
 957                 self.headers = {}
 958                 self.unverifiable = False
 959
 960             def add_unredirected_header(self, k, v):
 961                 self.headers[k] = v
 962
 963             def get_full_url(self):
 964                 return self.url
 965
 966             def is_unverifiable(self):
 967                 return self.unverifiable
 968
 969             def has_header(self, h):
 970                 return h in self.headers
 971
 972             def get_header(self, h, default=None):
 973                 return self.headers.get(h, default)
 974
 975         pr = _PseudoRequest(info_dict['url'])
 976         self.cookiejar.add_cookie_header(pr)
 977         return pr.headers.get('Cookie')
 978
 979     def process_video_result(self, info_dict, download=True):
 980         assert info_dict.get('_type', 'video') == 'video'
 981
 982         if 'id' not in info_dict:
 983             raise ExtractorError('Missing "id" field in extractor result')
 984         if 'title' not in info_dict:
 985             raise ExtractorError('Missing "title" field in extractor result')
 986
 987         if 'playlist' not in info_dict:
 988             # It isn't part of a playlist
 989             info_dict['playlist'] = None
 990             info_dict['playlist_index'] = None
 991
 992         thumbnails = info_dict.get('thumbnails')
 993         if thumbnails is None:
 994             thumbnail = info_dict.get('thumbnail')
 995             if thumbnail:
 996                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
 997         if thumbnails:
 998             thumbnails.sort(key=lambda t: (
 999                 t.get('preference'), t.get('width'), t.get('height'),
1000                 t.get('id'), t.get('url')))
1001             for i, t in enumerate(thumbnails):
1002                 if 'width' in t and 'height' in t:
1003                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1004                 if t.get('id') is None:
1005                     t['id'] = '%d' % i
1006
1007         if thumbnails and 'thumbnail' not in info_dict:
1008             info_dict['thumbnail'] = thumbnails[-1]['url']
1009
1010         if 'display_id' not in info_dict and 'id' in info_dict:
1011             info_dict['display_id'] = info_dict['id']
1012
1013         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1014             # Working around negative timestamps in Windows
1015             # (see http://bugs.python.org/issue1646728)
1016             if info_dict['timestamp'] < 0 and os.name == 'nt':
1017                 info_dict['timestamp'] = 0
1018             upload_date = datetime.datetime.utcfromtimestamp(
1019                 info_dict['timestamp'])
1020             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1021
1022         # This extractors handle format selection themselves
1023         if info_dict['extractor'] in ['Youku']:
1024             if download:
1025                 self.process_info(info_dict)
1026             return info_dict
1027
1028         # We now pick which formats have to be downloaded
1029         if info_dict.get('formats') is None:
1030             # There's only one format available
1031             formats = [info_dict]
1032         else:
1033             formats = info_dict['formats']
1034
1035         if not formats:
1036             raise ExtractorError('No video formats found!')
1037
1038         # We check that all the formats have the format and format_id fields
1039         for i, format in enumerate(formats):
1040             if 'url' not in format:
1041                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1042
1043             if format.get('format_id') is None:
1044                 format['format_id'] = compat_str(i)
1045             if format.get('format') is None:
1046                 format['format'] = '{id} - {res}{note}'.format(
1047                     id=format['format_id'],
1048                     res=self.format_resolution(format),
1049                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1050                 )
1051             # Automatically determine file extension if missing
1052             if 'ext' not in format:
1053                 format['ext'] = determine_ext(format['url']).lower()
1054             # Add HTTP headers, so that external programs can use them from the
1055             # json output
1056             full_format_info = info_dict.copy()
1057             full_format_info.update(format)
1058             format['http_headers'] = self._calc_headers(full_format_info)
1059
1060         format_limit = self.params.get('format_limit', None)
1061         if format_limit:
1062             formats = list(takewhile_inclusive(
1063                 lambda f: f['format_id'] != format_limit, formats
1064             ))
1065
1066         # TODO Central sorting goes here
1067
1068         if formats[0] is not info_dict:
1069             # only set the 'formats' fields if the original info_dict list them
1070             # otherwise we end up with a circular reference, the first (and unique)
1071             # element in the 'formats' field in info_dict is info_dict itself,
1072             # wich can't be exported to json
1073             info_dict['formats'] = formats
1074         if self.params.get('listformats'):
1075             self.list_formats(info_dict)
1076             return
1077         if self.params.get('list_thumbnails'):
1078             self.list_thumbnails(info_dict)
1079             return
1080
1081         req_format = self.params.get('format')
1082         if req_format is None:
1083             req_format = 'best'
1084         formats_to_download = []
1085         # The -1 is for supporting YoutubeIE
1086         if req_format in ('-1', 'all'):
1087             formats_to_download = formats
1088         else:
1089             for rfstr in req_format.split(','):
1090                 # We can accept formats requested in the format: 34/5/best, we pick
1091                 # the first that is available, starting from left
1092                 req_formats = rfstr.split('/')
1093                 for rf in req_formats:
1094                     if re.match(r'.+?\+.+?', rf) is not None:
1095                         # Two formats have been requested like '137+139'
1096                         format_1, format_2 = rf.split('+')
1097                         formats_info = (self.select_format(format_1, formats),
1098                                         self.select_format(format_2, formats))
1099                         if all(formats_info):
1100                             # The first format must contain the video and the
1101                             # second the audio
1102                             if formats_info[0].get('vcodec') == 'none':
1103                                 self.report_error('The first format must '
1104                                                   'contain the video, try using '
1105                                                   '"-f %s+%s"' % (format_2, format_1))
1106                                 return
1107                             output_ext = (
1108                                 formats_info[0]['ext']
1109                                 if self.params.get('merge_output_format') is None
1110                                 else self.params['merge_output_format'])
1111                             selected_format = {
1112                                 'requested_formats': formats_info,
1113                                 'format': '%s+%s' % (formats_info[0].get('format'),
1114                                                      formats_info[1].get('format')),
1115                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1116                                                         formats_info[1].get('format_id')),
1117                                 'width': formats_info[0].get('width'),
1118                                 'height': formats_info[0].get('height'),
1119                                 'resolution': formats_info[0].get('resolution'),
1120                                 'fps': formats_info[0].get('fps'),
1121                                 'vcodec': formats_info[0].get('vcodec'),
1122                                 'vbr': formats_info[0].get('vbr'),
1123                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1124                                 'acodec': formats_info[1].get('acodec'),
1125                                 'abr': formats_info[1].get('abr'),
1126                                 'ext': output_ext,
1127                             }
1128                         else:
1129                             selected_format = None
1130                     else:
1131                         selected_format = self.select_format(rf, formats)
1132                     if selected_format is not None:
1133                         formats_to_download.append(selected_format)
1134                         break
1135         if not formats_to_download:
1136             raise ExtractorError('requested format not available',
1137                                  expected=True)
1138
1139         if download:
1140             if len(formats_to_download) > 1:
1141                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1142             for format in formats_to_download:
1143                 new_info = dict(info_dict)
1144                 new_info.update(format)
1145                 self.process_info(new_info)
1146         # We update the info dict with the best quality format (backwards compatibility)
1147         info_dict.update(formats_to_download[-1])
1148         return info_dict
1149
1150     def process_info(self, info_dict):
1151         """Process a single resolved IE result."""
1152
1153         assert info_dict.get('_type', 'video') == 'video'
1154
1155         max_downloads = self.params.get('max_downloads')
1156         if max_downloads is not None:
1157             if self._num_downloads >= int(max_downloads):
1158                 raise MaxDownloadsReached()
1159
1160         info_dict['fulltitle'] = info_dict['title']
1161         if len(info_dict['title']) > 200:
1162             info_dict['title'] = info_dict['title'][:197] + '...'
1163
1164         # Keep for backwards compatibility
1165         info_dict['stitle'] = info_dict['title']
1166
1167         if 'format' not in info_dict:
1168             info_dict['format'] = info_dict['ext']
1169
1170         reason = self._match_entry(info_dict, incomplete=False)
1171         if reason is not None:
1172             self.to_screen('[download] ' + reason)
1173             return
1174
1175         self._num_downloads += 1
1176
1177         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1178
1179         # Forced printings
1180         if self.params.get('forcetitle', False):
1181             self.to_stdout(info_dict['fulltitle'])
1182         if self.params.get('forceid', False):
1183             self.to_stdout(info_dict['id'])
1184         if self.params.get('forceurl', False):
1185             if info_dict.get('requested_formats') is not None:
1186                 for f in info_dict['requested_formats']:
1187                     self.to_stdout(f['url'] + f.get('play_path', ''))
1188             else:
1189                 # For RTMP URLs, also include the playpath
1190                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1191         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1192             self.to_stdout(info_dict['thumbnail'])
1193         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1194             self.to_stdout(info_dict['description'])
1195         if self.params.get('forcefilename', False) and filename is not None:
1196             self.to_stdout(filename)
1197         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1198             self.to_stdout(formatSeconds(info_dict['duration']))
1199         if self.params.get('forceformat', False):
1200             self.to_stdout(info_dict['format'])
1201         if self.params.get('forcejson', False):
1202             self.to_stdout(json.dumps(info_dict))
1203
1204         # Do nothing else if in simulate mode
1205         if self.params.get('simulate', False):
1206             return
1207
1208         if filename is None:
1209             return
1210
1211         try:
1212             dn = os.path.dirname(encodeFilename(filename))
1213             if dn and not os.path.exists(dn):
1214                 os.makedirs(dn)
1215         except (OSError, IOError) as err:
1216             self.report_error('unable to create directory ' + compat_str(err))
1217             return
1218
1219         if self.params.get('writedescription', False):
1220             descfn = filename + '.description'
1221             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1222                 self.to_screen('[info] Video description is already present')
1223             elif info_dict.get('description') is None:
1224                 self.report_warning('There\'s no description to write.')
1225             else:
1226                 try:
1227                     self.to_screen('[info] Writing video description to: ' + descfn)
1228                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1229                         descfile.write(info_dict['description'])
1230                 except (OSError, IOError):
1231                     self.report_error('Cannot write description file ' + descfn)
1232                     return
1233
1234         if self.params.get('writeannotations', False):
1235             annofn = filename + '.annotations.xml'
1236             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1237                 self.to_screen('[info] Video annotations are already present')
1238             else:
1239                 try:
1240                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1241                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1242                         annofile.write(info_dict['annotations'])
1243                 except (KeyError, TypeError):
1244                     self.report_warning('There are no annotations to write.')
1245                 except (OSError, IOError):
1246                     self.report_error('Cannot write annotations file: ' + annofn)
1247                     return
1248
1249         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1250                                        self.params.get('writeautomaticsub')])
1251
1252         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1253             # subtitles download errors are already managed as troubles in relevant IE
1254             # that way it will silently go on when used with unsupporting IE
1255             subtitles = info_dict['subtitles']
1256             sub_format = self.params.get('subtitlesformat', 'srt')
1257             for sub_lang in subtitles.keys():
1258                 sub = subtitles[sub_lang]
1259                 if sub is None:
1260                     continue
1261                 try:
1262                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1263                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1264                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1265                     else:
1266                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1267                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1268                             subfile.write(sub)
1269                 except (OSError, IOError):
1270                     self.report_error('Cannot write subtitles file ' + sub_filename)
1271                     return
1272
1273         if self.params.get('writeinfojson', False):
1274             infofn = os.path.splitext(filename)[0] + '.info.json'
1275             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1276                 self.to_screen('[info] Video description metadata is already present')
1277             else:
1278                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1279                 try:
1280                     write_json_file(info_dict, infofn)
1281                 except (OSError, IOError):
1282                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1283                     return
1284
1285         self._write_thumbnails(info_dict, filename)
1286
1287         if not self.params.get('skip_download', False):
1288             try:
1289                 def dl(name, info):
1290                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1291                     for ph in self._progress_hooks:
1292                         fd.add_progress_hook(ph)
1293                     if self.params.get('verbose'):
1294                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1295                     return fd.download(name, info)
1296
1297                 if info_dict.get('requested_formats') is not None:
1298                     downloaded = []
1299                     success = True
1300                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1301                     if not merger._executable:
1302                         postprocessors = []
1303                         self.report_warning('You have requested multiple '
1304                                             'formats but ffmpeg or avconv are not installed.'
1305                                             ' The formats won\'t be merged')
1306                     else:
1307                         postprocessors = [merger]
1308                     for f in info_dict['requested_formats']:
1309                         new_info = dict(info_dict)
1310                         new_info.update(f)
1311                         fname = self.prepare_filename(new_info)
1312                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1313                         downloaded.append(fname)
1314                         partial_success = dl(fname, new_info)
1315                         success = success and partial_success
1316                     info_dict['__postprocessors'] = postprocessors
1317                     info_dict['__files_to_merge'] = downloaded
1318                 else:
1319                     # Just a single file
1320                     success = dl(filename, info_dict)
1321             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1322                 self.report_error('unable to download video data: %s' % str(err))
1323                 return
1324             except (OSError, IOError) as err:
1325                 raise UnavailableVideoError(err)
1326             except (ContentTooShortError, ) as err:
1327                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1328                 return
1329
1330             if success:
1331                 # Fixup content
1332                 fixup_policy = self.params.get('fixup')
1333                 if fixup_policy is None:
1334                     fixup_policy = 'detect_or_warn'
1335
1336                 stretched_ratio = info_dict.get('stretched_ratio')
1337                 if stretched_ratio is not None and stretched_ratio != 1:
1338                     if fixup_policy == 'warn':
1339                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1340                             info_dict['id'], stretched_ratio))
1341                     elif fixup_policy == 'detect_or_warn':
1342                         stretched_pp = FFmpegFixupStretchedPP(self)
1343                         if stretched_pp.available:
1344                             info_dict.setdefault('__postprocessors', [])
1345                             info_dict['__postprocessors'].append(stretched_pp)
1346                         else:
1347                             self.report_warning(
1348                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1349                                     info_dict['id'], stretched_ratio))
1350                     else:
1351                         assert fixup_policy in ('ignore', 'never')
1352
1353                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1354                     if fixup_policy == 'warn':
1355                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1356                             info_dict['id']))
1357                     elif fixup_policy == 'detect_or_warn':
1358                         fixup_pp = FFmpegFixupM4aPP(self)
1359                         if fixup_pp.available:
1360                             info_dict.setdefault('__postprocessors', [])
1361                             info_dict['__postprocessors'].append(fixup_pp)
1362                         else:
1363                             self.report_warning(
1364                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1365                                     info_dict['id']))
1366                     else:
1367                         assert fixup_policy in ('ignore', 'never')
1368
1369                 try:
1370                     self.post_process(filename, info_dict)
1371                 except (PostProcessingError) as err:
1372                     self.report_error('postprocessing: %s' % str(err))
1373                     return
1374                 self.record_download_archive(info_dict)
1375
1376     def download(self, url_list):
1377         """Download a given list of URLs."""
1378         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1379         if (len(url_list) > 1 and
1380                 '%' not in outtmpl
1381                 and self.params.get('max_downloads') != 1):
1382             raise SameFileError(outtmpl)
1383
1384         for url in url_list:
1385             try:
1386                 # It also downloads the videos
1387                 res = self.extract_info(url)
1388             except UnavailableVideoError:
1389                 self.report_error('unable to download video')
1390             except MaxDownloadsReached:
1391                 self.to_screen('[info] Maximum number of downloaded files reached.')
1392                 raise
1393             else:
1394                 if self.params.get('dump_single_json', False):
1395                     self.to_stdout(json.dumps(res))
1396
1397         return self._download_retcode
1398
1399     def download_with_info_file(self, info_filename):
1400         with io.open(info_filename, 'r', encoding='utf-8') as f:
1401             info = json.load(f)
1402         try:
1403             self.process_ie_result(info, download=True)
1404         except DownloadError:
1405             webpage_url = info.get('webpage_url')
1406             if webpage_url is not None:
1407                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1408                 return self.download([webpage_url])
1409             else:
1410                 raise
1411         return self._download_retcode
1412
1413     def post_process(self, filename, ie_info):
1414         """Run all the postprocessors on the given file."""
1415         info = dict(ie_info)
1416         info['filepath'] = filename
1417         pps_chain = []
1418         if ie_info.get('__postprocessors') is not None:
1419             pps_chain.extend(ie_info['__postprocessors'])
1420         pps_chain.extend(self._pps)
1421         for pp in pps_chain:
1422             keep_video = None
1423             old_filename = info['filepath']
1424             try:
1425                 keep_video_wish, info = pp.run(info)
1426                 if keep_video_wish is not None:
1427                     if keep_video_wish:
1428                         keep_video = keep_video_wish
1429                     elif keep_video is None:
1430                         # No clear decision yet, let IE decide
1431                         keep_video = keep_video_wish
1432             except PostProcessingError as e:
1433                 self.report_error(e.msg)
1434             if keep_video is False and not self.params.get('keepvideo', False):
1435                 try:
1436                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1437                     os.remove(encodeFilename(old_filename))
1438                 except (IOError, OSError):
1439                     self.report_warning('Unable to remove downloaded video file')
1440
1441     def _make_archive_id(self, info_dict):
1442         # Future-proof against any change in case
1443         # and backwards compatibility with prior versions
1444         extractor = info_dict.get('extractor_key')
1445         if extractor is None:
1446             if 'id' in info_dict:
1447                 extractor = info_dict.get('ie_key')  # key in a playlist
1448         if extractor is None:
1449             return None  # Incomplete video information
1450         return extractor.lower() + ' ' + info_dict['id']
1451
1452     def in_download_archive(self, info_dict):
1453         fn = self.params.get('download_archive')
1454         if fn is None:
1455             return False
1456
1457         vid_id = self._make_archive_id(info_dict)
1458         if vid_id is None:
1459             return False  # Incomplete video information
1460
1461         try:
1462             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1463                 for line in archive_file:
1464                     if line.strip() == vid_id:
1465                         return True
1466         except IOError as ioe:
1467             if ioe.errno != errno.ENOENT:
1468                 raise
1469         return False
1470
1471     def record_download_archive(self, info_dict):
1472         fn = self.params.get('download_archive')
1473         if fn is None:
1474             return
1475         vid_id = self._make_archive_id(info_dict)
1476         assert vid_id
1477         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1478             archive_file.write(vid_id + '\n')
1479
1480     @staticmethod
1481     def format_resolution(format, default='unknown'):
1482         if format.get('vcodec') == 'none':
1483             return 'audio only'
1484         if format.get('resolution') is not None:
1485             return format['resolution']
1486         if format.get('height') is not None:
1487             if format.get('width') is not None:
1488                 res = '%sx%s' % (format['width'], format['height'])
1489             else:
1490                 res = '%sp' % format['height']
1491         elif format.get('width') is not None:
1492             res = '?x%d' % format['width']
1493         else:
1494             res = default
1495         return res
1496
1497     def _format_note(self, fdict):
1498         res = ''
1499         if fdict.get('ext') in ['f4f', 'f4m']:
1500             res += '(unsupported) '
1501         if fdict.get('format_note') is not None:
1502             res += fdict['format_note'] + ' '
1503         if fdict.get('tbr') is not None:
1504             res += '%4dk ' % fdict['tbr']
1505         if fdict.get('container') is not None:
1506             if res:
1507                 res += ', '
1508             res += '%s container' % fdict['container']
1509         if (fdict.get('vcodec') is not None and
1510                 fdict.get('vcodec') != 'none'):
1511             if res:
1512                 res += ', '
1513             res += fdict['vcodec']
1514             if fdict.get('vbr') is not None:
1515                 res += '@'
1516         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1517             res += 'video@'
1518         if fdict.get('vbr') is not None:
1519             res += '%4dk' % fdict['vbr']
1520         if fdict.get('fps') is not None:
1521             res += ', %sfps' % fdict['fps']
1522         if fdict.get('acodec') is not None:
1523             if res:
1524                 res += ', '
1525             if fdict['acodec'] == 'none':
1526                 res += 'video only'
1527             else:
1528                 res += '%-5s' % fdict['acodec']
1529         elif fdict.get('abr') is not None:
1530             if res:
1531                 res += ', '
1532             res += 'audio'
1533         if fdict.get('abr') is not None:
1534             res += '@%3dk' % fdict['abr']
1535         if fdict.get('asr') is not None:
1536             res += ' (%5dHz)' % fdict['asr']
1537         if fdict.get('filesize') is not None:
1538             if res:
1539                 res += ', '
1540             res += format_bytes(fdict['filesize'])
1541         elif fdict.get('filesize_approx') is not None:
1542             if res:
1543                 res += ', '
1544             res += '~' + format_bytes(fdict['filesize_approx'])
1545         return res
1546
1547     def list_formats(self, info_dict):
1548         def line(format, idlen=20):
1549             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1550                 format['format_id'],
1551                 format['ext'],
1552                 self.format_resolution(format),
1553                 self._format_note(format),
1554             ))
1555
1556         formats = info_dict.get('formats', [info_dict])
1557         idlen = max(len('format code'),
1558                     max(len(f['format_id']) for f in formats))
1559         formats_s = [
1560             line(f, idlen) for f in formats
1561             if f.get('preference') is None or f['preference'] >= -1000]
1562         if len(formats) > 1:
1563             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1564
1565         header_line = line({
1566             'format_id': 'format code', 'ext': 'extension',
1567             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1568         self.to_screen(
1569             '[info] Available formats for %s:\n%s\n%s' %
1570             (info_dict['id'], header_line, '\n'.join(formats_s)))
1571
1572     def list_thumbnails(self, info_dict):
1573         thumbnails = info_dict.get('thumbnails')
1574         if not thumbnails:
1575             tn_url = info_dict.get('thumbnail')
1576             if tn_url:
1577                 thumbnails = [{'id': '0', 'url': tn_url}]
1578             else:
1579                 self.to_screen(
1580                     '[info] No thumbnails present for %s' % info_dict['id'])
1581                 return
1582
1583         self.to_screen(
1584             '[info] Thumbnails for %s:' % info_dict['id'])
1585         self.to_screen(render_table(
1586             ['ID', 'width', 'height', 'URL'],
1587             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1588
1589     def urlopen(self, req):
1590         """ Start an HTTP download """
1591
1592         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1593         # always respected by websites, some tend to give out URLs with non percent-encoded
1594         # non-ASCII characters (see telemb.py, ard.py [#3412])
1595         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1596         # To work around aforementioned issue we will replace request's original URL with
1597         # percent-encoded one
1598         req_is_string = isinstance(req, compat_basestring)
1599         url = req if req_is_string else req.get_full_url()
1600         url_escaped = escape_url(url)
1601
1602         # Substitute URL if any change after escaping
1603         if url != url_escaped:
1604             if req_is_string:
1605                 req = url_escaped
1606             else:
1607                 req = compat_urllib_request.Request(
1608                     url_escaped, data=req.data, headers=req.headers,
1609                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1610
1611         return self._opener.open(req, timeout=self._socket_timeout)
1612
1613     def print_debug_header(self):
1614         if not self.params.get('verbose'):
1615             return
1616
1617         if type('') is not compat_str:
1618             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1619             self.report_warning(
1620                 'Your Python is broken! Update to a newer and supported version')
1621
1622         stdout_encoding = getattr(
1623             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1624         encoding_str = (
1625             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1626                 locale.getpreferredencoding(),
1627                 sys.getfilesystemencoding(),
1628                 stdout_encoding,
1629                 self.get_encoding()))
1630         write_string(encoding_str, encoding=None)
1631
1632         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1633         try:
1634             sp = subprocess.Popen(
1635                 ['git', 'rev-parse', '--short', 'HEAD'],
1636                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1637                 cwd=os.path.dirname(os.path.abspath(__file__)))
1638             out, err = sp.communicate()
1639             out = out.decode().strip()
1640             if re.match('[0-9a-f]+', out):
1641                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1642         except:
1643             try:
1644                 sys.exc_clear()
1645             except:
1646                 pass
1647         self._write_string('[debug] Python version %s - %s\n' % (
1648             platform.python_version(), platform_name()))
1649
1650         exe_versions = FFmpegPostProcessor.get_versions()
1651         exe_versions['rtmpdump'] = rtmpdump_version()
1652         exe_str = ', '.join(
1653             '%s %s' % (exe, v)
1654             for exe, v in sorted(exe_versions.items())
1655             if v
1656         )
1657         if not exe_str:
1658             exe_str = 'none'
1659         self._write_string('[debug] exe versions: %s\n' % exe_str)
1660
1661         proxy_map = {}
1662         for handler in self._opener.handlers:
1663             if hasattr(handler, 'proxies'):
1664                 proxy_map.update(handler.proxies)
1665         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1666
1667         if self.params.get('call_home', False):
1668             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1669             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1670             latest_version = self.urlopen(
1671                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1672             if version_tuple(latest_version) > version_tuple(__version__):
1673                 self.report_warning(
1674                     'You are using an outdated version (newest version: %s)! '
1675                     'See https://yt-dl.org/update if you need help updating.' %
1676                     latest_version)
1677
1678     def _setup_opener(self):
1679         timeout_val = self.params.get('socket_timeout')
1680         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1681
1682         opts_cookiefile = self.params.get('cookiefile')
1683         opts_proxy = self.params.get('proxy')
1684
1685         if opts_cookiefile is None:
1686             self.cookiejar = compat_cookiejar.CookieJar()
1687         else:
1688             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1689                 opts_cookiefile)
1690             if os.access(opts_cookiefile, os.R_OK):
1691                 self.cookiejar.load()
1692
1693         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1694             self.cookiejar)
1695         if opts_proxy is not None:
1696             if opts_proxy == '':
1697                 proxies = {}
1698             else:
1699                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1700         else:
1701             proxies = compat_urllib_request.getproxies()
1702             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1703             if 'http' in proxies and 'https' not in proxies:
1704                 proxies['https'] = proxies['http']
1705         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1706
1707         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1708         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1709         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1710         opener = compat_urllib_request.build_opener(
1711             https_handler, proxy_handler, cookie_processor, ydlh)
1712         # Delete the default user-agent header, which would otherwise apply in
1713         # cases where our custom HTTP handler doesn't come into play
1714         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1715         opener.addheaders = []
1716         self._opener = opener
1717
1718     def encode(self, s):
1719         if isinstance(s, bytes):
1720             return s  # Already encoded
1721
1722         try:
1723             return s.encode(self.get_encoding())
1724         except UnicodeEncodeError as err:
1725             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1726             raise
1727
1728     def get_encoding(self):
1729         encoding = self.params.get('encoding')
1730         if encoding is None:
1731             encoding = preferredencoding()
1732         return encoding
1733
1734     def _write_thumbnails(self, info_dict, filename):
1735         if self.params.get('writethumbnail', False):
1736             thumbnails = info_dict.get('thumbnails')
1737             if thumbnails:
1738                 thumbnails = [thumbnails[-1]]
1739         elif self.params.get('write_all_thumbnails', False):
1740             thumbnails = info_dict.get('thumbnails')
1741         else:
1742             return
1743
1744         if not thumbnails:
1745             # No thumbnails present, so return immediately
1746             return
1747
1748         for t in thumbnails:
1749             thumb_ext = determine_ext(t['url'], 'jpg')
1750             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1751             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1752             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1753
1754             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1755                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1756                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1757             else:
1758                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1759                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1760                 try:
1761                     uf = self.urlopen(t['url'])
1762                     with open(thumb_filename, 'wb') as thumbf:
1763                         shutil.copyfileobj(uf, thumbf)
1764                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1765                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1766                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1767                     self.report_warning('Unable to download thumbnail "%s": %s' %
1768                                         (t['url'], compat_str(err)))