youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_cookiejar,
  29     compat_expanduser,
  30     compat_http_client,
  31     compat_kwargs,
  32     compat_str,
  33     compat_urllib_error,
  34     compat_urllib_request,
  35 )
  36 from .utils import (
  37     escape_url,
  38     ContentTooShortError,
  39     date_from_str,
  40     DateRange,
  41     DEFAULT_OUTTMPL,
  42     determine_ext,
  43     DownloadError,
  44     encodeFilename,
  45     ExtractorError,
  46     format_bytes,
  47     formatSeconds,
  48     get_term_width,
  49     locked_file,
  50     make_HTTPS_handler,
  51     MaxDownloadsReached,
  52     PagedList,
  53     parse_filesize,
  54     PostProcessingError,
  55     platform_name,
  56     preferredencoding,
  57     SameFileError,
  58     sanitize_filename,
  59     subtitles_filename,
  60     takewhile_inclusive,
  61     UnavailableVideoError,
  62     url_basename,
  63     version_tuple,
  64     write_json_file,
  65     write_string,
  66     YoutubeDLHandler,
  67     prepend_extension,
  68     args_to_str,
  69     age_restricted,
  70 )
  71 from .cache import Cache
  72 from .extractor import get_info_extractor, gen_extractors
  73 from .downloader import get_suitable_downloader
  74 from .downloader.rtmp import rtmpdump_version
  75 from .postprocessor import (
  76     FFmpegFixupM4aPP,
  77     FFmpegFixupStretchedPP,
  78     FFmpegMergerPP,
  79     FFmpegPostProcessor,
  80     get_postprocessor,
  81 )
  82 from .version import __version__
  83
  84
  85 class YoutubeDL(object):
  86     """YoutubeDL class.
  87
  88     YoutubeDL objects are the ones responsible of downloading the
  89     actual video file and writing it to disk if the user has requested
  90     it, among some other tasks. In most cases there should be one per
  91     program. As, given a video URL, the downloader doesn't know how to
  92     extract all the needed information, task that InfoExtractors do, it
  93     has to pass the URL to one of them.
  94
  95     For this, YoutubeDL objects have a method that allows
  96     InfoExtractors to be registered in a given order. When it is passed
  97     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  98     finds that reports being able to handle it. The InfoExtractor extracts
  99     all the information about the video or videos the URL refers to, and
 100     YoutubeDL process the extracted information, possibly using a File
 101     Downloader to download the video.
 102
 103     YoutubeDL objects accept a lot of parameters. In order not to saturate
 104     the object constructor with arguments, it receives a dictionary of
 105     options instead. These options are available through the params
 106     attribute for the InfoExtractors to use. The YoutubeDL also
 107     registers itself as the downloader in charge for the InfoExtractors
 108     that are added to it, so this is a "mutual registration".
 109
 110     Available options:
 111
 112     username:          Username for authentication purposes.
 113     password:          Password for authentication purposes.
 114     videopassword:     Password for acces a video.
 115     usenetrc:          Use netrc for authentication instead.
 116     verbose:           Print additional info to stdout.
 117     quiet:             Do not print messages to stdout.
 118     no_warnings:       Do not print out anything for warnings.
 119     forceurl:          Force printing final URL.
 120     forcetitle:        Force printing title.
 121     forceid:           Force printing ID.
 122     forcethumbnail:    Force printing thumbnail URL.
 123     forcedescription:  Force printing description.
 124     forcefilename:     Force printing final filename.
 125     forceduration:     Force printing duration.
 126     forcejson:         Force printing info_dict as JSON.
 127     dump_single_json:  Force printing the info_dict of the whole playlist
 128                        (or video) as a single JSON line.
 129     simulate:          Do not download the video files.
 130     format:            Video format code. See options.py for more information.
 131     format_limit:      Highest quality format to try.
 132     outtmpl:           Template for output names.
 133     restrictfilenames: Do not allow "&" and spaces in file names
 134     ignoreerrors:      Do not stop on download errors.
 135     nooverwrites:      Prevent overwriting files.
 136     playliststart:     Playlist item to start at.
 137     playlistend:       Playlist item to end at.
 138     playlistreverse:   Download playlist items in reverse order.
 139     matchtitle:        Download only matching titles.
 140     rejecttitle:       Reject downloads for matching titles.
 141     logger:            Log messages to a logging.Logger instance.
 142     logtostderr:       Log messages to stderr instead of stdout.
 143     writedescription:  Write the video description to a .description file
 144     writeinfojson:     Write the video description to a .info.json file
 145     writeannotations:  Write the video annotations to a .annotations.xml file
 146     writethumbnail:    Write the thumbnail image to a file
 147     writesubtitles:    Write the video subtitles to a file
 148     writeautomaticsub: Write the automatic subtitles to a file
 149     allsubtitles:      Downloads all the subtitles of the video
 150                        (requires writesubtitles or writeautomaticsub)
 151     listsubtitles:     Lists all available subtitles for the video
 152     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 153     subtitleslangs:    List of languages of the subtitles to download
 154     keepvideo:         Keep the video file after post-processing
 155     daterange:         A DateRange object, download only if the upload_date is in the range.
 156     skip_download:     Skip the actual download of the video file
 157     cachedir:          Location of the cache files in the filesystem.
 158                        False to disable filesystem cache.
 159     noplaylist:        Download single video instead of a playlist if in doubt.
 160     age_limit:         An integer representing the user's age in years.
 161                        Unsuitable videos for the given age are skipped.
 162     min_views:         An integer representing the minimum view count the video
 163                        must have in order to not be skipped.
 164                        Videos without view count information are always
 165                        downloaded. None for no limit.
 166     max_views:         An integer representing the maximum view count.
 167                        Videos that are more popular than that are not
 168                        downloaded.
 169                        Videos without view count information are always
 170                        downloaded. None for no limit.
 171     download_archive:  File name of a file where all downloads are recorded.
 172                        Videos already present in the file are not downloaded
 173                        again.
 174     cookiefile:        File name where cookies should be read from and dumped to.
 175     nocheckcertificate:Do not verify SSL certificates
 176     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 177                        At the moment, this is only supported by YouTube.
 178     proxy:             URL of the proxy server to use
 179     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 180     bidi_workaround:   Work around buggy terminals without bidirectional text
 181                        support, using fridibi
 182     debug_printtraffic:Print out sent and received HTTP traffic
 183     include_ads:       Download ads as well
 184     default_search:    Prepend this string if an input url is not valid.
 185                        'auto' for elaborate guessing
 186     encoding:          Use this encoding instead of the system-specified.
 187     extract_flat:      Do not resolve URLs, return the immediate result.
 188                        Pass in 'in_playlist' to only show this behavior for
 189                        playlist items.
 190     postprocessors:    A list of dictionaries, each with an entry
 191                        * key:  The name of the postprocessor. See
 192                                youtube_dl/postprocessor/__init__.py for a list.
 193                        as well as any further keyword arguments for the
 194                        postprocessor.
 195     progress_hooks:    A list of functions that get called on download
 196                        progress, with a dictionary with the entries
 197                        * filename: The final filename
 198                        * status: One of "downloading" and "finished"
 199
 200                        The dict may also have some of the following entries:
 201
 202                        * downloaded_bytes: Bytes on disk
 203                        * total_bytes: Size of the whole file, None if unknown
 204                        * tmpfilename: The filename we're currently writing to
 205                        * eta: The estimated time in seconds, None if unknown
 206                        * speed: The download speed in bytes/second, None if
 207                                 unknown
 208
 209                        Progress hooks are guaranteed to be called at least once
 210                        (with status "finished") if the download is successful.
 211     merge_output_format: Extension to use when merging formats.
 212     fixup:             Automatically correct known faults of the file.
 213                        One of:
 214                        - "never": do nothing
 215                        - "warn": only emit a warning
 216                        - "detect_or_warn": check whether we can do anything
 217                                            about it, warn otherwise (default)
 218     source_address:    (Experimental) Client-side IP address to bind to.
 219     call_home:         Boolean, true iff we are allowed to contact the
 220                        youtube-dl servers for debugging.
 221     sleep_interval:    Number of seconds to sleep before each download.
 222     external_downloader:  Executable of the external downloader to call.
 223
 224
 225     The following parameters are not used by YoutubeDL itself, they are used by
 226     the FileDownloader:
 227     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 228     noresizebuffer, retries, continuedl, noprogress, consoletitle
 229
 230     The following options are used by the post processors:
 231     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 232                        otherwise prefer avconv.
 233     exec_cmd:          Arbitrary command to run after downloading
 234     """
 235
 236     params = None
 237     _ies = []
 238     _pps = []
 239     _download_retcode = None
 240     _num_downloads = None
 241     _screen_file = None
 242
 243     def __init__(self, params=None, auto_init=True):
 244         """Create a FileDownloader object with the given options."""
 245         if params is None:
 246             params = {}
 247         self._ies = []
 248         self._ies_instances = {}
 249         self._pps = []
 250         self._progress_hooks = []
 251         self._download_retcode = 0
 252         self._num_downloads = 0
 253         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 254         self._err_file = sys.stderr
 255         self.params = params
 256         self.cache = Cache(self)
 257
 258         if params.get('bidi_workaround', False):
 259             try:
 260                 import pty
 261                 master, slave = pty.openpty()
 262                 width = get_term_width()
 263                 if width is None:
 264                     width_args = []
 265                 else:
 266                     width_args = ['-w', str(width)]
 267                 sp_kwargs = dict(
 268                     stdin=subprocess.PIPE,
 269                     stdout=slave,
 270                     stderr=self._err_file)
 271                 try:
 272                     self._output_process = subprocess.Popen(
 273                         ['bidiv'] + width_args, **sp_kwargs
 274                     )
 275                 except OSError:
 276                     self._output_process = subprocess.Popen(
 277                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 278                 self._output_channel = os.fdopen(master, 'rb')
 279             except OSError as ose:
 280                 if ose.errno == 2:
 281                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 282                 else:
 283                     raise
 284
 285         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 286                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 287                 and not params.get('restrictfilenames', False)):
 288             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 289             self.report_warning(
 290                 'Assuming --restrict-filenames since file system encoding '
 291                 'cannot encode all characters. '
 292                 'Set the LC_ALL environment variable to fix this.')
 293             self.params['restrictfilenames'] = True
 294
 295         if '%(stitle)s' in self.params.get('outtmpl', ''):
 296             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 297
 298         self._setup_opener()
 299
 300         if auto_init:
 301             self.print_debug_header()
 302             self.add_default_info_extractors()
 303
 304         for pp_def_raw in self.params.get('postprocessors', []):
 305             pp_class = get_postprocessor(pp_def_raw['key'])
 306             pp_def = dict(pp_def_raw)
 307             del pp_def['key']
 308             pp = pp_class(self, **compat_kwargs(pp_def))
 309             self.add_post_processor(pp)
 310
 311         for ph in self.params.get('progress_hooks', []):
 312             self.add_progress_hook(ph)
 313
 314     def warn_if_short_id(self, argv):
 315         # short YouTube ID starting with dash?
 316         idxs = [
 317             i for i, a in enumerate(argv)
 318             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 319         if idxs:
 320             correct_argv = (
 321                 ['youtube-dl'] +
 322                 [a for i, a in enumerate(argv) if i not in idxs] +
 323                 ['--'] + [argv[i] for i in idxs]
 324             )
 325             self.report_warning(
 326                 'Long argument string detected. '
 327                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 328                 args_to_str(correct_argv))
 329
 330     def add_info_extractor(self, ie):
 331         """Add an InfoExtractor object to the end of the list."""
 332         self._ies.append(ie)
 333         self._ies_instances[ie.ie_key()] = ie
 334         ie.set_downloader(self)
 335
 336     def get_info_extractor(self, ie_key):
 337         """
 338         Get an instance of an IE with name ie_key, it will try to get one from
 339         the _ies list, if there's no instance it will create a new one and add
 340         it to the extractor list.
 341         """
 342         ie = self._ies_instances.get(ie_key)
 343         if ie is None:
 344             ie = get_info_extractor(ie_key)()
 345             self.add_info_extractor(ie)
 346         return ie
 347
 348     def add_default_info_extractors(self):
 349         """
 350         Add the InfoExtractors returned by gen_extractors to the end of the list
 351         """
 352         for ie in gen_extractors():
 353             self.add_info_extractor(ie)
 354
 355     def add_post_processor(self, pp):
 356         """Add a PostProcessor object to the end of the chain."""
 357         self._pps.append(pp)
 358         pp.set_downloader(self)
 359
 360     def add_progress_hook(self, ph):
 361         """Add the progress hook (currently only for the file downloader)"""
 362         self._progress_hooks.append(ph)
 363
 364     def _bidi_workaround(self, message):
 365         if not hasattr(self, '_output_channel'):
 366             return message
 367
 368         assert hasattr(self, '_output_process')
 369         assert isinstance(message, compat_str)
 370         line_count = message.count('\n') + 1
 371         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 372         self._output_process.stdin.flush()
 373         res = ''.join(self._output_channel.readline().decode('utf-8')
 374                       for _ in range(line_count))
 375         return res[:-len('\n')]
 376
 377     def to_screen(self, message, skip_eol=False):
 378         """Print message to stdout if not in quiet mode."""
 379         return self.to_stdout(message, skip_eol, check_quiet=True)
 380
 381     def _write_string(self, s, out=None):
 382         write_string(s, out=out, encoding=self.params.get('encoding'))
 383
 384     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 385         """Print message to stdout if not in quiet mode."""
 386         if self.params.get('logger'):
 387             self.params['logger'].debug(message)
 388         elif not check_quiet or not self.params.get('quiet', False):
 389             message = self._bidi_workaround(message)
 390             terminator = ['\n', ''][skip_eol]
 391             output = message + terminator
 392
 393             self._write_string(output, self._screen_file)
 394
 395     def to_stderr(self, message):
 396         """Print message to stderr."""
 397         assert isinstance(message, compat_str)
 398         if self.params.get('logger'):
 399             self.params['logger'].error(message)
 400         else:
 401             message = self._bidi_workaround(message)
 402             output = message + '\n'
 403             self._write_string(output, self._err_file)
 404
 405     def to_console_title(self, message):
 406         if not self.params.get('consoletitle', False):
 407             return
 408         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 409             # c_wchar_p() might not be necessary if `message` is
 410             # already of type unicode()
 411             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 412         elif 'TERM' in os.environ:
 413             self._write_string('\033]0;%s\007' % message, self._screen_file)
 414
 415     def save_console_title(self):
 416         if not self.params.get('consoletitle', False):
 417             return
 418         if 'TERM' in os.environ:
 419             # Save the title on stack
 420             self._write_string('\033[22;0t', self._screen_file)
 421
 422     def restore_console_title(self):
 423         if not self.params.get('consoletitle', False):
 424             return
 425         if 'TERM' in os.environ:
 426             # Restore the title from stack
 427             self._write_string('\033[23;0t', self._screen_file)
 428
 429     def __enter__(self):
 430         self.save_console_title()
 431         return self
 432
 433     def __exit__(self, *args):
 434         self.restore_console_title()
 435
 436         if self.params.get('cookiefile') is not None:
 437             self.cookiejar.save()
 438
 439     def trouble(self, message=None, tb=None):
 440         """Determine action to take when a download problem appears.
 441
 442         Depending on if the downloader has been configured to ignore
 443         download errors or not, this method may throw an exception or
 444         not when errors are found, after printing the message.
 445
 446         tb, if given, is additional traceback information.
 447         """
 448         if message is not None:
 449             self.to_stderr(message)
 450         if self.params.get('verbose'):
 451             if tb is None:
 452                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 453                     tb = ''
 454                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 455                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 456                     tb += compat_str(traceback.format_exc())
 457                 else:
 458                     tb_data = traceback.format_list(traceback.extract_stack())
 459                     tb = ''.join(tb_data)
 460             self.to_stderr(tb)
 461         if not self.params.get('ignoreerrors', False):
 462             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 463                 exc_info = sys.exc_info()[1].exc_info
 464             else:
 465                 exc_info = sys.exc_info()
 466             raise DownloadError(message, exc_info)
 467         self._download_retcode = 1
 468
 469     def report_warning(self, message):
 470         '''
 471         Print the message to stderr, it will be prefixed with 'WARNING:'
 472         If stderr is a tty file the 'WARNING:' will be colored
 473         '''
 474         if self.params.get('logger') is not None:
 475             self.params['logger'].warning(message)
 476         else:
 477             if self.params.get('no_warnings'):
 478                 return
 479             if self._err_file.isatty() and os.name != 'nt':
 480                 _msg_header = '\033[0;33mWARNING:\033[0m'
 481             else:
 482                 _msg_header = 'WARNING:'
 483             warning_message = '%s %s' % (_msg_header, message)
 484             self.to_stderr(warning_message)
 485
 486     def report_error(self, message, tb=None):
 487         '''
 488         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 489         in red if stderr is a tty file.
 490         '''
 491         if self._err_file.isatty() and os.name != 'nt':
 492             _msg_header = '\033[0;31mERROR:\033[0m'
 493         else:
 494             _msg_header = 'ERROR:'
 495         error_message = '%s %s' % (_msg_header, message)
 496         self.trouble(error_message, tb)
 497
 498     def report_file_already_downloaded(self, file_name):
 499         """Report file has already been fully downloaded."""
 500         try:
 501             self.to_screen('[download] %s has already been downloaded' % file_name)
 502         except UnicodeEncodeError:
 503             self.to_screen('[download] The file has already been downloaded')
 504
 505     def prepare_filename(self, info_dict):
 506         """Generate the output filename."""
 507         try:
 508             template_dict = dict(info_dict)
 509
 510             template_dict['epoch'] = int(time.time())
 511             autonumber_size = self.params.get('autonumber_size')
 512             if autonumber_size is None:
 513                 autonumber_size = 5
 514             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 515             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 516             if template_dict.get('playlist_index') is not None:
 517                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 518             if template_dict.get('resolution') is None:
 519                 if template_dict.get('width') and template_dict.get('height'):
 520                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 521                 elif template_dict.get('height'):
 522                     template_dict['resolution'] = '%sp' % template_dict['height']
 523                 elif template_dict.get('width'):
 524                     template_dict['resolution'] = '?x%d' % template_dict['width']
 525
 526             sanitize = lambda k, v: sanitize_filename(
 527                 compat_str(v),
 528                 restricted=self.params.get('restrictfilenames'),
 529                 is_id=(k == 'id'))
 530             template_dict = dict((k, sanitize(k, v))
 531                                  for k, v in template_dict.items()
 532                                  if v is not None)
 533             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 534
 535             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 536             tmpl = compat_expanduser(outtmpl)
 537             filename = tmpl % template_dict
 538             return filename
 539         except ValueError as err:
 540             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 541             return None
 542
 543     def _match_entry(self, info_dict):
 544         """ Returns None iff the file should be downloaded """
 545
 546         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 547         if 'title' in info_dict:
 548             # This can happen when we're just evaluating the playlist
 549             title = info_dict['title']
 550             matchtitle = self.params.get('matchtitle', False)
 551             if matchtitle:
 552                 if not re.search(matchtitle, title, re.IGNORECASE):
 553                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 554             rejecttitle = self.params.get('rejecttitle', False)
 555             if rejecttitle:
 556                 if re.search(rejecttitle, title, re.IGNORECASE):
 557                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 558         date = info_dict.get('upload_date', None)
 559         if date is not None:
 560             dateRange = self.params.get('daterange', DateRange())
 561             if date not in dateRange:
 562                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 563         view_count = info_dict.get('view_count', None)
 564         if view_count is not None:
 565             min_views = self.params.get('min_views')
 566             if min_views is not None and view_count < min_views:
 567                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 568             max_views = self.params.get('max_views')
 569             if max_views is not None and view_count > max_views:
 570                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 571         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 572             return 'Skipping "%s" because it is age restricted' % title
 573         if self.in_download_archive(info_dict):
 574             return '%s has already been recorded in archive' % video_title
 575         return None
 576
 577     @staticmethod
 578     def add_extra_info(info_dict, extra_info):
 579         '''Set the keys from extra_info in info dict if they are missing'''
 580         for key, value in extra_info.items():
 581             info_dict.setdefault(key, value)
 582
 583     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 584                      process=True):
 585         '''
 586         Returns a list with a dictionary for each video we find.
 587         If 'download', also downloads the videos.
 588         extra_info is a dict containing the extra values to add to each result
 589          '''
 590
 591         if ie_key:
 592             ies = [self.get_info_extractor(ie_key)]
 593         else:
 594             ies = self._ies
 595
 596         for ie in ies:
 597             if not ie.suitable(url):
 598                 continue
 599
 600             if not ie.working():
 601                 self.report_warning('The program functionality for this site has been marked as broken, '
 602                                     'and will probably not work.')
 603
 604             try:
 605                 ie_result = ie.extract(url)
 606                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 607                     break
 608                 if isinstance(ie_result, list):
 609                     # Backwards compatibility: old IE result format
 610                     ie_result = {
 611                         '_type': 'compat_list',
 612                         'entries': ie_result,
 613                     }
 614                 self.add_default_extra_info(ie_result, ie, url)
 615                 if process:
 616                     return self.process_ie_result(ie_result, download, extra_info)
 617                 else:
 618                     return ie_result
 619             except ExtractorError as de:  # An error we somewhat expected
 620                 self.report_error(compat_str(de), de.format_traceback())
 621                 break
 622             except MaxDownloadsReached:
 623                 raise
 624             except Exception as e:
 625                 if self.params.get('ignoreerrors', False):
 626                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 627                     break
 628                 else:
 629                     raise
 630         else:
 631             self.report_error('no suitable InfoExtractor for URL %s' % url)
 632
 633     def add_default_extra_info(self, ie_result, ie, url):
 634         self.add_extra_info(ie_result, {
 635             'extractor': ie.IE_NAME,
 636             'webpage_url': url,
 637             'webpage_url_basename': url_basename(url),
 638             'extractor_key': ie.ie_key(),
 639         })
 640
 641     def process_ie_result(self, ie_result, download=True, extra_info={}):
 642         """
 643         Take the result of the ie(may be modified) and resolve all unresolved
 644         references (URLs, playlist items).
 645
 646         It will also download the videos if 'download'.
 647         Returns the resolved ie_result.
 648         """
 649
 650         result_type = ie_result.get('_type', 'video')
 651
 652         if result_type in ('url', 'url_transparent'):
 653             extract_flat = self.params.get('extract_flat', False)
 654             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 655                     extract_flat is True):
 656                 if self.params.get('forcejson', False):
 657                     self.to_stdout(json.dumps(ie_result))
 658                 return ie_result
 659
 660         if result_type == 'video':
 661             self.add_extra_info(ie_result, extra_info)
 662             return self.process_video_result(ie_result, download=download)
 663         elif result_type == 'url':
 664             # We have to add extra_info to the results because it may be
 665             # contained in a playlist
 666             return self.extract_info(ie_result['url'],
 667                                      download,
 668                                      ie_key=ie_result.get('ie_key'),
 669                                      extra_info=extra_info)
 670         elif result_type == 'url_transparent':
 671             # Use the information from the embedding page
 672             info = self.extract_info(
 673                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 674                 extra_info=extra_info, download=False, process=False)
 675
 676             force_properties = dict(
 677                 (k, v) for k, v in ie_result.items() if v is not None)
 678             for f in ('_type', 'url'):
 679                 if f in force_properties:
 680                     del force_properties[f]
 681             new_result = info.copy()
 682             new_result.update(force_properties)
 683
 684             assert new_result.get('_type') != 'url_transparent'
 685
 686             return self.process_ie_result(
 687                 new_result, download=download, extra_info=extra_info)
 688         elif result_type == 'playlist' or result_type == 'multi_video':
 689             # We process each entry in the playlist
 690             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 691             self.to_screen('[download] Downloading playlist: %s' % playlist)
 692
 693             playlist_results = []
 694
 695             playliststart = self.params.get('playliststart', 1) - 1
 696             playlistend = self.params.get('playlistend', None)
 697             # For backwards compatibility, interpret -1 as whole list
 698             if playlistend == -1:
 699                 playlistend = None
 700
 701             ie_entries = ie_result['entries']
 702             if isinstance(ie_entries, list):
 703                 n_all_entries = len(ie_entries)
 704                 entries = ie_entries[playliststart:playlistend]
 705                 n_entries = len(entries)
 706                 self.to_screen(
 707                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 708                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 709             elif isinstance(ie_entries, PagedList):
 710                 entries = ie_entries.getslice(
 711                     playliststart, playlistend)
 712                 n_entries = len(entries)
 713                 self.to_screen(
 714                     "[%s] playlist %s: Downloading %d videos" %
 715                     (ie_result['extractor'], playlist, n_entries))
 716             else:  # iterable
 717                 entries = list(itertools.islice(
 718                     ie_entries, playliststart, playlistend))
 719                 n_entries = len(entries)
 720                 self.to_screen(
 721                     "[%s] playlist %s: Downloading %d videos" %
 722                     (ie_result['extractor'], playlist, n_entries))
 723
 724             if self.params.get('playlistreverse', False):
 725                 entries = entries[::-1]
 726
 727             for i, entry in enumerate(entries, 1):
 728                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 729                 extra = {
 730                     'n_entries': n_entries,
 731                     'playlist': playlist,
 732                     'playlist_id': ie_result.get('id'),
 733                     'playlist_title': ie_result.get('title'),
 734                     'playlist_index': i + playliststart,
 735                     'extractor': ie_result['extractor'],
 736                     'webpage_url': ie_result['webpage_url'],
 737                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 738                     'extractor_key': ie_result['extractor_key'],
 739                 }
 740
 741                 reason = self._match_entry(entry)
 742                 if reason is not None:
 743                     self.to_screen('[download] ' + reason)
 744                     continue
 745
 746                 entry_result = self.process_ie_result(entry,
 747                                                       download=download,
 748                                                       extra_info=extra)
 749                 playlist_results.append(entry_result)
 750             ie_result['entries'] = playlist_results
 751             return ie_result
 752         elif result_type == 'compat_list':
 753             self.report_warning(
 754                 'Extractor %s returned a compat_list result. '
 755                 'It needs to be updated.' % ie_result.get('extractor'))
 756
 757             def _fixup(r):
 758                 self.add_extra_info(
 759                     r,
 760                     {
 761                         'extractor': ie_result['extractor'],
 762                         'webpage_url': ie_result['webpage_url'],
 763                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 764                         'extractor_key': ie_result['extractor_key'],
 765                     }
 766                 )
 767                 return r
 768             ie_result['entries'] = [
 769                 self.process_ie_result(_fixup(r), download, extra_info)
 770                 for r in ie_result['entries']
 771             ]
 772             return ie_result
 773         else:
 774             raise Exception('Invalid result type: %s' % result_type)
 775
 776     def _apply_format_filter(self, format_spec, available_formats):
 777         " Returns a tuple of the remaining format_spec and filtered formats "
 778
 779         OPERATORS = {
 780             '<': operator.lt,
 781             '<=': operator.le,
 782             '>': operator.gt,
 783             '>=': operator.ge,
 784             '=': operator.eq,
 785             '!=': operator.ne,
 786         }
 787         operator_rex = re.compile(r'''(?x)\s*\[
 788             (?P<key>width|height|tbr|abr|vbr|filesize)
 789             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 790             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 791             \]$
 792             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 793         m = operator_rex.search(format_spec)
 794         if not m:
 795             raise ValueError('Invalid format specification %r' % format_spec)
 796
 797         try:
 798             comparison_value = int(m.group('value'))
 799         except ValueError:
 800             comparison_value = parse_filesize(m.group('value'))
 801             if comparison_value is None:
 802                 comparison_value = parse_filesize(m.group('value') + 'B')
 803             if comparison_value is None:
 804                 raise ValueError(
 805                     'Invalid value %r in format specification %r' % (
 806                         m.group('value'), format_spec))
 807         op = OPERATORS[m.group('op')]
 808
 809         def _filter(f):
 810             actual_value = f.get(m.group('key'))
 811             if actual_value is None:
 812                 return m.group('none_inclusive')
 813             return op(actual_value, comparison_value)
 814         new_formats = [f for f in available_formats if _filter(f)]
 815
 816         new_format_spec = format_spec[:-len(m.group(0))]
 817         if not new_format_spec:
 818             new_format_spec = 'best'
 819
 820         return (new_format_spec, new_formats)
 821
 822     def select_format(self, format_spec, available_formats):
 823         while format_spec.endswith(']'):
 824             format_spec, available_formats = self._apply_format_filter(
 825                 format_spec, available_formats)
 826         if not available_formats:
 827             return None
 828
 829         if format_spec == 'best' or format_spec is None:
 830             return available_formats[-1]
 831         elif format_spec == 'worst':
 832             return available_formats[0]
 833         elif format_spec == 'bestaudio':
 834             audio_formats = [
 835                 f for f in available_formats
 836                 if f.get('vcodec') == 'none']
 837             if audio_formats:
 838                 return audio_formats[-1]
 839         elif format_spec == 'worstaudio':
 840             audio_formats = [
 841                 f for f in available_formats
 842                 if f.get('vcodec') == 'none']
 843             if audio_formats:
 844                 return audio_formats[0]
 845         elif format_spec == 'bestvideo':
 846             video_formats = [
 847                 f for f in available_formats
 848                 if f.get('acodec') == 'none']
 849             if video_formats:
 850                 return video_formats[-1]
 851         elif format_spec == 'worstvideo':
 852             video_formats = [
 853                 f for f in available_formats
 854                 if f.get('acodec') == 'none']
 855             if video_formats:
 856                 return video_formats[0]
 857         else:
 858             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 859             if format_spec in extensions:
 860                 filter_f = lambda f: f['ext'] == format_spec
 861             else:
 862                 filter_f = lambda f: f['format_id'] == format_spec
 863             matches = list(filter(filter_f, available_formats))
 864             if matches:
 865                 return matches[-1]
 866         return None
 867
 868     def process_video_result(self, info_dict, download=True):
 869         assert info_dict.get('_type', 'video') == 'video'
 870
 871         if 'id' not in info_dict:
 872             raise ExtractorError('Missing "id" field in extractor result')
 873         if 'title' not in info_dict:
 874             raise ExtractorError('Missing "title" field in extractor result')
 875
 876         if 'playlist' not in info_dict:
 877             # It isn't part of a playlist
 878             info_dict['playlist'] = None
 879             info_dict['playlist_index'] = None
 880
 881         thumbnails = info_dict.get('thumbnails')
 882         if thumbnails:
 883             thumbnails.sort(key=lambda t: (
 884                 t.get('width'), t.get('height'), t.get('url')))
 885             for t in thumbnails:
 886                 if 'width' in t and 'height' in t:
 887                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 888
 889         if thumbnails and 'thumbnail' not in info_dict:
 890             info_dict['thumbnail'] = thumbnails[-1]['url']
 891
 892         if 'display_id' not in info_dict and 'id' in info_dict:
 893             info_dict['display_id'] = info_dict['id']
 894
 895         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 896             # Working around negative timestamps in Windows
 897             # (see http://bugs.python.org/issue1646728)
 898             if info_dict['timestamp'] < 0 and os.name == 'nt':
 899                 info_dict['timestamp'] = 0
 900             upload_date = datetime.datetime.utcfromtimestamp(
 901                 info_dict['timestamp'])
 902             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 903
 904         # This extractors handle format selection themselves
 905         if info_dict['extractor'] in ['Youku']:
 906             if download:
 907                 self.process_info(info_dict)
 908             return info_dict
 909
 910         # We now pick which formats have to be downloaded
 911         if info_dict.get('formats') is None:
 912             # There's only one format available
 913             formats = [info_dict]
 914         else:
 915             formats = info_dict['formats']
 916
 917         if not formats:
 918             raise ExtractorError('No video formats found!')
 919
 920         # We check that all the formats have the format and format_id fields
 921         for i, format in enumerate(formats):
 922             if 'url' not in format:
 923                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 924
 925             if format.get('format_id') is None:
 926                 format['format_id'] = compat_str(i)
 927             if format.get('format') is None:
 928                 format['format'] = '{id} - {res}{note}'.format(
 929                     id=format['format_id'],
 930                     res=self.format_resolution(format),
 931                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 932                 )
 933             # Automatically determine file extension if missing
 934             if 'ext' not in format:
 935                 format['ext'] = determine_ext(format['url']).lower()
 936
 937         format_limit = self.params.get('format_limit', None)
 938         if format_limit:
 939             formats = list(takewhile_inclusive(
 940                 lambda f: f['format_id'] != format_limit, formats
 941             ))
 942
 943         # TODO Central sorting goes here
 944
 945         if formats[0] is not info_dict:
 946             # only set the 'formats' fields if the original info_dict list them
 947             # otherwise we end up with a circular reference, the first (and unique)
 948             # element in the 'formats' field in info_dict is info_dict itself,
 949             # wich can't be exported to json
 950             info_dict['formats'] = formats
 951         if self.params.get('listformats', None):
 952             self.list_formats(info_dict)
 953             return
 954
 955         req_format = self.params.get('format')
 956         if req_format is None:
 957             req_format = 'best'
 958         formats_to_download = []
 959         # The -1 is for supporting YoutubeIE
 960         if req_format in ('-1', 'all'):
 961             formats_to_download = formats
 962         else:
 963             for rfstr in req_format.split(','):
 964                 # We can accept formats requested in the format: 34/5/best, we pick
 965                 # the first that is available, starting from left
 966                 req_formats = rfstr.split('/')
 967                 for rf in req_formats:
 968                     if re.match(r'.+?\+.+?', rf) is not None:
 969                         # Two formats have been requested like '137+139'
 970                         format_1, format_2 = rf.split('+')
 971                         formats_info = (self.select_format(format_1, formats),
 972                                         self.select_format(format_2, formats))
 973                         if all(formats_info):
 974                             # The first format must contain the video and the
 975                             # second the audio
 976                             if formats_info[0].get('vcodec') == 'none':
 977                                 self.report_error('The first format must '
 978                                                   'contain the video, try using '
 979                                                   '"-f %s+%s"' % (format_2, format_1))
 980                                 return
 981                             output_ext = (
 982                                 formats_info[0]['ext']
 983                                 if self.params.get('merge_output_format') is None
 984                                 else self.params['merge_output_format'])
 985                             selected_format = {
 986                                 'requested_formats': formats_info,
 987                                 'format': rf,
 988                                 'ext': formats_info[0]['ext'],
 989                                 'width': formats_info[0].get('width'),
 990                                 'height': formats_info[0].get('height'),
 991                                 'resolution': formats_info[0].get('resolution'),
 992                                 'fps': formats_info[0].get('fps'),
 993                                 'vcodec': formats_info[0].get('vcodec'),
 994                                 'vbr': formats_info[0].get('vbr'),
 995                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
 996                                 'acodec': formats_info[1].get('acodec'),
 997                                 'abr': formats_info[1].get('abr'),
 998                                 'ext': output_ext,
 999                             }
1000                         else:
1001                             selected_format = None
1002                     else:
1003                         selected_format = self.select_format(rf, formats)
1004                     if selected_format is not None:
1005                         formats_to_download.append(selected_format)
1006                         break
1007         if not formats_to_download:
1008             raise ExtractorError('requested format not available',
1009                                  expected=True)
1010
1011         if download:
1012             if len(formats_to_download) > 1:
1013                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1014             for format in formats_to_download:
1015                 new_info = dict(info_dict)
1016                 new_info.update(format)
1017                 self.process_info(new_info)
1018         # We update the info dict with the best quality format (backwards compatibility)
1019         info_dict.update(formats_to_download[-1])
1020         return info_dict
1021
1022     def process_info(self, info_dict):
1023         """Process a single resolved IE result."""
1024
1025         assert info_dict.get('_type', 'video') == 'video'
1026
1027         max_downloads = self.params.get('max_downloads')
1028         if max_downloads is not None:
1029             if self._num_downloads >= int(max_downloads):
1030                 raise MaxDownloadsReached()
1031
1032         info_dict['fulltitle'] = info_dict['title']
1033         if len(info_dict['title']) > 200:
1034             info_dict['title'] = info_dict['title'][:197] + '...'
1035
1036         # Keep for backwards compatibility
1037         info_dict['stitle'] = info_dict['title']
1038
1039         if 'format' not in info_dict:
1040             info_dict['format'] = info_dict['ext']
1041
1042         reason = self._match_entry(info_dict)
1043         if reason is not None:
1044             self.to_screen('[download] ' + reason)
1045             return
1046
1047         self._num_downloads += 1
1048
1049         filename = self.prepare_filename(info_dict)
1050
1051         # Forced printings
1052         if self.params.get('forcetitle', False):
1053             self.to_stdout(info_dict['fulltitle'])
1054         if self.params.get('forceid', False):
1055             self.to_stdout(info_dict['id'])
1056         if self.params.get('forceurl', False):
1057             if info_dict.get('requested_formats') is not None:
1058                 for f in info_dict['requested_formats']:
1059                     self.to_stdout(f['url'] + f.get('play_path', ''))
1060             else:
1061                 # For RTMP URLs, also include the playpath
1062                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1063         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1064             self.to_stdout(info_dict['thumbnail'])
1065         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1066             self.to_stdout(info_dict['description'])
1067         if self.params.get('forcefilename', False) and filename is not None:
1068             self.to_stdout(filename)
1069         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1070             self.to_stdout(formatSeconds(info_dict['duration']))
1071         if self.params.get('forceformat', False):
1072             self.to_stdout(info_dict['format'])
1073         if self.params.get('forcejson', False):
1074             info_dict['_filename'] = filename
1075             self.to_stdout(json.dumps(info_dict))
1076         if self.params.get('dump_single_json', False):
1077             info_dict['_filename'] = filename
1078
1079         # Do nothing else if in simulate mode
1080         if self.params.get('simulate', False):
1081             return
1082
1083         if filename is None:
1084             return
1085
1086         try:
1087             dn = os.path.dirname(encodeFilename(filename))
1088             if dn and not os.path.exists(dn):
1089                 os.makedirs(dn)
1090         except (OSError, IOError) as err:
1091             self.report_error('unable to create directory ' + compat_str(err))
1092             return
1093
1094         if self.params.get('writedescription', False):
1095             descfn = filename + '.description'
1096             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1097                 self.to_screen('[info] Video description is already present')
1098             elif info_dict.get('description') is None:
1099                 self.report_warning('There\'s no description to write.')
1100             else:
1101                 try:
1102                     self.to_screen('[info] Writing video description to: ' + descfn)
1103                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1104                         descfile.write(info_dict['description'])
1105                 except (OSError, IOError):
1106                     self.report_error('Cannot write description file ' + descfn)
1107                     return
1108
1109         if self.params.get('writeannotations', False):
1110             annofn = filename + '.annotations.xml'
1111             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1112                 self.to_screen('[info] Video annotations are already present')
1113             else:
1114                 try:
1115                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1116                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1117                         annofile.write(info_dict['annotations'])
1118                 except (KeyError, TypeError):
1119                     self.report_warning('There are no annotations to write.')
1120                 except (OSError, IOError):
1121                     self.report_error('Cannot write annotations file: ' + annofn)
1122                     return
1123
1124         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1125                                        self.params.get('writeautomaticsub')])
1126
1127         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1128             # subtitles download errors are already managed as troubles in relevant IE
1129             # that way it will silently go on when used with unsupporting IE
1130             subtitles = info_dict['subtitles']
1131             sub_format = self.params.get('subtitlesformat', 'srt')
1132             for sub_lang in subtitles.keys():
1133                 sub = subtitles[sub_lang]
1134                 if sub is None:
1135                     continue
1136                 try:
1137                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1138                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1139                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1140                     else:
1141                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1142                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1143                             subfile.write(sub)
1144                 except (OSError, IOError):
1145                     self.report_error('Cannot write subtitles file ' + sub_filename)
1146                     return
1147
1148         if self.params.get('writeinfojson', False):
1149             infofn = os.path.splitext(filename)[0] + '.info.json'
1150             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1151                 self.to_screen('[info] Video description metadata is already present')
1152             else:
1153                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1154                 try:
1155                     write_json_file(info_dict, infofn)
1156                 except (OSError, IOError):
1157                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1158                     return
1159
1160         if self.params.get('writethumbnail', False):
1161             if info_dict.get('thumbnail') is not None:
1162                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1163                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1164                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1165                     self.to_screen('[%s] %s: Thumbnail is already present' %
1166                                    (info_dict['extractor'], info_dict['id']))
1167                 else:
1168                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1169                                    (info_dict['extractor'], info_dict['id']))
1170                     try:
1171                         uf = self.urlopen(info_dict['thumbnail'])
1172                         with open(thumb_filename, 'wb') as thumbf:
1173                             shutil.copyfileobj(uf, thumbf)
1174                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1175                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1176                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1177                         self.report_warning('Unable to download thumbnail "%s": %s' %
1178                                             (info_dict['thumbnail'], compat_str(err)))
1179
1180         if not self.params.get('skip_download', False):
1181             try:
1182                 def dl(name, info):
1183                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1184                     for ph in self._progress_hooks:
1185                         fd.add_progress_hook(ph)
1186                     if self.params.get('verbose'):
1187                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1188                     return fd.download(name, info)
1189                 if info_dict.get('requested_formats') is not None:
1190                     downloaded = []
1191                     success = True
1192                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1193                     if not merger._executable:
1194                         postprocessors = []
1195                         self.report_warning('You have requested multiple '
1196                                             'formats but ffmpeg or avconv are not installed.'
1197                                             ' The formats won\'t be merged')
1198                     else:
1199                         postprocessors = [merger]
1200                     for f in info_dict['requested_formats']:
1201                         new_info = dict(info_dict)
1202                         new_info.update(f)
1203                         fname = self.prepare_filename(new_info)
1204                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1205                         downloaded.append(fname)
1206                         partial_success = dl(fname, new_info)
1207                         success = success and partial_success
1208                     info_dict['__postprocessors'] = postprocessors
1209                     info_dict['__files_to_merge'] = downloaded
1210                 else:
1211                     # Just a single file
1212                     success = dl(filename, info_dict)
1213             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1214                 self.report_error('unable to download video data: %s' % str(err))
1215                 return
1216             except (OSError, IOError) as err:
1217                 raise UnavailableVideoError(err)
1218             except (ContentTooShortError, ) as err:
1219                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1220                 return
1221
1222             if success:
1223                 # Fixup content
1224                 fixup_policy = self.params.get('fixup')
1225                 if fixup_policy is None:
1226                     fixup_policy = 'detect_or_warn'
1227
1228                 stretched_ratio = info_dict.get('stretched_ratio')
1229                 if stretched_ratio is not None and stretched_ratio != 1:
1230                     if fixup_policy == 'warn':
1231                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1232                             info_dict['id'], stretched_ratio))
1233                     elif fixup_policy == 'detect_or_warn':
1234                         stretched_pp = FFmpegFixupStretchedPP(self)
1235                         if stretched_pp.available:
1236                             info_dict.setdefault('__postprocessors', [])
1237                             info_dict['__postprocessors'].append(stretched_pp)
1238                         else:
1239                             self.report_warning(
1240                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1241                                     info_dict['id'], stretched_ratio))
1242                     else:
1243                         assert fixup_policy in ('ignore', 'never')
1244
1245                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1246                     if fixup_policy == 'warn':
1247                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1248                             info_dict['id']))
1249                     elif fixup_policy == 'detect_or_warn':
1250                         fixup_pp = FFmpegFixupM4aPP(self)
1251                         if fixup_pp.available:
1252                             info_dict.setdefault('__postprocessors', [])
1253                             info_dict['__postprocessors'].append(fixup_pp)
1254                         else:
1255                             self.report_warning(
1256                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1257                                     info_dict['id']))
1258                     else:
1259                         assert fixup_policy in ('ignore', 'never')
1260
1261                 try:
1262                     self.post_process(filename, info_dict)
1263                 except (PostProcessingError) as err:
1264                     self.report_error('postprocessing: %s' % str(err))
1265                     return
1266                 self.record_download_archive(info_dict)
1267
1268     def download(self, url_list):
1269         """Download a given list of URLs."""
1270         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1271         if (len(url_list) > 1 and
1272                 '%' not in outtmpl
1273                 and self.params.get('max_downloads') != 1):
1274             raise SameFileError(outtmpl)
1275
1276         for url in url_list:
1277             try:
1278                 # It also downloads the videos
1279                 res = self.extract_info(url)
1280             except UnavailableVideoError:
1281                 self.report_error('unable to download video')
1282             except MaxDownloadsReached:
1283                 self.to_screen('[info] Maximum number of downloaded files reached.')
1284                 raise
1285             else:
1286                 if self.params.get('dump_single_json', False):
1287                     self.to_stdout(json.dumps(res))
1288
1289         return self._download_retcode
1290
1291     def download_with_info_file(self, info_filename):
1292         with io.open(info_filename, 'r', encoding='utf-8') as f:
1293             info = json.load(f)
1294         try:
1295             self.process_ie_result(info, download=True)
1296         except DownloadError:
1297             webpage_url = info.get('webpage_url')
1298             if webpage_url is not None:
1299                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1300                 return self.download([webpage_url])
1301             else:
1302                 raise
1303         return self._download_retcode
1304
1305     def post_process(self, filename, ie_info):
1306         """Run all the postprocessors on the given file."""
1307         info = dict(ie_info)
1308         info['filepath'] = filename
1309         pps_chain = []
1310         if ie_info.get('__postprocessors') is not None:
1311             pps_chain.extend(ie_info['__postprocessors'])
1312         pps_chain.extend(self._pps)
1313         for pp in pps_chain:
1314             keep_video = None
1315             old_filename = info['filepath']
1316             try:
1317                 keep_video_wish, info = pp.run(info)
1318                 if keep_video_wish is not None:
1319                     if keep_video_wish:
1320                         keep_video = keep_video_wish
1321                     elif keep_video is None:
1322                         # No clear decision yet, let IE decide
1323                         keep_video = keep_video_wish
1324             except PostProcessingError as e:
1325                 self.report_error(e.msg)
1326             if keep_video is False and not self.params.get('keepvideo', False):
1327                 try:
1328                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1329                     os.remove(encodeFilename(old_filename))
1330                 except (IOError, OSError):
1331                     self.report_warning('Unable to remove downloaded video file')
1332
1333     def _make_archive_id(self, info_dict):
1334         # Future-proof against any change in case
1335         # and backwards compatibility with prior versions
1336         extractor = info_dict.get('extractor_key')
1337         if extractor is None:
1338             if 'id' in info_dict:
1339                 extractor = info_dict.get('ie_key')  # key in a playlist
1340         if extractor is None:
1341             return None  # Incomplete video information
1342         return extractor.lower() + ' ' + info_dict['id']
1343
1344     def in_download_archive(self, info_dict):
1345         fn = self.params.get('download_archive')
1346         if fn is None:
1347             return False
1348
1349         vid_id = self._make_archive_id(info_dict)
1350         if vid_id is None:
1351             return False  # Incomplete video information
1352
1353         try:
1354             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1355                 for line in archive_file:
1356                     if line.strip() == vid_id:
1357                         return True
1358         except IOError as ioe:
1359             if ioe.errno != errno.ENOENT:
1360                 raise
1361         return False
1362
1363     def record_download_archive(self, info_dict):
1364         fn = self.params.get('download_archive')
1365         if fn is None:
1366             return
1367         vid_id = self._make_archive_id(info_dict)
1368         assert vid_id
1369         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1370             archive_file.write(vid_id + '\n')
1371
1372     @staticmethod
1373     def format_resolution(format, default='unknown'):
1374         if format.get('vcodec') == 'none':
1375             return 'audio only'
1376         if format.get('resolution') is not None:
1377             return format['resolution']
1378         if format.get('height') is not None:
1379             if format.get('width') is not None:
1380                 res = '%sx%s' % (format['width'], format['height'])
1381             else:
1382                 res = '%sp' % format['height']
1383         elif format.get('width') is not None:
1384             res = '?x%d' % format['width']
1385         else:
1386             res = default
1387         return res
1388
1389     def _format_note(self, fdict):
1390         res = ''
1391         if fdict.get('ext') in ['f4f', 'f4m']:
1392             res += '(unsupported) '
1393         if fdict.get('format_note') is not None:
1394             res += fdict['format_note'] + ' '
1395         if fdict.get('tbr') is not None:
1396             res += '%4dk ' % fdict['tbr']
1397         if fdict.get('container') is not None:
1398             if res:
1399                 res += ', '
1400             res += '%s container' % fdict['container']
1401         if (fdict.get('vcodec') is not None and
1402                 fdict.get('vcodec') != 'none'):
1403             if res:
1404                 res += ', '
1405             res += fdict['vcodec']
1406             if fdict.get('vbr') is not None:
1407                 res += '@'
1408         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1409             res += 'video@'
1410         if fdict.get('vbr') is not None:
1411             res += '%4dk' % fdict['vbr']
1412         if fdict.get('fps') is not None:
1413             res += ', %sfps' % fdict['fps']
1414         if fdict.get('acodec') is not None:
1415             if res:
1416                 res += ', '
1417             if fdict['acodec'] == 'none':
1418                 res += 'video only'
1419             else:
1420                 res += '%-5s' % fdict['acodec']
1421         elif fdict.get('abr') is not None:
1422             if res:
1423                 res += ', '
1424             res += 'audio'
1425         if fdict.get('abr') is not None:
1426             res += '@%3dk' % fdict['abr']
1427         if fdict.get('asr') is not None:
1428             res += ' (%5dHz)' % fdict['asr']
1429         if fdict.get('filesize') is not None:
1430             if res:
1431                 res += ', '
1432             res += format_bytes(fdict['filesize'])
1433         elif fdict.get('filesize_approx') is not None:
1434             if res:
1435                 res += ', '
1436             res += '~' + format_bytes(fdict['filesize_approx'])
1437         return res
1438
1439     def list_formats(self, info_dict):
1440         def line(format, idlen=20):
1441             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1442                 format['format_id'],
1443                 format['ext'],
1444                 self.format_resolution(format),
1445                 self._format_note(format),
1446             ))
1447
1448         formats = info_dict.get('formats', [info_dict])
1449         idlen = max(len('format code'),
1450                     max(len(f['format_id']) for f in formats))
1451         formats_s = [
1452             line(f, idlen) for f in formats
1453             if f.get('preference') is None or f['preference'] >= -1000]
1454         if len(formats) > 1:
1455             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1456             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1457
1458         header_line = line({
1459             'format_id': 'format code', 'ext': 'extension',
1460             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1461         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1462                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1463
1464     def urlopen(self, req):
1465         """ Start an HTTP download """
1466
1467         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1468         # always respected by websites, some tend to give out URLs with non percent-encoded
1469         # non-ASCII characters (see telemb.py, ard.py [#3412])
1470         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1471         # To work around aforementioned issue we will replace request's original URL with
1472         # percent-encoded one
1473         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1474         url = req if req_is_string else req.get_full_url()
1475         url_escaped = escape_url(url)
1476
1477         # Substitute URL if any change after escaping
1478         if url != url_escaped:
1479             if req_is_string:
1480                 req = url_escaped
1481             else:
1482                 req = compat_urllib_request.Request(
1483                     url_escaped, data=req.data, headers=req.headers,
1484                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1485
1486         return self._opener.open(req, timeout=self._socket_timeout)
1487
1488     def print_debug_header(self):
1489         if not self.params.get('verbose'):
1490             return
1491
1492         if type('') is not compat_str:
1493             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1494             self.report_warning(
1495                 'Your Python is broken! Update to a newer and supported version')
1496
1497         stdout_encoding = getattr(
1498             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1499         encoding_str = (
1500             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1501                 locale.getpreferredencoding(),
1502                 sys.getfilesystemencoding(),
1503                 stdout_encoding,
1504                 self.get_encoding()))
1505         write_string(encoding_str, encoding=None)
1506
1507         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1508         try:
1509             sp = subprocess.Popen(
1510                 ['git', 'rev-parse', '--short', 'HEAD'],
1511                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1512                 cwd=os.path.dirname(os.path.abspath(__file__)))
1513             out, err = sp.communicate()
1514             out = out.decode().strip()
1515             if re.match('[0-9a-f]+', out):
1516                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1517         except:
1518             try:
1519                 sys.exc_clear()
1520             except:
1521                 pass
1522         self._write_string('[debug] Python version %s - %s\n' % (
1523             platform.python_version(), platform_name()))
1524
1525         exe_versions = FFmpegPostProcessor.get_versions()
1526         exe_versions['rtmpdump'] = rtmpdump_version()
1527         exe_str = ', '.join(
1528             '%s %s' % (exe, v)
1529             for exe, v in sorted(exe_versions.items())
1530             if v
1531         )
1532         if not exe_str:
1533             exe_str = 'none'
1534         self._write_string('[debug] exe versions: %s\n' % exe_str)
1535
1536         proxy_map = {}
1537         for handler in self._opener.handlers:
1538             if hasattr(handler, 'proxies'):
1539                 proxy_map.update(handler.proxies)
1540         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1541
1542         if self.params.get('call_home', False):
1543             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1544             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1545             latest_version = self.urlopen(
1546                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1547             if version_tuple(latest_version) > version_tuple(__version__):
1548                 self.report_warning(
1549                     'You are using an outdated version (newest version: %s)! '
1550                     'See https://yt-dl.org/update if you need help updating.' %
1551                     latest_version)
1552
1553     def _setup_opener(self):
1554         timeout_val = self.params.get('socket_timeout')
1555         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1556
1557         opts_cookiefile = self.params.get('cookiefile')
1558         opts_proxy = self.params.get('proxy')
1559
1560         if opts_cookiefile is None:
1561             self.cookiejar = compat_cookiejar.CookieJar()
1562         else:
1563             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1564                 opts_cookiefile)
1565             if os.access(opts_cookiefile, os.R_OK):
1566                 self.cookiejar.load()
1567
1568         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1569             self.cookiejar)
1570         if opts_proxy is not None:
1571             if opts_proxy == '':
1572                 proxies = {}
1573             else:
1574                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1575         else:
1576             proxies = compat_urllib_request.getproxies()
1577             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1578             if 'http' in proxies and 'https' not in proxies:
1579                 proxies['https'] = proxies['http']
1580         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1581
1582         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1583         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1584         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1585         opener = compat_urllib_request.build_opener(
1586             https_handler, proxy_handler, cookie_processor, ydlh)
1587         # Delete the default user-agent header, which would otherwise apply in
1588         # cases where our custom HTTP handler doesn't come into play
1589         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1590         opener.addheaders = []
1591         self._opener = opener
1592
1593     def encode(self, s):
1594         if isinstance(s, bytes):
1595             return s  # Already encoded
1596
1597         try:
1598             return s.encode(self.get_encoding())
1599         except UnicodeEncodeError as err:
1600             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1601             raise
1602
1603     def get_encoding(self):
1604         encoding = self.params.get('encoding')
1605         if encoding is None:
1606             encoding = preferredencoding()
1607         return encoding