2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_urllib_request,
61 UnavailableVideoError,
71 from .cache import Cache
72 from .extractor import get_info_extractor, gen_extractors
73 from .downloader import get_suitable_downloader
74 from .downloader.rtmp import rtmpdump_version
75 from .postprocessor import (
77 FFmpegFixupStretchedPP,
82 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * filename: The final filename
                       * status: One of "downloading" and "finished"

                       The dict may also have some of the following entries:

                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * tmpfilename: The filename we're currently writing to
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.

    The following parameters are not used by YoutubeDL itself, they are used by
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """

    # Per-run counters; replaced with real values in __init__.
    _download_retcode = None
    _num_downloads = None
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        # NOTE(review): several original lines are elided in this excerpt;
        # gaps are annotated below and the surviving code is left untouched.
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen messages go to stderr when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Route screen output through an external bidi filter (bidiv or
            # fribidi) over a pty so RTL text renders on buggy terminals.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            # NOTE(review): the 'try:' and the sp_kwargs dict head are elided.
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            # NOTE(review): fallback branch head (bidiv missing) elided here.
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # Best-effort: missing fribidi only disables the workaround.
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            # NOTE(review): the 'self.report_warning(' call head is elided here.
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self.print_debug_header()
        self.add_default_info_extractors()

        # Instantiate and attach the configured postprocessors.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            # NOTE(review): removal of the 'key' entry from pp_def is elided.
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
    def warn_if_short_id(self, argv):
        """Warn when a dash-leading 11-char YouTube ID was likely consumed by
        the option parser, suggesting the '--'-separated command line."""
        # short YouTube ID starting with dash?
        # NOTE(review): the 'idxs = [' opener is elided in this excerpt.
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        # NOTE(review): the guard and 'correct_argv = (' head are elided here.
            [a for i, a in enumerate(argv) if i not in idxs] +
            ['--'] + [argv[i] for i in idxs]
        # NOTE(review): the 'self.report_warning(' call head is elided here.
            'Long argument string detected. '
            'Use -- to separate parameters and URLs, like this:\n%s\n' %
            args_to_str(correct_argv))
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # NOTE(review): the line appending ie to the ordered extractor list is
        # elided in this excerpt; only the keyed cache and back-reference remain.
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)
335 def get_info_extractor(self, ie_key):
337 Get an instance of an IE with name ie_key, it will try to get one from
338 the _ies list, if there's no instance it will create a new one and add
339 it to the extractor list.
341 ie = self._ies_instances.get(ie_key)
343 ie = get_info_extractor(ie_key)()
344 self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # NOTE(review): the line appending pp to the processor chain is elided
        # in this excerpt; only the back-reference wiring survives.
        pp.set_downloader(self)
359 def add_progress_hook(self, ph):
360 """Add the progress hook (currently only for the file downloader)"""
361 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # Filter *message* through the external bidi process started in
        # __init__ and return the reshaped text (trailing newline stripped).
        if not hasattr(self, '_output_channel'):
            # NOTE(review): the early 'return message' fallback (workaround
            # disabled) is elided in this excerpt.

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many reshaped lines as were written.
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]
376 def to_screen(self, message, skip_eol=False):
377 """Print message to stdout if not in quiet mode."""
378 return self.to_stdout(message, skip_eol, check_quiet=True)
380 def _write_string(self, s, out=None):
381 write_string(s, out=out, encoding=self.params.get('encoding'))
383 def to_stdout(self, message, skip_eol=False, check_quiet=False):
384 """Print message to stdout if not in quiet mode."""
385 if self.params.get('logger'):
386 self.params['logger'].debug(message)
387 elif not check_quiet or not self.params.get('quiet', False):
388 message = self._bidi_workaround(message)
389 terminator = ['\n', ''][skip_eol]
390 output = message + terminator
392 self._write_string(output, self._screen_file)
394 def to_stderr(self, message):
395 """Print message to stderr."""
396 assert isinstance(message, compat_str)
397 if self.params.get('logger'):
398 self.params['logger'].error(message)
400 message = self._bidi_workaround(message)
401 output = message + '\n'
402 self._write_string(output, self._err_file)
404 def to_console_title(self, message):
405 if not self.params.get('consoletitle', False):
407 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
408 # c_wchar_p() might not be necessary if `message` is
409 # already of type unicode()
410 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
411 elif 'TERM' in os.environ:
412 self._write_string('\033]0;%s\007' % message, self._screen_file)
414 def save_console_title(self):
415 if not self.params.get('consoletitle', False):
417 if 'TERM' in os.environ:
418 # Save the title on stack
419 self._write_string('\033[22;0t', self._screen_file)
421 def restore_console_title(self):
422 if not self.params.get('consoletitle', False):
424 if 'TERM' in os.environ:
425 # Restore the title from stack
426 self._write_string('\033[23;0t', self._screen_file)
429 self.save_console_title()
432 def __exit__(self, *args):
433 self.restore_console_title()
435 if self.params.get('cookiefile') is not None:
436 self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            # NOTE(review): the 'if tb is None:' guard is elided in this excerpt.
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # NOTE(review): the 'tb = ' initialisation is elided here.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            # NOTE(review): an 'else:' branch head is elided here.
            tb_data = traceback.format_list(traceback.extract_stack())
            tb = ''.join(tb_data)
            # NOTE(review): the 'self.to_stderr(tb)' line is elided here.
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped extractor exception's exc_info when present.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            # NOTE(review): an 'else:' branch head is elided here.
            exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
468 def report_warning(self, message):
470 Print the message to stderr, it will be prefixed with 'WARNING:'
471 If stderr is a tty file the 'WARNING:' will be colored
473 if self.params.get('logger') is not None:
474 self.params['logger'].warning(message)
476 if self.params.get('no_warnings'):
478 if self._err_file.isatty() and os.name != 'nt':
479 _msg_header = '\033[0;33mWARNING:\033[0m'
481 _msg_header = 'WARNING:'
482 warning_message = '%s %s' % (_msg_header, message)
483 self.to_stderr(warning_message)
485 def report_error(self, message, tb=None):
487 Do the same as trouble, but prefixes the message with 'ERROR:', colored
488 in red if stderr is a tty file.
490 if self._err_file.isatty() and os.name != 'nt':
491 _msg_header = '\033[0;31mERROR:\033[0m'
493 _msg_header = 'ERROR:'
494 error_message = '%s %s' % (_msg_header, message)
495 self.trouble(error_message, tb)
497 def report_file_already_downloaded(self, file_name):
498 """Report file has already been fully downloaded."""
500 self.to_screen('[download] %s has already been downloaded' % file_name)
501 except UnicodeEncodeError:
502 self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): the enclosing 'try:' is elided in this excerpt.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            # NOTE(review): the default width assignment is elided here.
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the index to the width of the playlist length.
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '?x%d' % template_dict['width']

        # NOTE(review): parts of the sanitize lambda and the value filter are
        # elided in this excerpt.
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Missing template fields render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        # NOTE(review): the 'return filename' line is elided here.
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            # NOTE(review): the 'if matchtitle:' guard is elided in this excerpt.
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            # NOTE(review): the 'if rejecttitle:' guard is elided here.
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        # NOTE(review): the 'if date is not None:' guard is elided here.
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
        # NOTE(review): the trailing 'return None' is elided in this excerpt.
577 def add_extra_info(info_dict, extra_info):
578 '''Set the keys from extra_info in info dict if they are missing'''
579 for key, value in extra_info.items():
580 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        # NOTE(review): the rest of the signature ('process=True):') and the
        # docstring opener are elided in this excerpt.
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        # NOTE(review): the 'if ie_key:' guard is elided here.
        ies = [self.get_info_extractor(ie_key)]
        # NOTE(review): the 'for ie in ies:' loop head is elided here.
        if not ie.suitable(url):
            # NOTE(review): 'continue' and the working-check are elided here.
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')
            # NOTE(review): the 'try:' line is elided here.
            ie_result = ie.extract(url)
            if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                # NOTE(review): the wrapping dict head is elided here.
                '_type': 'compat_list',
                'entries': ie_result,
            self.add_default_extra_info(ie_result, ie, url)
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de: # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except MaxDownloadsReached:
            # NOTE(review): the re-raise line is elided here.
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        # Reached when no registered extractor accepted the URL.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
632 def add_default_extra_info(self, ie_result, ie, url):
633 self.add_extra_info(ie_result, {
634 'extractor': ie.IE_NAME,
636 'webpage_url_basename': url_basename(url),
637 'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                # NOTE(review): the 'return ie_result' line is elided here.

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            # NOTE(review): the 'download,' argument line is elided below.
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                # NOTE(review): the 'playlistend = None' line is elided here.

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                # NOTE(review): the 'self.to_screen(' call head is elided here.
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                entries = ie_entries.getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                # NOTE(review): the 'self.to_screen(' call head is elided here.
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            # NOTE(review): the 'else:' (iterable entries) branch head is elided.
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                # NOTE(review): the 'self.to_screen(' call head is elided here.
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # NOTE(review): the 'extra = {' dict head is elided here.
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    # NOTE(review): the 'continue' line is elided here.

                # NOTE(review): the keyword-argument lines of this call are elided.
                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            # NOTE(review): the 'return ie_result' line is elided here.
        elif result_type == 'compat_list':
            # NOTE(review): the 'self.report_warning(' call head is elided here.
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))
            # NOTE(review): the '_fixup(r)' helper definition head is elided here.
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
        # NOTE(review): the closing ']', 'return ie_result' and 'else:' lines
        # are elided here.
            raise Exception('Invalid result type: %s' % result_type)
    def _apply_format_filter(self, format_spec, available_formats):
        " Returns a tuple of the remaining format_spec and filtered formats "
        # NOTE(review): the OPERATORS mapping (comparison string -> operator
        # function) is elided in this excerpt.
        operator_rex = re.compile(r'''(?x)\s*\[
            (?P<key>width|height|tbr|abr|vbr|filesize)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(format_spec)
        # NOTE(review): the 'if not m:' guard is elided here.
            raise ValueError('Invalid format specification %r' % format_spec)

        # NOTE(review): the 'try:' line is elided here.
        comparison_value = int(m.group('value'))
        # NOTE(review): the 'except ValueError:' line is elided here; the
        # fallback parses human-readable sizes such as '500KiB'.
        comparison_value = parse_filesize(m.group('value'))
        if comparison_value is None:
            comparison_value = parse_filesize(m.group('value') + 'B')
        if comparison_value is None:
            # NOTE(review): the 'raise ValueError(' head is elided here.
            'Invalid value %r in format specification %r' % (
                m.group('value'), format_spec))
        op = OPERATORS[m.group('op')]

        # NOTE(review): the 'def _filter(f):' helper head is elided here.
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        new_formats = [f for f in available_formats if _filter(f)]

        # Strip the consumed '[...]' suffix; an empty remainder means 'best'.
        new_format_spec = format_spec[:-len(m.group(0))]
        if not new_format_spec:
            new_format_spec = 'best'

        return (new_format_spec, new_formats)
    def select_format(self, format_spec, available_formats):
        # Resolve a single format specifier (e.g. 'best', 'worstaudio',
        # 'mp4', a format_id, optionally with '[...]' filters) to one format.
        while format_spec.endswith(']'):
            format_spec, available_formats = self._apply_format_filter(
                format_spec, available_formats)
        if not available_formats:
            # NOTE(review): the 'return None' line is elided in this excerpt.

        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
            # NOTE(review): the 'audio_formats = [' opener is elided here.
                f for f in available_formats
                if f.get('vcodec') == 'none']
            # NOTE(review): the 'if audio_formats:' guard is elided here.
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
            # NOTE(review): the 'audio_formats = [' opener is elided here.
                f for f in available_formats
                if f.get('vcodec') == 'none']
            # NOTE(review): the 'if audio_formats:' guard is elided here.
                return audio_formats[0]
        elif format_spec == 'bestvideo':
            # NOTE(review): the 'video_formats = [' opener is elided here.
                f for f in available_formats
                if f.get('acodec') == 'none']
            # NOTE(review): the 'if video_formats:' guard is elided here.
                return video_formats[-1]
        elif format_spec == 'worstvideo':
            # NOTE(review): the 'video_formats = [' opener is elided here.
                f for f in available_formats
                if f.get('acodec') == 'none']
            # NOTE(review): the 'if video_formats:' guard is elided here.
                return video_formats[0]
        # NOTE(review): an 'else:' branch head is elided here.
            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
            if format_spec in extensions:
                filter_f = lambda f: f['ext'] == format_spec
            # NOTE(review): an 'else:' (match by format_id) head is elided here.
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
            # NOTE(review): the 'if matches: return matches[-1]' tail is elided.
    def process_video_result(self, info_dict, download=True):
        # Normalise a single-video result, pick the requested format(s) and
        # hand each one to process_info().
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        # NOTE(review): the 'if thumbnails:' guard is elided in this excerpt.
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
            # NOTE(review): the 'for t in thumbnails:' loop head is elided here.
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            # NOTE(review): the 'if download:' guard is elided here.
            self.process_info(info_dict)
            # NOTE(review): the 'return info_dict' line is elided here.

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        # NOTE(review): the 'else:' branch head is elided here.
            formats = info_dict['formats']

        # NOTE(review): the 'if not formats:' guard is elided here.
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        format_limit = self.params.get('format_limit', None)
        # NOTE(review): the 'if format_limit:' guard is elided here.
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            # NOTE(review): the early 'return' line is elided here.

        req_format = self.params.get('format')
        if req_format is None:
            # NOTE(review): the default assignment (req_format = 'best') is elided.
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        # NOTE(review): the 'else:' branch head is elided here.
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                            # NOTE(review): the 'else:' / 'output_ext = (' lines
                            # are elided here.
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # NOTE(review): the merged-format dict head is elided.
                                'requested_formats': formats_info,
                                'ext': formats_info[0]['ext'],
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                            selected_format = None
                    # NOTE(review): the 'else:' (single format) head is elided.
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        # NOTE(review): the 'break' line is elided here.
        if not formats_to_download:
            raise ExtractorError('requested format not available',

        # NOTE(review): the 'expected=True)' tail and the 'if download:' guard
        # are elided here.
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles forced metadata printing, writing of side files
        (description, annotations, subtitles, info JSON, thumbnail),
        the actual download (including downloading and merging separate
        video+audio formats), post-download fixups, post-processing and
        recording the entry in the download archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Abort once the --max-downloads limit has been reached
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            # Truncate overly long titles so generated filenames stay usable
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            # Fall back to the extension when no format description is given
            info_dict['format'] = info_dict['ext']

        # --match-title/--dateafter-style filters; a non-None reason means skip
        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        # Forced printings (--get-title, --get-id, --get-url, ...)
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))
        if self.params.get('dump_single_json', False):
            info_dict['_filename'] = filename

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):

        if filename is None:

            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))

        # Write the description to a .description side file
        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)

        # Write the annotations to an .annotations.xml side file
        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)

        # Dump the full metadata as an .info.json side file
        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                    write_json_file(info_dict, infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)

        # Fetch and store the thumbnail next to the video file
        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # Separate video+audio formats: download each part, then merge
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

                # Post-download fixups (aspect ratio, DASH m4a container)
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                        assert fixup_policy in ('ignore', 'never')

                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                    if fixup_policy == 'warn':
                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                            self.report_warning(
                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                        assert fixup_policy in ('ignore', 'never')

                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Returns the process return code (self._download_retcode).
        Raises SameFileError when several URLs would be written to one
        fixed output file.
        """
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        if (len(url_list) > 1 and
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
                # It also downloads the videos
                res = self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                # Stop processing further URLs once the limit is hit
                self.to_screen('[info] Maximum number of downloaded files reached.')
                if self.params.get('dump_single_json', False):
                    self.to_stdout(json.dumps(res))

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download using a previously dumped .info.json file.

        Falls back to re-extracting from the webpage URL when processing
        the stored info dict fails with a DownloadError.
        """
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            self.process_ie_result(info, download=True)
        except DownloadError:
            # The stored info may be stale; retry from the original page URL
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Per-download postprocessors (info dict key '__postprocessors')
        run before the globally registered ones (self._pps).  The
        original file is deleted afterwards unless a postprocessor asked
        to keep it or --keep-video was given.
        """
        info = dict(ie_info)
        info['filepath'] = filename
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            old_filename = info['filepath']
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
1332 def _make_archive_id(self, info_dict):
1333 # Future-proof against any change in case
1334 # and backwards compatibility with prior versions
1335 extractor = info_dict.get('extractor_key')
1336 if extractor is None:
1337 if 'id' in info_dict:
1338 extractor = info_dict.get('ie_key') # key in a playlist
1339 if extractor is None:
1340 return None # Incomplete video information
1341 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Check whether this video is already listed in --download-archive."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file just means nothing was recorded yet
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the --download-archive file."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        # locked_file serializes concurrent writers to the archive
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    def format_resolution(format, default='unknown'):
        # Return a human-readable resolution string for a format dict:
        # an explicit 'resolution' value, "WxH", "Hp", "?xW", or `default`.
        if format.get('vcodec') == 'none':
        if format.get('resolution') is not None:
            # Prefer the resolution string supplied by the extractor
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = '%sx%s' % (format['width'], format['height'])
                # Height only: use the conventional "720p"-style notation
                res = '%sp' % format['height']
        elif format.get('width') is not None:
            res = '?x%d' % format['width']
    def _format_note(self, fdict):
        """Build the human-readable "note" column for a format dict.

        Accumulates container, video codec/bitrate, fps, audio
        codec/bitrate/sample rate and (approximate) filesize into a
        single comma-separated string.
        """
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            # 'none' acodec means a video-only format
            if fdict['acodec'] == 'none':
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # '~' marks an estimated size
            res += '~' + format_bytes(fdict['filesize_approx'])
    def list_formats(self, info_dict):
        """Print a table of the available formats (--list-formats)."""
        # One table row; idlen sizes the "format code" column
        def line(format, idlen=20):
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                self._format_note(format),
        formats = info_dict.get('formats', [info_dict])
        # Column must fit the header as well as the longest format id
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
            line(f, idlen) for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
        if len(formats) > 1:
            # Formats are sorted worst-to-best; tag the two extremes
            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
    def urlopen(self, req):
        """ Start an HTTP download """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # `req` may be either a plain URL string or a Request object
        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            # Rebuild the Request object, preserving data and headers
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Write the verbose-mode debug header (versions, encodings, proxies)."""
        if not self.params.get('verbose'):

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
            # Best effort: report the git revision when running from a checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of external helper programs (ffmpeg/avconv, rtmpdump)
        exe_versions = FFmpegPostProcessor.get_versions()
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            for exe, v in sorted(exe_versions.items())
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: contact yt-dl.org to report IP and check for updates
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS handler) used by urlopen()."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds when not configured
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Encode text to bytes using the configured output encoding."""
        if isinstance(s, bytes):
            return s  # Already encoded

            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            # Enrich the error message with a hint before re-raising
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1602 def get_encoding(self):
1603 encoding = self.params.get('encoding')
1604 if encoding is None:
1605 encoding = preferredencoding()