youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_cookiejar,
  29     compat_expanduser,
  30     compat_http_client,
  31     compat_kwargs,
  32     compat_str,
  33     compat_urllib_error,
  34     compat_urllib_request,
  35 )
  36 from .utils import (
  37     escape_url,
  38     ContentTooShortError,
  39     date_from_str,
  40     DateRange,
  41     DEFAULT_OUTTMPL,
  42     determine_ext,
  43     DownloadError,
  44     encodeFilename,
  45     ExtractorError,
  46     format_bytes,
  47     formatSeconds,
  48     get_term_width,
  49     locked_file,
  50     make_HTTPS_handler,
  51     MaxDownloadsReached,
  52     PagedList,
  53     parse_filesize,
  54     PostProcessingError,
  55     platform_name,
  56     preferredencoding,
  57     render_table,
  58     SameFileError,
  59     sanitize_filename,
  60     std_headers,
  61     subtitles_filename,
  62     takewhile_inclusive,
  63     UnavailableVideoError,
  64     url_basename,
  65     version_tuple,
  66     write_json_file,
  67     write_string,
  68     YoutubeDLHandler,
  69     prepend_extension,
  70     args_to_str,
  71     age_restricted,
  72 )
  73 from .cache import Cache
  74 from .extractor import get_info_extractor, gen_extractors
  75 from .downloader import get_suitable_downloader
  76 from .downloader.rtmp import rtmpdump_version
  77 from .postprocessor import (
  78     FFmpegFixupM4aPP,
  79     FFmpegFixupStretchedPP,
  80     FFmpegMergerPP,
  81     FFmpegPostProcessor,
  82     get_postprocessor,
  83 )
  84 from .version import __version__
  85
  86
  87 class YoutubeDL(object):
  88     """YoutubeDL class.
  89
  90     YoutubeDL objects are the ones responsible of downloading the
  91     actual video file and writing it to disk if the user has requested
  92     it, among some other tasks. In most cases there should be one per
  93     program. As, given a video URL, the downloader doesn't know how to
  94     extract all the needed information, task that InfoExtractors do, it
  95     has to pass the URL to one of them.
  96
  97     For this, YoutubeDL objects have a method that allows
  98     InfoExtractors to be registered in a given order. When it is passed
  99     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 100     finds that reports being able to handle it. The InfoExtractor extracts
 101     all the information about the video or videos the URL refers to, and
 102     YoutubeDL process the extracted information, possibly using a File
 103     Downloader to download the video.
 104
 105     YoutubeDL objects accept a lot of parameters. In order not to saturate
 106     the object constructor with arguments, it receives a dictionary of
 107     options instead. These options are available through the params
 108     attribute for the InfoExtractors to use. The YoutubeDL also
 109     registers itself as the downloader in charge for the InfoExtractors
 110     that are added to it, so this is a "mutual registration".
 111
 112     Available options:
 113
 114     username:          Username for authentication purposes.
 115     password:          Password for authentication purposes.
 116     videopassword:     Password for acces a video.
 117     usenetrc:          Use netrc for authentication instead.
 118     verbose:           Print additional info to stdout.
 119     quiet:             Do not print messages to stdout.
 120     no_warnings:       Do not print out anything for warnings.
 121     forceurl:          Force printing final URL.
 122     forcetitle:        Force printing title.
 123     forceid:           Force printing ID.
 124     forcethumbnail:    Force printing thumbnail URL.
 125     forcedescription:  Force printing description.
 126     forcefilename:     Force printing final filename.
 127     forceduration:     Force printing duration.
 128     forcejson:         Force printing info_dict as JSON.
 129     dump_single_json:  Force printing the info_dict of the whole playlist
 130                        (or video) as a single JSON line.
 131     simulate:          Do not download the video files.
 132     format:            Video format code. See options.py for more information.
 133     format_limit:      Highest quality format to try.
 134     outtmpl:           Template for output names.
 135     restrictfilenames: Do not allow "&" and spaces in file names
 136     ignoreerrors:      Do not stop on download errors.
 137     nooverwrites:      Prevent overwriting files.
 138     playliststart:     Playlist item to start at.
 139     playlistend:       Playlist item to end at.
 140     playlist_items:    Specific indices of playlist to download.
 141     playlistreverse:   Download playlist items in reverse order.
 142     matchtitle:        Download only matching titles.
 143     rejecttitle:       Reject downloads for matching titles.
 144     logger:            Log messages to a logging.Logger instance.
 145     logtostderr:       Log messages to stderr instead of stdout.
 146     writedescription:  Write the video description to a .description file
 147     writeinfojson:     Write the video description to a .info.json file
 148     writeannotations:  Write the video annotations to a .annotations.xml file
 149     writethumbnail:    Write the thumbnail image to a file
 150     write_all_thumbnails:  Write all thumbnail formats to files
 151     writesubtitles:    Write the video subtitles to a file
 152     writeautomaticsub: Write the automatic subtitles to a file
 153     allsubtitles:      Downloads all the subtitles of the video
 154                        (requires writesubtitles or writeautomaticsub)
 155     listsubtitles:     Lists all available subtitles for the video
 156     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 157     subtitleslangs:    List of languages of the subtitles to download
 158     keepvideo:         Keep the video file after post-processing
 159     daterange:         A DateRange object, download only if the upload_date is in the range.
 160     skip_download:     Skip the actual download of the video file
 161     cachedir:          Location of the cache files in the filesystem.
 162                        False to disable filesystem cache.
 163     noplaylist:        Download single video instead of a playlist if in doubt.
 164     age_limit:         An integer representing the user's age in years.
 165                        Unsuitable videos for the given age are skipped.
 166     min_views:         An integer representing the minimum view count the video
 167                        must have in order to not be skipped.
 168                        Videos without view count information are always
 169                        downloaded. None for no limit.
 170     max_views:         An integer representing the maximum view count.
 171                        Videos that are more popular than that are not
 172                        downloaded.
 173                        Videos without view count information are always
 174                        downloaded. None for no limit.
 175     download_archive:  File name of a file where all downloads are recorded.
 176                        Videos already present in the file are not downloaded
 177                        again.
 178     cookiefile:        File name where cookies should be read from and dumped to.
 179     nocheckcertificate:Do not verify SSL certificates
 180     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 181                        At the moment, this is only supported by YouTube.
 182     proxy:             URL of the proxy server to use
 183     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 184     bidi_workaround:   Work around buggy terminals without bidirectional text
 185                        support, using fridibi
 186     debug_printtraffic:Print out sent and received HTTP traffic
 187     include_ads:       Download ads as well
 188     default_search:    Prepend this string if an input url is not valid.
 189                        'auto' for elaborate guessing
 190     encoding:          Use this encoding instead of the system-specified.
 191     extract_flat:      Do not resolve URLs, return the immediate result.
 192                        Pass in 'in_playlist' to only show this behavior for
 193                        playlist items.
 194     postprocessors:    A list of dictionaries, each with an entry
 195                        * key:  The name of the postprocessor. See
 196                                youtube_dl/postprocessor/__init__.py for a list.
 197                        as well as any further keyword arguments for the
 198                        postprocessor.
 199     progress_hooks:    A list of functions that get called on download
 200                        progress, with a dictionary with the entries
 201                        * filename: The final filename
 202                        * status: One of "downloading" and "finished"
 203
 204                        The dict may also have some of the following entries:
 205
 206                        * downloaded_bytes: Bytes on disk
 207                        * total_bytes: Size of the whole file, None if unknown
 208                        * tmpfilename: The filename we're currently writing to
 209                        * eta: The estimated time in seconds, None if unknown
 210                        * speed: The download speed in bytes/second, None if
 211                                 unknown
 212
 213                        Progress hooks are guaranteed to be called at least once
 214                        (with status "finished") if the download is successful.
 215     merge_output_format: Extension to use when merging formats.
 216     fixup:             Automatically correct known faults of the file.
 217                        One of:
 218                        - "never": do nothing
 219                        - "warn": only emit a warning
 220                        - "detect_or_warn": check whether we can do anything
 221                                            about it, warn otherwise (default)
 222     source_address:    (Experimental) Client-side IP address to bind to.
 223     call_home:         Boolean, true iff we are allowed to contact the
 224                        youtube-dl servers for debugging.
 225     sleep_interval:    Number of seconds to sleep before each download.
 226     external_downloader:  Executable of the external downloader to call.
 227     listformats:       Print an overview of available video formats and exit.
 228     list_thumbnails:   Print a table of all thumbnails and exit.
 229
 230
 231     The following parameters are not used by YoutubeDL itself, they are used by
 232     the FileDownloader:
 233     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 234     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 235     xattr_set_filesize.
 236
 237     The following options are used by the post processors:
 238     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 239                        otherwise prefer avconv.
 240     exec_cmd:          Arbitrary command to run after downloading
 241     """
 242
 243     params = None
 244     _ies = []
 245     _pps = []
 246     _download_retcode = None
 247     _num_downloads = None
 248     _screen_file = None
 249
 250     def __init__(self, params=None, auto_init=True):
 251         """Create a FileDownloader object with the given options."""
 252         if params is None:
 253             params = {}
 254         self._ies = []
 255         self._ies_instances = {}
 256         self._pps = []
 257         self._progress_hooks = []
 258         self._download_retcode = 0
 259         self._num_downloads = 0
 260         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 261         self._err_file = sys.stderr
 262         self.params = params
 263         self.cache = Cache(self)
 264
 265         if params.get('bidi_workaround', False):
 266             try:
 267                 import pty
 268                 master, slave = pty.openpty()
 269                 width = get_term_width()
 270                 if width is None:
 271                     width_args = []
 272                 else:
 273                     width_args = ['-w', str(width)]
 274                 sp_kwargs = dict(
 275                     stdin=subprocess.PIPE,
 276                     stdout=slave,
 277                     stderr=self._err_file)
 278                 try:
 279                     self._output_process = subprocess.Popen(
 280                         ['bidiv'] + width_args, **sp_kwargs
 281                     )
 282                 except OSError:
 283                     self._output_process = subprocess.Popen(
 284                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 285                 self._output_channel = os.fdopen(master, 'rb')
 286             except OSError as ose:
 287                 if ose.errno == 2:
 288                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 289                 else:
 290                     raise
 291
 292         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 293                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 294                 and not params.get('restrictfilenames', False)):
 295             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 296             self.report_warning(
 297                 'Assuming --restrict-filenames since file system encoding '
 298                 'cannot encode all characters. '
 299                 'Set the LC_ALL environment variable to fix this.')
 300             self.params['restrictfilenames'] = True
 301
 302         if '%(stitle)s' in self.params.get('outtmpl', ''):
 303             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 304
 305         self._setup_opener()
 306
 307         if auto_init:
 308             self.print_debug_header()
 309             self.add_default_info_extractors()
 310
 311         for pp_def_raw in self.params.get('postprocessors', []):
 312             pp_class = get_postprocessor(pp_def_raw['key'])
 313             pp_def = dict(pp_def_raw)
 314             del pp_def['key']
 315             pp = pp_class(self, **compat_kwargs(pp_def))
 316             self.add_post_processor(pp)
 317
 318         for ph in self.params.get('progress_hooks', []):
 319             self.add_progress_hook(ph)
 320
 321     def warn_if_short_id(self, argv):
 322         # short YouTube ID starting with dash?
 323         idxs = [
 324             i for i, a in enumerate(argv)
 325             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 326         if idxs:
 327             correct_argv = (
 328                 ['youtube-dl'] +
 329                 [a for i, a in enumerate(argv) if i not in idxs] +
 330                 ['--'] + [argv[i] for i in idxs]
 331             )
 332             self.report_warning(
 333                 'Long argument string detected. '
 334                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 335                 args_to_str(correct_argv))
 336
 337     def add_info_extractor(self, ie):
 338         """Add an InfoExtractor object to the end of the list."""
 339         self._ies.append(ie)
 340         self._ies_instances[ie.ie_key()] = ie
 341         ie.set_downloader(self)
 342
 343     def get_info_extractor(self, ie_key):
 344         """
 345         Get an instance of an IE with name ie_key, it will try to get one from
 346         the _ies list, if there's no instance it will create a new one and add
 347         it to the extractor list.
 348         """
 349         ie = self._ies_instances.get(ie_key)
 350         if ie is None:
 351             ie = get_info_extractor(ie_key)()
 352             self.add_info_extractor(ie)
 353         return ie
 354
 355     def add_default_info_extractors(self):
 356         """
 357         Add the InfoExtractors returned by gen_extractors to the end of the list
 358         """
 359         for ie in gen_extractors():
 360             self.add_info_extractor(ie)
 361
 362     def add_post_processor(self, pp):
 363         """Add a PostProcessor object to the end of the chain."""
 364         self._pps.append(pp)
 365         pp.set_downloader(self)
 366
 367     def add_progress_hook(self, ph):
 368         """Add the progress hook (currently only for the file downloader)"""
 369         self._progress_hooks.append(ph)
 370
 371     def _bidi_workaround(self, message):
 372         if not hasattr(self, '_output_channel'):
 373             return message
 374
 375         assert hasattr(self, '_output_process')
 376         assert isinstance(message, compat_str)
 377         line_count = message.count('\n') + 1
 378         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 379         self._output_process.stdin.flush()
 380         res = ''.join(self._output_channel.readline().decode('utf-8')
 381                       for _ in range(line_count))
 382         return res[:-len('\n')]
 383
 384     def to_screen(self, message, skip_eol=False):
 385         """Print message to stdout if not in quiet mode."""
 386         return self.to_stdout(message, skip_eol, check_quiet=True)
 387
 388     def _write_string(self, s, out=None):
 389         write_string(s, out=out, encoding=self.params.get('encoding'))
 390
 391     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 392         """Print message to stdout if not in quiet mode."""
 393         if self.params.get('logger'):
 394             self.params['logger'].debug(message)
 395         elif not check_quiet or not self.params.get('quiet', False):
 396             message = self._bidi_workaround(message)
 397             terminator = ['\n', ''][skip_eol]
 398             output = message + terminator
 399
 400             self._write_string(output, self._screen_file)
 401
 402     def to_stderr(self, message):
 403         """Print message to stderr."""
 404         assert isinstance(message, compat_str)
 405         if self.params.get('logger'):
 406             self.params['logger'].error(message)
 407         else:
 408             message = self._bidi_workaround(message)
 409             output = message + '\n'
 410             self._write_string(output, self._err_file)
 411
 412     def to_console_title(self, message):
 413         if not self.params.get('consoletitle', False):
 414             return
 415         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 416             # c_wchar_p() might not be necessary if `message` is
 417             # already of type unicode()
 418             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 419         elif 'TERM' in os.environ:
 420             self._write_string('\033]0;%s\007' % message, self._screen_file)
 421
 422     def save_console_title(self):
 423         if not self.params.get('consoletitle', False):
 424             return
 425         if 'TERM' in os.environ:
 426             # Save the title on stack
 427             self._write_string('\033[22;0t', self._screen_file)
 428
 429     def restore_console_title(self):
 430         if not self.params.get('consoletitle', False):
 431             return
 432         if 'TERM' in os.environ:
 433             # Restore the title from stack
 434             self._write_string('\033[23;0t', self._screen_file)
 435
 436     def __enter__(self):
 437         self.save_console_title()
 438         return self
 439
 440     def __exit__(self, *args):
 441         self.restore_console_title()
 442
 443         if self.params.get('cookiefile') is not None:
 444             self.cookiejar.save()
 445
 446     def trouble(self, message=None, tb=None):
 447         """Determine action to take when a download problem appears.
 448
 449         Depending on if the downloader has been configured to ignore
 450         download errors or not, this method may throw an exception or
 451         not when errors are found, after printing the message.
 452
 453         tb, if given, is additional traceback information.
 454         """
 455         if message is not None:
 456             self.to_stderr(message)
 457         if self.params.get('verbose'):
 458             if tb is None:
 459                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 460                     tb = ''
 461                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 462                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 463                     tb += compat_str(traceback.format_exc())
 464                 else:
 465                     tb_data = traceback.format_list(traceback.extract_stack())
 466                     tb = ''.join(tb_data)
 467             self.to_stderr(tb)
 468         if not self.params.get('ignoreerrors', False):
 469             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 470                 exc_info = sys.exc_info()[1].exc_info
 471             else:
 472                 exc_info = sys.exc_info()
 473             raise DownloadError(message, exc_info)
 474         self._download_retcode = 1
 475
 476     def report_warning(self, message):
 477         '''
 478         Print the message to stderr, it will be prefixed with 'WARNING:'
 479         If stderr is a tty file the 'WARNING:' will be colored
 480         '''
 481         if self.params.get('logger') is not None:
 482             self.params['logger'].warning(message)
 483         else:
 484             if self.params.get('no_warnings'):
 485                 return
 486             if self._err_file.isatty() and os.name != 'nt':
 487                 _msg_header = '\033[0;33mWARNING:\033[0m'
 488             else:
 489                 _msg_header = 'WARNING:'
 490             warning_message = '%s %s' % (_msg_header, message)
 491             self.to_stderr(warning_message)
 492
 493     def report_error(self, message, tb=None):
 494         '''
 495         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 496         in red if stderr is a tty file.
 497         '''
 498         if self._err_file.isatty() and os.name != 'nt':
 499             _msg_header = '\033[0;31mERROR:\033[0m'
 500         else:
 501             _msg_header = 'ERROR:'
 502         error_message = '%s %s' % (_msg_header, message)
 503         self.trouble(error_message, tb)
 504
 505     def report_file_already_downloaded(self, file_name):
 506         """Report file has already been fully downloaded."""
 507         try:
 508             self.to_screen('[download] %s has already been downloaded' % file_name)
 509         except UnicodeEncodeError:
 510             self.to_screen('[download] The file has already been downloaded')
 511
 512     def prepare_filename(self, info_dict):
 513         """Generate the output filename."""
 514         try:
 515             template_dict = dict(info_dict)
 516
 517             template_dict['epoch'] = int(time.time())
 518             autonumber_size = self.params.get('autonumber_size')
 519             if autonumber_size is None:
 520                 autonumber_size = 5
 521             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 522             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 523             if template_dict.get('playlist_index') is not None:
 524                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 525             if template_dict.get('resolution') is None:
 526                 if template_dict.get('width') and template_dict.get('height'):
 527                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 528                 elif template_dict.get('height'):
 529                     template_dict['resolution'] = '%sp' % template_dict['height']
 530                 elif template_dict.get('width'):
 531                     template_dict['resolution'] = '?x%d' % template_dict['width']
 532
 533             sanitize = lambda k, v: sanitize_filename(
 534                 compat_str(v),
 535                 restricted=self.params.get('restrictfilenames'),
 536                 is_id=(k == 'id'))
 537             template_dict = dict((k, sanitize(k, v))
 538                                  for k, v in template_dict.items()
 539                                  if v is not None)
 540             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 541
 542             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 543             tmpl = compat_expanduser(outtmpl)
 544             filename = tmpl % template_dict
 545             return filename
 546         except ValueError as err:
 547             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 548             return None
 549
 550     def _match_entry(self, info_dict):
 551         """ Returns None iff the file should be downloaded """
 552
 553         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 554         if 'title' in info_dict:
 555             # This can happen when we're just evaluating the playlist
 556             title = info_dict['title']
 557             matchtitle = self.params.get('matchtitle', False)
 558             if matchtitle:
 559                 if not re.search(matchtitle, title, re.IGNORECASE):
 560                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 561             rejecttitle = self.params.get('rejecttitle', False)
 562             if rejecttitle:
 563                 if re.search(rejecttitle, title, re.IGNORECASE):
 564                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 565         date = info_dict.get('upload_date', None)
 566         if date is not None:
 567             dateRange = self.params.get('daterange', DateRange())
 568             if date not in dateRange:
 569                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 570         view_count = info_dict.get('view_count', None)
 571         if view_count is not None:
 572             min_views = self.params.get('min_views')
 573             if min_views is not None and view_count < min_views:
 574                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 575             max_views = self.params.get('max_views')
 576             if max_views is not None and view_count > max_views:
 577                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 578         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 579             return 'Skipping "%s" because it is age restricted' % title
 580         if self.in_download_archive(info_dict):
 581             return '%s has already been recorded in archive' % video_title
 582         return None
 583
 584     @staticmethod
 585     def add_extra_info(info_dict, extra_info):
 586         '''Set the keys from extra_info in info dict if they are missing'''
 587         for key, value in extra_info.items():
 588             info_dict.setdefault(key, value)
 589
 590     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 591                      process=True):
 592         '''
 593         Returns a list with a dictionary for each video we find.
 594         If 'download', also downloads the videos.
 595         extra_info is a dict containing the extra values to add to each result
 596          '''
 597
 598         if ie_key:
 599             ies = [self.get_info_extractor(ie_key)]
 600         else:
 601             ies = self._ies
 602
 603         for ie in ies:
 604             if not ie.suitable(url):
 605                 continue
 606
 607             if not ie.working():
 608                 self.report_warning('The program functionality for this site has been marked as broken, '
 609                                     'and will probably not work.')
 610
 611             try:
 612                 ie_result = ie.extract(url)
 613                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 614                     break
 615                 if isinstance(ie_result, list):
 616                     # Backwards compatibility: old IE result format
 617                     ie_result = {
 618                         '_type': 'compat_list',
 619                         'entries': ie_result,
 620                     }
 621                 self.add_default_extra_info(ie_result, ie, url)
 622                 if process:
 623                     return self.process_ie_result(ie_result, download, extra_info)
 624                 else:
 625                     return ie_result
 626             except ExtractorError as de:  # An error we somewhat expected
 627                 self.report_error(compat_str(de), de.format_traceback())
 628                 break
 629             except MaxDownloadsReached:
 630                 raise
 631             except Exception as e:
 632                 if self.params.get('ignoreerrors', False):
 633                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 634                     break
 635                 else:
 636                     raise
 637         else:
 638             self.report_error('no suitable InfoExtractor for URL %s' % url)
 639
 640     def add_default_extra_info(self, ie_result, ie, url):
 641         self.add_extra_info(ie_result, {
 642             'extractor': ie.IE_NAME,
 643             'webpage_url': url,
 644             'webpage_url_basename': url_basename(url),
 645             'extractor_key': ie.ie_key(),
 646         })
 647
 648     def process_ie_result(self, ie_result, download=True, extra_info={}):
 649         """
 650         Take the result of the ie(may be modified) and resolve all unresolved
 651         references (URLs, playlist items).
 652
 653         It will also download the videos if 'download'.
 654         Returns the resolved ie_result.
 655         """
 656
 657         result_type = ie_result.get('_type', 'video')
 658
 659         if result_type in ('url', 'url_transparent'):
 660             extract_flat = self.params.get('extract_flat', False)
 661             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 662                     extract_flat is True):
 663                 if self.params.get('forcejson', False):
 664                     self.to_stdout(json.dumps(ie_result))
 665                 return ie_result
 666
 667         if result_type == 'video':
 668             self.add_extra_info(ie_result, extra_info)
 669             return self.process_video_result(ie_result, download=download)
 670         elif result_type == 'url':
 671             # We have to add extra_info to the results because it may be
 672             # contained in a playlist
 673             return self.extract_info(ie_result['url'],
 674                                      download,
 675                                      ie_key=ie_result.get('ie_key'),
 676                                      extra_info=extra_info)
 677         elif result_type == 'url_transparent':
 678             # Use the information from the embedding page
 679             info = self.extract_info(
 680                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 681                 extra_info=extra_info, download=False, process=False)
 682
 683             force_properties = dict(
 684                 (k, v) for k, v in ie_result.items() if v is not None)
 685             for f in ('_type', 'url'):
 686                 if f in force_properties:
 687                     del force_properties[f]
 688             new_result = info.copy()
 689             new_result.update(force_properties)
 690
 691             assert new_result.get('_type') != 'url_transparent'
 692
 693             return self.process_ie_result(
 694                 new_result, download=download, extra_info=extra_info)
 695         elif result_type == 'playlist' or result_type == 'multi_video':
 696             # We process each entry in the playlist
 697             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 698             self.to_screen('[download] Downloading playlist: %s' % playlist)
 699
 700             playlist_results = []
 701
 702             playliststart = self.params.get('playliststart', 1) - 1
 703             playlistend = self.params.get('playlistend', None)
 704             # For backwards compatibility, interpret -1 as whole list
 705             if playlistend == -1:
 706                 playlistend = None
 707
 708             playlistitems_str = self.params.get('playlist_items', None)
 709             playlistitems = None
 710             if playlistitems_str is not None:
 711                 def iter_playlistitems(format):
 712                     for string_segment in format.split(','):
 713                         if '-' in string_segment:
 714                             start, end = string_segment.split('-')
 715                             for item in range(int(start), int(end) + 1):
 716                                 yield int(item)
 717                         else:
 718                             yield int(string_segment)
 719                 playlistitems = iter_playlistitems(playlistitems_str)
 720
 721             ie_entries = ie_result['entries']
 722             if isinstance(ie_entries, list):
 723                 n_all_entries = len(ie_entries)
 724                 if playlistitems:
 725                     entries = [ie_entries[i - 1] for i in playlistitems]
 726                 else:
 727                     entries = ie_entries[playliststart:playlistend]
 728                 n_entries = len(entries)
 729                 self.to_screen(
 730                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 731                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 732             elif isinstance(ie_entries, PagedList):
 733                 if playlistitems:
 734                     entries = []
 735                     for item in playlistitems:
 736                         entries.extend(ie_entries.getslice(
 737                             item - 1, item
 738                         ))
 739                 else:
 740                     entries = ie_entries.getslice(
 741                         playliststart, playlistend)
 742                 n_entries = len(entries)
 743                 self.to_screen(
 744                     "[%s] playlist %s: Downloading %d videos" %
 745                     (ie_result['extractor'], playlist, n_entries))
 746             else:  # iterable
 747                 if playlistitems:
 748                     entry_list = list(ie_entries)
 749                     entries = [entry_list[i - 1] for i in playlistitems]
 750                 else:
 751                     entries = list(itertools.islice(
 752                         ie_entries, playliststart, playlistend))
 753                 n_entries = len(entries)
 754                 self.to_screen(
 755                     "[%s] playlist %s: Downloading %d videos" %
 756                     (ie_result['extractor'], playlist, n_entries))
 757
 758             if self.params.get('playlistreverse', False):
 759                 entries = entries[::-1]
 760
 761             for i, entry in enumerate(entries, 1):
 762                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 763                 extra = {
 764                     'n_entries': n_entries,
 765                     'playlist': playlist,
 766                     'playlist_id': ie_result.get('id'),
 767                     'playlist_title': ie_result.get('title'),
 768                     'playlist_index': i + playliststart,
 769                     'extractor': ie_result['extractor'],
 770                     'webpage_url': ie_result['webpage_url'],
 771                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 772                     'extractor_key': ie_result['extractor_key'],
 773                 }
 774
 775                 reason = self._match_entry(entry)
 776                 if reason is not None:
 777                     self.to_screen('[download] ' + reason)
 778                     continue
 779
 780                 entry_result = self.process_ie_result(entry,
 781                                                       download=download,
 782                                                       extra_info=extra)
 783                 playlist_results.append(entry_result)
 784             ie_result['entries'] = playlist_results
 785             return ie_result
 786         elif result_type == 'compat_list':
 787             self.report_warning(
 788                 'Extractor %s returned a compat_list result. '
 789                 'It needs to be updated.' % ie_result.get('extractor'))
 790
 791             def _fixup(r):
 792                 self.add_extra_info(
 793                     r,
 794                     {
 795                         'extractor': ie_result['extractor'],
 796                         'webpage_url': ie_result['webpage_url'],
 797                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 798                         'extractor_key': ie_result['extractor_key'],
 799                     }
 800                 )
 801                 return r
 802             ie_result['entries'] = [
 803                 self.process_ie_result(_fixup(r), download, extra_info)
 804                 for r in ie_result['entries']
 805             ]
 806             return ie_result
 807         else:
 808             raise Exception('Invalid result type: %s' % result_type)
 809
 810     def _apply_format_filter(self, format_spec, available_formats):
 811         " Returns a tuple of the remaining format_spec and filtered formats "
 812
 813         OPERATORS = {
 814             '<': operator.lt,
 815             '<=': operator.le,
 816             '>': operator.gt,
 817             '>=': operator.ge,
 818             '=': operator.eq,
 819             '!=': operator.ne,
 820         }
 821         operator_rex = re.compile(r'''(?x)\s*\[
 822             (?P<key>width|height|tbr|abr|vbr|filesize)
 823             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 824             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 825             \]$
 826             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 827         m = operator_rex.search(format_spec)
 828         if not m:
 829             raise ValueError('Invalid format specification %r' % format_spec)
 830
 831         try:
 832             comparison_value = int(m.group('value'))
 833         except ValueError:
 834             comparison_value = parse_filesize(m.group('value'))
 835             if comparison_value is None:
 836                 comparison_value = parse_filesize(m.group('value') + 'B')
 837             if comparison_value is None:
 838                 raise ValueError(
 839                     'Invalid value %r in format specification %r' % (
 840                         m.group('value'), format_spec))
 841         op = OPERATORS[m.group('op')]
 842
 843         def _filter(f):
 844             actual_value = f.get(m.group('key'))
 845             if actual_value is None:
 846                 return m.group('none_inclusive')
 847             return op(actual_value, comparison_value)
 848         new_formats = [f for f in available_formats if _filter(f)]
 849
 850         new_format_spec = format_spec[:-len(m.group(0))]
 851         if not new_format_spec:
 852             new_format_spec = 'best'
 853
 854         return (new_format_spec, new_formats)
 855
 856     def select_format(self, format_spec, available_formats):
 857         while format_spec.endswith(']'):
 858             format_spec, available_formats = self._apply_format_filter(
 859                 format_spec, available_formats)
 860         if not available_formats:
 861             return None
 862
 863         if format_spec == 'best' or format_spec is None:
 864             return available_formats[-1]
 865         elif format_spec == 'worst':
 866             return available_formats[0]
 867         elif format_spec == 'bestaudio':
 868             audio_formats = [
 869                 f for f in available_formats
 870                 if f.get('vcodec') == 'none']
 871             if audio_formats:
 872                 return audio_formats[-1]
 873         elif format_spec == 'worstaudio':
 874             audio_formats = [
 875                 f for f in available_formats
 876                 if f.get('vcodec') == 'none']
 877             if audio_formats:
 878                 return audio_formats[0]
 879         elif format_spec == 'bestvideo':
 880             video_formats = [
 881                 f for f in available_formats
 882                 if f.get('acodec') == 'none']
 883             if video_formats:
 884                 return video_formats[-1]
 885         elif format_spec == 'worstvideo':
 886             video_formats = [
 887                 f for f in available_formats
 888                 if f.get('acodec') == 'none']
 889             if video_formats:
 890                 return video_formats[0]
 891         else:
 892             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 893             if format_spec in extensions:
 894                 filter_f = lambda f: f['ext'] == format_spec
 895             else:
 896                 filter_f = lambda f: f['format_id'] == format_spec
 897             matches = list(filter(filter_f, available_formats))
 898             if matches:
 899                 return matches[-1]
 900         return None
 901
 902     def _calc_headers(self, info_dict):
 903         res = std_headers.copy()
 904
 905         add_headers = info_dict.get('http_headers')
 906         if add_headers:
 907             res.update(add_headers)
 908
 909         cookies = self._calc_cookies(info_dict)
 910         if cookies:
 911             res['Cookie'] = cookies
 912
 913         return res
 914
 915     def _calc_cookies(self, info_dict):
 916         class _PseudoRequest(object):
 917             def __init__(self, url):
 918                 self.url = url
 919                 self.headers = {}
 920                 self.unverifiable = False
 921
 922             def add_unredirected_header(self, k, v):
 923                 self.headers[k] = v
 924
 925             def get_full_url(self):
 926                 return self.url
 927
 928             def is_unverifiable(self):
 929                 return self.unverifiable
 930
 931             def has_header(self, h):
 932                 return h in self.headers
 933
 934         pr = _PseudoRequest(info_dict['url'])
 935         self.cookiejar.add_cookie_header(pr)
 936         return pr.headers.get('Cookie')
 937
 938     def process_video_result(self, info_dict, download=True):
 939         assert info_dict.get('_type', 'video') == 'video'
 940
 941         if 'id' not in info_dict:
 942             raise ExtractorError('Missing "id" field in extractor result')
 943         if 'title' not in info_dict:
 944             raise ExtractorError('Missing "title" field in extractor result')
 945
 946         if 'playlist' not in info_dict:
 947             # It isn't part of a playlist
 948             info_dict['playlist'] = None
 949             info_dict['playlist_index'] = None
 950
 951         thumbnails = info_dict.get('thumbnails')
 952         if thumbnails is None:
 953             thumbnail = info_dict.get('thumbnail')
 954             if thumbnail:
 955                 thumbnails = [{'url': thumbnail}]
 956         if thumbnails:
 957             thumbnails.sort(key=lambda t: (
 958                 t.get('preference'), t.get('width'), t.get('height'),
 959                 t.get('id'), t.get('url')))
 960             for t in thumbnails:
 961                 if 'width' in t and 'height' in t:
 962                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 963
 964         if thumbnails and 'thumbnail' not in info_dict:
 965             info_dict['thumbnail'] = thumbnails[-1]['url']
 966
 967         if 'display_id' not in info_dict and 'id' in info_dict:
 968             info_dict['display_id'] = info_dict['id']
 969
 970         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 971             # Working around negative timestamps in Windows
 972             # (see http://bugs.python.org/issue1646728)
 973             if info_dict['timestamp'] < 0 and os.name == 'nt':
 974                 info_dict['timestamp'] = 0
 975             upload_date = datetime.datetime.utcfromtimestamp(
 976                 info_dict['timestamp'])
 977             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 978
 979         # This extractors handle format selection themselves
 980         if info_dict['extractor'] in ['Youku']:
 981             if download:
 982                 self.process_info(info_dict)
 983             return info_dict
 984
 985         # We now pick which formats have to be downloaded
 986         if info_dict.get('formats') is None:
 987             # There's only one format available
 988             formats = [info_dict]
 989         else:
 990             formats = info_dict['formats']
 991
 992         if not formats:
 993             raise ExtractorError('No video formats found!')
 994
 995         # We check that all the formats have the format and format_id fields
 996         for i, format in enumerate(formats):
 997             if 'url' not in format:
 998                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 999
1000             if format.get('format_id') is None:
1001                 format['format_id'] = compat_str(i)
1002             if format.get('format') is None:
1003                 format['format'] = '{id} - {res}{note}'.format(
1004                     id=format['format_id'],
1005                     res=self.format_resolution(format),
1006                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1007                 )
1008             # Automatically determine file extension if missing
1009             if 'ext' not in format:
1010                 format['ext'] = determine_ext(format['url']).lower()
1011             # Add HTTP headers, so that external programs can use them from the
1012             # json output
1013             full_format_info = info_dict.copy()
1014             full_format_info.update(format)
1015             format['http_headers'] = self._calc_headers(full_format_info)
1016
1017         format_limit = self.params.get('format_limit', None)
1018         if format_limit:
1019             formats = list(takewhile_inclusive(
1020                 lambda f: f['format_id'] != format_limit, formats
1021             ))
1022
1023         # TODO Central sorting goes here
1024
1025         if formats[0] is not info_dict:
1026             # only set the 'formats' fields if the original info_dict list them
1027             # otherwise we end up with a circular reference, the first (and unique)
1028             # element in the 'formats' field in info_dict is info_dict itself,
1029             # wich can't be exported to json
1030             info_dict['formats'] = formats
1031         if self.params.get('listformats'):
1032             self.list_formats(info_dict)
1033             return
1034         if self.params.get('list_thumbnails'):
1035             self.list_thumbnails(info_dict)
1036             return
1037
1038         req_format = self.params.get('format')
1039         if req_format is None:
1040             req_format = 'best'
1041         formats_to_download = []
1042         # The -1 is for supporting YoutubeIE
1043         if req_format in ('-1', 'all'):
1044             formats_to_download = formats
1045         else:
1046             for rfstr in req_format.split(','):
1047                 # We can accept formats requested in the format: 34/5/best, we pick
1048                 # the first that is available, starting from left
1049                 req_formats = rfstr.split('/')
1050                 for rf in req_formats:
1051                     if re.match(r'.+?\+.+?', rf) is not None:
1052                         # Two formats have been requested like '137+139'
1053                         format_1, format_2 = rf.split('+')
1054                         formats_info = (self.select_format(format_1, formats),
1055                                         self.select_format(format_2, formats))
1056                         if all(formats_info):
1057                             # The first format must contain the video and the
1058                             # second the audio
1059                             if formats_info[0].get('vcodec') == 'none':
1060                                 self.report_error('The first format must '
1061                                                   'contain the video, try using '
1062                                                   '"-f %s+%s"' % (format_2, format_1))
1063                                 return
1064                             output_ext = (
1065                                 formats_info[0]['ext']
1066                                 if self.params.get('merge_output_format') is None
1067                                 else self.params['merge_output_format'])
1068                             selected_format = {
1069                                 'requested_formats': formats_info,
1070                                 'format': rf,
1071                                 'ext': formats_info[0]['ext'],
1072                                 'width': formats_info[0].get('width'),
1073                                 'height': formats_info[0].get('height'),
1074                                 'resolution': formats_info[0].get('resolution'),
1075                                 'fps': formats_info[0].get('fps'),
1076                                 'vcodec': formats_info[0].get('vcodec'),
1077                                 'vbr': formats_info[0].get('vbr'),
1078                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1079                                 'acodec': formats_info[1].get('acodec'),
1080                                 'abr': formats_info[1].get('abr'),
1081                                 'ext': output_ext,
1082                             }
1083                         else:
1084                             selected_format = None
1085                     else:
1086                         selected_format = self.select_format(rf, formats)
1087                     if selected_format is not None:
1088                         formats_to_download.append(selected_format)
1089                         break
1090         if not formats_to_download:
1091             raise ExtractorError('requested format not available',
1092                                  expected=True)
1093
1094         if download:
1095             if len(formats_to_download) > 1:
1096                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1097             for format in formats_to_download:
1098                 new_info = dict(info_dict)
1099                 new_info.update(format)
1100                 self.process_info(new_info)
1101         # We update the info dict with the best quality format (backwards compatibility)
1102         info_dict.update(formats_to_download[-1])
1103         return info_dict
1104
1105     def process_info(self, info_dict):
1106         """Process a single resolved IE result."""
1107
1108         assert info_dict.get('_type', 'video') == 'video'
1109
1110         max_downloads = self.params.get('max_downloads')
1111         if max_downloads is not None:
1112             if self._num_downloads >= int(max_downloads):
1113                 raise MaxDownloadsReached()
1114
1115         info_dict['fulltitle'] = info_dict['title']
1116         if len(info_dict['title']) > 200:
1117             info_dict['title'] = info_dict['title'][:197] + '...'
1118
1119         # Keep for backwards compatibility
1120         info_dict['stitle'] = info_dict['title']
1121
1122         if 'format' not in info_dict:
1123             info_dict['format'] = info_dict['ext']
1124
1125         reason = self._match_entry(info_dict)
1126         if reason is not None:
1127             self.to_screen('[download] ' + reason)
1128             return
1129
1130         self._num_downloads += 1
1131
1132         filename = self.prepare_filename(info_dict)
1133
1134         # Forced printings
1135         if self.params.get('forcetitle', False):
1136             self.to_stdout(info_dict['fulltitle'])
1137         if self.params.get('forceid', False):
1138             self.to_stdout(info_dict['id'])
1139         if self.params.get('forceurl', False):
1140             if info_dict.get('requested_formats') is not None:
1141                 for f in info_dict['requested_formats']:
1142                     self.to_stdout(f['url'] + f.get('play_path', ''))
1143             else:
1144                 # For RTMP URLs, also include the playpath
1145                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1146         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1147             self.to_stdout(info_dict['thumbnail'])
1148         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1149             self.to_stdout(info_dict['description'])
1150         if self.params.get('forcefilename', False) and filename is not None:
1151             self.to_stdout(filename)
1152         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1153             self.to_stdout(formatSeconds(info_dict['duration']))
1154         if self.params.get('forceformat', False):
1155             self.to_stdout(info_dict['format'])
1156         if self.params.get('forcejson', False):
1157             info_dict['_filename'] = filename
1158             self.to_stdout(json.dumps(info_dict))
1159         if self.params.get('dump_single_json', False):
1160             info_dict['_filename'] = filename
1161
1162         # Do nothing else if in simulate mode
1163         if self.params.get('simulate', False):
1164             return
1165
1166         if filename is None:
1167             return
1168
1169         try:
1170             dn = os.path.dirname(encodeFilename(filename))
1171             if dn and not os.path.exists(dn):
1172                 os.makedirs(dn)
1173         except (OSError, IOError) as err:
1174             self.report_error('unable to create directory ' + compat_str(err))
1175             return
1176
1177         if self.params.get('writedescription', False):
1178             descfn = filename + '.description'
1179             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1180                 self.to_screen('[info] Video description is already present')
1181             elif info_dict.get('description') is None:
1182                 self.report_warning('There\'s no description to write.')
1183             else:
1184                 try:
1185                     self.to_screen('[info] Writing video description to: ' + descfn)
1186                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1187                         descfile.write(info_dict['description'])
1188                 except (OSError, IOError):
1189                     self.report_error('Cannot write description file ' + descfn)
1190                     return
1191
1192         if self.params.get('writeannotations', False):
1193             annofn = filename + '.annotations.xml'
1194             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1195                 self.to_screen('[info] Video annotations are already present')
1196             else:
1197                 try:
1198                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1199                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1200                         annofile.write(info_dict['annotations'])
1201                 except (KeyError, TypeError):
1202                     self.report_warning('There are no annotations to write.')
1203                 except (OSError, IOError):
1204                     self.report_error('Cannot write annotations file: ' + annofn)
1205                     return
1206
1207         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1208                                        self.params.get('writeautomaticsub')])
1209
1210         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1211             # subtitles download errors are already managed as troubles in relevant IE
1212             # that way it will silently go on when used with unsupporting IE
1213             subtitles = info_dict['subtitles']
1214             sub_format = self.params.get('subtitlesformat', 'srt')
1215             for sub_lang in subtitles.keys():
1216                 sub = subtitles[sub_lang]
1217                 if sub is None:
1218                     continue
1219                 try:
1220                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1221                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1222                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1223                     else:
1224                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1225                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1226                             subfile.write(sub)
1227                 except (OSError, IOError):
1228                     self.report_error('Cannot write subtitles file ' + sub_filename)
1229                     return
1230
1231         if self.params.get('writeinfojson', False):
1232             infofn = os.path.splitext(filename)[0] + '.info.json'
1233             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1234                 self.to_screen('[info] Video description metadata is already present')
1235             else:
1236                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1237                 try:
1238                     write_json_file(info_dict, infofn)
1239                 except (OSError, IOError):
1240                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1241                     return
1242
1243         self._write_thumbnails(info_dict, filename)
1244
1245         if not self.params.get('skip_download', False):
1246             try:
1247                 def dl(name, info):
1248                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1249                     for ph in self._progress_hooks:
1250                         fd.add_progress_hook(ph)
1251                     if self.params.get('verbose'):
1252                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1253                     return fd.download(name, info)
1254                 if info_dict.get('requested_formats') is not None:
1255                     downloaded = []
1256                     success = True
1257                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1258                     if not merger._executable:
1259                         postprocessors = []
1260                         self.report_warning('You have requested multiple '
1261                                             'formats but ffmpeg or avconv are not installed.'
1262                                             ' The formats won\'t be merged')
1263                     else:
1264                         postprocessors = [merger]
1265                     for f in info_dict['requested_formats']:
1266                         new_info = dict(info_dict)
1267                         new_info.update(f)
1268                         fname = self.prepare_filename(new_info)
1269                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1270                         downloaded.append(fname)
1271                         partial_success = dl(fname, new_info)
1272                         success = success and partial_success
1273                     info_dict['__postprocessors'] = postprocessors
1274                     info_dict['__files_to_merge'] = downloaded
1275                 else:
1276                     # Just a single file
1277                     success = dl(filename, info_dict)
1278             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1279                 self.report_error('unable to download video data: %s' % str(err))
1280                 return
1281             except (OSError, IOError) as err:
1282                 raise UnavailableVideoError(err)
1283             except (ContentTooShortError, ) as err:
1284                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1285                 return
1286
1287             if success:
1288                 # Fixup content
1289                 fixup_policy = self.params.get('fixup')
1290                 if fixup_policy is None:
1291                     fixup_policy = 'detect_or_warn'
1292
1293                 stretched_ratio = info_dict.get('stretched_ratio')
1294                 if stretched_ratio is not None and stretched_ratio != 1:
1295                     if fixup_policy == 'warn':
1296                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1297                             info_dict['id'], stretched_ratio))
1298                     elif fixup_policy == 'detect_or_warn':
1299                         stretched_pp = FFmpegFixupStretchedPP(self)
1300                         if stretched_pp.available:
1301                             info_dict.setdefault('__postprocessors', [])
1302                             info_dict['__postprocessors'].append(stretched_pp)
1303                         else:
1304                             self.report_warning(
1305                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1306                                     info_dict['id'], stretched_ratio))
1307                     else:
1308                         assert fixup_policy in ('ignore', 'never')
1309
1310                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1311                     if fixup_policy == 'warn':
1312                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1313                             info_dict['id']))
1314                     elif fixup_policy == 'detect_or_warn':
1315                         fixup_pp = FFmpegFixupM4aPP(self)
1316                         if fixup_pp.available:
1317                             info_dict.setdefault('__postprocessors', [])
1318                             info_dict['__postprocessors'].append(fixup_pp)
1319                         else:
1320                             self.report_warning(
1321                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1322                                     info_dict['id']))
1323                     else:
1324                         assert fixup_policy in ('ignore', 'never')
1325
1326                 try:
1327                     self.post_process(filename, info_dict)
1328                 except (PostProcessingError) as err:
1329                     self.report_error('postprocessing: %s' % str(err))
1330                     return
1331                 self.record_download_archive(info_dict)
1332
1333     def download(self, url_list):
1334         """Download a given list of URLs."""
1335         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1336         if (len(url_list) > 1 and
1337                 '%' not in outtmpl
1338                 and self.params.get('max_downloads') != 1):
1339             raise SameFileError(outtmpl)
1340
1341         for url in url_list:
1342             try:
1343                 # It also downloads the videos
1344                 res = self.extract_info(url)
1345             except UnavailableVideoError:
1346                 self.report_error('unable to download video')
1347             except MaxDownloadsReached:
1348                 self.to_screen('[info] Maximum number of downloaded files reached.')
1349                 raise
1350             else:
1351                 if self.params.get('dump_single_json', False):
1352                     self.to_stdout(json.dumps(res))
1353
1354         return self._download_retcode
1355
1356     def download_with_info_file(self, info_filename):
1357         with io.open(info_filename, 'r', encoding='utf-8') as f:
1358             info = json.load(f)
1359         try:
1360             self.process_ie_result(info, download=True)
1361         except DownloadError:
1362             webpage_url = info.get('webpage_url')
1363             if webpage_url is not None:
1364                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1365                 return self.download([webpage_url])
1366             else:
1367                 raise
1368         return self._download_retcode
1369
1370     def post_process(self, filename, ie_info):
1371         """Run all the postprocessors on the given file."""
1372         info = dict(ie_info)
1373         info['filepath'] = filename
1374         pps_chain = []
1375         if ie_info.get('__postprocessors') is not None:
1376             pps_chain.extend(ie_info['__postprocessors'])
1377         pps_chain.extend(self._pps)
1378         for pp in pps_chain:
1379             keep_video = None
1380             old_filename = info['filepath']
1381             try:
1382                 keep_video_wish, info = pp.run(info)
1383                 if keep_video_wish is not None:
1384                     if keep_video_wish:
1385                         keep_video = keep_video_wish
1386                     elif keep_video is None:
1387                         # No clear decision yet, let IE decide
1388                         keep_video = keep_video_wish
1389             except PostProcessingError as e:
1390                 self.report_error(e.msg)
1391             if keep_video is False and not self.params.get('keepvideo', False):
1392                 try:
1393                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1394                     os.remove(encodeFilename(old_filename))
1395                 except (IOError, OSError):
1396                     self.report_warning('Unable to remove downloaded video file')
1397
1398     def _make_archive_id(self, info_dict):
1399         # Future-proof against any change in case
1400         # and backwards compatibility with prior versions
1401         extractor = info_dict.get('extractor_key')
1402         if extractor is None:
1403             if 'id' in info_dict:
1404                 extractor = info_dict.get('ie_key')  # key in a playlist
1405         if extractor is None:
1406             return None  # Incomplete video information
1407         return extractor.lower() + ' ' + info_dict['id']
1408
1409     def in_download_archive(self, info_dict):
1410         fn = self.params.get('download_archive')
1411         if fn is None:
1412             return False
1413
1414         vid_id = self._make_archive_id(info_dict)
1415         if vid_id is None:
1416             return False  # Incomplete video information
1417
1418         try:
1419             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1420                 for line in archive_file:
1421                     if line.strip() == vid_id:
1422                         return True
1423         except IOError as ioe:
1424             if ioe.errno != errno.ENOENT:
1425                 raise
1426         return False
1427
1428     def record_download_archive(self, info_dict):
1429         fn = self.params.get('download_archive')
1430         if fn is None:
1431             return
1432         vid_id = self._make_archive_id(info_dict)
1433         assert vid_id
1434         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1435             archive_file.write(vid_id + '\n')
1436
1437     @staticmethod
1438     def format_resolution(format, default='unknown'):
1439         if format.get('vcodec') == 'none':
1440             return 'audio only'
1441         if format.get('resolution') is not None:
1442             return format['resolution']
1443         if format.get('height') is not None:
1444             if format.get('width') is not None:
1445                 res = '%sx%s' % (format['width'], format['height'])
1446             else:
1447                 res = '%sp' % format['height']
1448         elif format.get('width') is not None:
1449             res = '?x%d' % format['width']
1450         else:
1451             res = default
1452         return res
1453
1454     def _format_note(self, fdict):
1455         res = ''
1456         if fdict.get('ext') in ['f4f', 'f4m']:
1457             res += '(unsupported) '
1458         if fdict.get('format_note') is not None:
1459             res += fdict['format_note'] + ' '
1460         if fdict.get('tbr') is not None:
1461             res += '%4dk ' % fdict['tbr']
1462         if fdict.get('container') is not None:
1463             if res:
1464                 res += ', '
1465             res += '%s container' % fdict['container']
1466         if (fdict.get('vcodec') is not None and
1467                 fdict.get('vcodec') != 'none'):
1468             if res:
1469                 res += ', '
1470             res += fdict['vcodec']
1471             if fdict.get('vbr') is not None:
1472                 res += '@'
1473         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1474             res += 'video@'
1475         if fdict.get('vbr') is not None:
1476             res += '%4dk' % fdict['vbr']
1477         if fdict.get('fps') is not None:
1478             res += ', %sfps' % fdict['fps']
1479         if fdict.get('acodec') is not None:
1480             if res:
1481                 res += ', '
1482             if fdict['acodec'] == 'none':
1483                 res += 'video only'
1484             else:
1485                 res += '%-5s' % fdict['acodec']
1486         elif fdict.get('abr') is not None:
1487             if res:
1488                 res += ', '
1489             res += 'audio'
1490         if fdict.get('abr') is not None:
1491             res += '@%3dk' % fdict['abr']
1492         if fdict.get('asr') is not None:
1493             res += ' (%5dHz)' % fdict['asr']
1494         if fdict.get('filesize') is not None:
1495             if res:
1496                 res += ', '
1497             res += format_bytes(fdict['filesize'])
1498         elif fdict.get('filesize_approx') is not None:
1499             if res:
1500                 res += ', '
1501             res += '~' + format_bytes(fdict['filesize_approx'])
1502         return res
1503
1504     def list_formats(self, info_dict):
1505         def line(format, idlen=20):
1506             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1507                 format['format_id'],
1508                 format['ext'],
1509                 self.format_resolution(format),
1510                 self._format_note(format),
1511             ))
1512
1513         formats = info_dict.get('formats', [info_dict])
1514         idlen = max(len('format code'),
1515                     max(len(f['format_id']) for f in formats))
1516         formats_s = [
1517             line(f, idlen) for f in formats
1518             if f.get('preference') is None or f['preference'] >= -1000]
1519         if len(formats) > 1:
1520             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1521             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1522
1523         header_line = line({
1524             'format_id': 'format code', 'ext': 'extension',
1525             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1526         self.to_screen(
1527             '[info] Available formats for %s:\n%s\n%s' %
1528             (info_dict['id'], header_line, '\n'.join(formats_s)))
1529
1530     def list_thumbnails(self, info_dict):
1531         thumbnails = info_dict.get('thumbnails')
1532         if not thumbnails:
1533             tn_url = info_dict.get('thumbnail')
1534             if tn_url:
1535                 thumbnails = [{'id': '0', 'url': tn_url}]
1536             else:
1537                 self.to_screen(
1538                     '[info] No thumbnails present for %s' % info_dict['id'])
1539                 return
1540
1541         self.to_screen(
1542             '[info] Thumbnails for %s:' % info_dict['id'])
1543         self.to_screen(render_table(
1544             ['ID', 'width', 'height', 'URL'],
1545             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1546
1547     def urlopen(self, req):
1548         """ Start an HTTP download """
1549
1550         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1551         # always respected by websites, some tend to give out URLs with non percent-encoded
1552         # non-ASCII characters (see telemb.py, ard.py [#3412])
1553         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1554         # To work around aforementioned issue we will replace request's original URL with
1555         # percent-encoded one
1556         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1557         url = req if req_is_string else req.get_full_url()
1558         url_escaped = escape_url(url)
1559
1560         # Substitute URL if any change after escaping
1561         if url != url_escaped:
1562             if req_is_string:
1563                 req = url_escaped
1564             else:
1565                 req = compat_urllib_request.Request(
1566                     url_escaped, data=req.data, headers=req.headers,
1567                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1568
1569         return self._opener.open(req, timeout=self._socket_timeout)
1570
1571     def print_debug_header(self):
1572         if not self.params.get('verbose'):
1573             return
1574
1575         if type('') is not compat_str:
1576             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1577             self.report_warning(
1578                 'Your Python is broken! Update to a newer and supported version')
1579
1580         stdout_encoding = getattr(
1581             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1582         encoding_str = (
1583             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1584                 locale.getpreferredencoding(),
1585                 sys.getfilesystemencoding(),
1586                 stdout_encoding,
1587                 self.get_encoding()))
1588         write_string(encoding_str, encoding=None)
1589
1590         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1591         try:
1592             sp = subprocess.Popen(
1593                 ['git', 'rev-parse', '--short', 'HEAD'],
1594                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1595                 cwd=os.path.dirname(os.path.abspath(__file__)))
1596             out, err = sp.communicate()
1597             out = out.decode().strip()
1598             if re.match('[0-9a-f]+', out):
1599                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1600         except:
1601             try:
1602                 sys.exc_clear()
1603             except:
1604                 pass
1605         self._write_string('[debug] Python version %s - %s\n' % (
1606             platform.python_version(), platform_name()))
1607
1608         exe_versions = FFmpegPostProcessor.get_versions()
1609         exe_versions['rtmpdump'] = rtmpdump_version()
1610         exe_str = ', '.join(
1611             '%s %s' % (exe, v)
1612             for exe, v in sorted(exe_versions.items())
1613             if v
1614         )
1615         if not exe_str:
1616             exe_str = 'none'
1617         self._write_string('[debug] exe versions: %s\n' % exe_str)
1618
1619         proxy_map = {}
1620         for handler in self._opener.handlers:
1621             if hasattr(handler, 'proxies'):
1622                 proxy_map.update(handler.proxies)
1623         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1624
1625         if self.params.get('call_home', False):
1626             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1627             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1628             latest_version = self.urlopen(
1629                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1630             if version_tuple(latest_version) > version_tuple(__version__):
1631                 self.report_warning(
1632                     'You are using an outdated version (newest version: %s)! '
1633                     'See https://yt-dl.org/update if you need help updating.' %
1634                     latest_version)
1635
1636     def _setup_opener(self):
1637         timeout_val = self.params.get('socket_timeout')
1638         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1639
1640         opts_cookiefile = self.params.get('cookiefile')
1641         opts_proxy = self.params.get('proxy')
1642
1643         if opts_cookiefile is None:
1644             self.cookiejar = compat_cookiejar.CookieJar()
1645         else:
1646             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1647                 opts_cookiefile)
1648             if os.access(opts_cookiefile, os.R_OK):
1649                 self.cookiejar.load()
1650
1651         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1652             self.cookiejar)
1653         if opts_proxy is not None:
1654             if opts_proxy == '':
1655                 proxies = {}
1656             else:
1657                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1658         else:
1659             proxies = compat_urllib_request.getproxies()
1660             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1661             if 'http' in proxies and 'https' not in proxies:
1662                 proxies['https'] = proxies['http']
1663         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1664
1665         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1666         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1667         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1668         opener = compat_urllib_request.build_opener(
1669             https_handler, proxy_handler, cookie_processor, ydlh)
1670         # Delete the default user-agent header, which would otherwise apply in
1671         # cases where our custom HTTP handler doesn't come into play
1672         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1673         opener.addheaders = []
1674         self._opener = opener
1675
1676     def encode(self, s):
1677         if isinstance(s, bytes):
1678             return s  # Already encoded
1679
1680         try:
1681             return s.encode(self.get_encoding())
1682         except UnicodeEncodeError as err:
1683             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1684             raise
1685
1686     def get_encoding(self):
1687         encoding = self.params.get('encoding')
1688         if encoding is None:
1689             encoding = preferredencoding()
1690         return encoding
1691
1692     def _write_thumbnails(self, info_dict, filename):
1693         if self.params.get('writethumbnail', False):
1694             thumbnails = info_dict.get('thumbnails')
1695             if thumbnails:
1696                 thumbnails = [thumbnails[-1]]
1697         elif self.params.get('write_all_thumbnails', False):
1698             thumbnails = info_dict.get('thumbnails')
1699         else:
1700             return
1701
1702         if not thumbnails:
1703             # No thumbnails present, so return immediately
1704             return
1705
1706         for t in thumbnails:
1707             thumb_ext = determine_ext(t['url'], 'jpg')
1708             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1709             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1710             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1711
1712             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1713                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1714                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1715             else:
1716                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1717                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1718                 try:
1719                     uf = self.urlopen(t['url'])
1720                     with open(thumb_filename, 'wb') as thumbf:
1721                         shutil.copyfileobj(uf, thumbf)
1722                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1723                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1724                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1725                     self.report_warning('Unable to download thumbnail "%s": %s' %
1726                                         (t['url'], compat_str(err)))