youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import os
  14 import platform
  15 import re
  16 import shutil
  17 import subprocess
  18 import socket
  19 import sys
  20 import time
  21 import traceback
  22
  23 if os.name == 'nt':
  24     import ctypes
  25
  26 from .compat import (
  27     compat_cookiejar,
  28     compat_expanduser,
  29     compat_http_client,
  30     compat_str,
  31     compat_urllib_error,
  32     compat_urllib_request,
  33 )
  34 from .utils import (
  35     escape_url,
  36     ContentTooShortError,
  37     date_from_str,
  38     DateRange,
  39     DEFAULT_OUTTMPL,
  40     determine_ext,
  41     DownloadError,
  42     encodeFilename,
  43     ExtractorError,
  44     format_bytes,
  45     formatSeconds,
  46     get_term_width,
  47     locked_file,
  48     make_HTTPS_handler,
  49     MaxDownloadsReached,
  50     PagedList,
  51     PostProcessingError,
  52     platform_name,
  53     preferredencoding,
  54     SameFileError,
  55     sanitize_filename,
  56     subtitles_filename,
  57     takewhile_inclusive,
  58     UnavailableVideoError,
  59     url_basename,
  60     write_json_file,
  61     write_string,
  62     YoutubeDLHandler,
  63     prepend_extension,
  64     args_to_str,
  65 )
  66 from .cache import Cache
  67 from .extractor import get_info_extractor, gen_extractors
  68 from .downloader import get_suitable_downloader
  69 from .downloader.rtmp import rtmpdump_version
  70 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
  71 from .version import __version__
  72
  73
  74 class YoutubeDL(object):
  75     """YoutubeDL class.
  76
  77     YoutubeDL objects are the ones responsible for downloading the
  78     actual video file and writing it to disk if the user has requested
  79     it, among some other tasks. In most cases there should be one per
  80     program. As, given a video URL, the downloader doesn't know how to
  81     extract all the needed information, task that InfoExtractors do, it
  82     has to pass the URL to one of them.
  83
  84     For this, YoutubeDL objects have a method that allows
  85     InfoExtractors to be registered in a given order. When it is passed
  86     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  87     finds that reports being able to handle it. The InfoExtractor extracts
  88     all the information about the video or videos the URL refers to, and
  89     YoutubeDL processes the extracted information, possibly using a File
  90     Downloader to download the video.
  91
  92     YoutubeDL objects accept a lot of parameters. In order not to saturate
  93     the object constructor with arguments, it receives a dictionary of
  94     options instead. These options are available through the params
  95     attribute for the InfoExtractors to use. The YoutubeDL also
  96     registers itself as the downloader in charge for the InfoExtractors
  97     that are added to it, so this is a "mutual registration".
  98
  99     Available options:
 100
 101     username:          Username for authentication purposes.
 102     password:          Password for authentication purposes.
 103     videopassword:     Password for accessing a video.
 104     usenetrc:          Use netrc for authentication instead.
 105     verbose:           Print additional info to stdout.
 106     quiet:             Do not print messages to stdout.
 107     no_warnings:       Do not print out anything for warnings.
 108     forceurl:          Force printing final URL.
 109     forcetitle:        Force printing title.
 110     forceid:           Force printing ID.
 111     forcethumbnail:    Force printing thumbnail URL.
 112     forcedescription:  Force printing description.
 113     forcefilename:     Force printing final filename.
 114     forceduration:     Force printing duration.
 115     forcejson:         Force printing info_dict as JSON.
 116     dump_single_json:  Force printing the info_dict of the whole playlist
 117                        (or video) as a single JSON line.
 118     simulate:          Do not download the video files.
 119     format:            Video format code.
 120     format_limit:      Highest quality format to try.
 121     outtmpl:           Template for output names.
 122     restrictfilenames: Do not allow "&" and spaces in file names
 123     ignoreerrors:      Do not stop on download errors.
 124     nooverwrites:      Prevent overwriting files.
 125     playliststart:     Playlist item to start at.
 126     playlistend:       Playlist item to end at.
 127     matchtitle:        Download only matching titles.
 128     rejecttitle:       Reject downloads for matching titles.
 129     logger:            Log messages to a logging.Logger instance.
 130     logtostderr:       Log messages to stderr instead of stdout.
 131     writedescription:  Write the video description to a .description file
 132     writeinfojson:     Write the video description to a .info.json file
 133     writeannotations:  Write the video annotations to a .annotations.xml file
 134     writethumbnail:    Write the thumbnail image to a file
 135     writesubtitles:    Write the video subtitles to a file
 136     writeautomaticsub: Write the automatic subtitles to a file
 137     allsubtitles:      Downloads all the subtitles of the video
 138                        (requires writesubtitles or writeautomaticsub)
 139     listsubtitles:     Lists all available subtitles for the video
 140     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 141     subtitleslangs:    List of languages of the subtitles to download
 142     keepvideo:         Keep the video file after post-processing
 143     daterange:         A DateRange object, download only if the upload_date is in the range.
 144     skip_download:     Skip the actual download of the video file
 145     cachedir:          Location of the cache files in the filesystem.
 146                        False to disable filesystem cache.
 147     noplaylist:        Download single video instead of a playlist if in doubt.
 148     age_limit:         An integer representing the user's age in years.
 149                        Unsuitable videos for the given age are skipped.
 150     min_views:         An integer representing the minimum view count the video
 151                        must have in order to not be skipped.
 152                        Videos without view count information are always
 153                        downloaded. None for no limit.
 154     max_views:         An integer representing the maximum view count.
 155                        Videos that are more popular than that are not
 156                        downloaded.
 157                        Videos without view count information are always
 158                        downloaded. None for no limit.
 159     download_archive:  File name of a file where all downloads are recorded.
 160                        Videos already present in the file are not downloaded
 161                        again.
 162     cookiefile:        File name where cookies should be read from and dumped to.
 163     nocheckcertificate:Do not verify SSL certificates
 164     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 165                        At the moment, this is only supported by YouTube.
 166     proxy:             URL of the proxy server to use
 167     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 168     bidi_workaround:   Work around buggy terminals without bidirectional text
 169                        support, using fribidi
 170     debug_printtraffic:Print out sent and received HTTP traffic
 171     include_ads:       Download ads as well
 172     default_search:    Prepend this string if an input url is not valid.
 173                        'auto' for elaborate guessing
 174     encoding:          Use this encoding instead of the system-specified.
 175     extract_flat:      Do not resolve URLs, return the immediate result.
 176                        Pass in 'in_playlist' to only show this behavior for
 177                        playlist items.
 178
 179     The following parameters are not used by YoutubeDL itself, they are used by
 180     the FileDownloader:
 181     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 182     noresizebuffer, retries, continuedl, noprogress, consoletitle
 183
 184     The following options are used by the post processors:
 185     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 186                        otherwise prefer avconv.
 187     exec_cmd:          Arbitrary command to run after downloading
 188     """
 189
 190     params = None
 191     _ies = []
 192     _pps = []
 193     _download_retcode = None
 194     _num_downloads = None
 195     _screen_file = None
 196
 197     def __init__(self, params=None, auto_init=True):
 198         """Create a FileDownloader object with the given options."""
 199         if params is None:
 200             params = {}
 201         self._ies = []
 202         self._ies_instances = {}
 203         self._pps = []
 204         self._progress_hooks = []
 205         self._download_retcode = 0
 206         self._num_downloads = 0
 207         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 208         self._err_file = sys.stderr
 209         self.params = params
 210         self.cache = Cache(self)
 211
 212         if params.get('bidi_workaround', False):
 213             try:
 214                 import pty
 215                 master, slave = pty.openpty()
 216                 width = get_term_width()
 217                 if width is None:
 218                     width_args = []
 219                 else:
 220                     width_args = ['-w', str(width)]
 221                 sp_kwargs = dict(
 222                     stdin=subprocess.PIPE,
 223                     stdout=slave,
 224                     stderr=self._err_file)
 225                 try:
 226                     self._output_process = subprocess.Popen(
 227                         ['bidiv'] + width_args, **sp_kwargs
 228                     )
 229                 except OSError:
 230                     self._output_process = subprocess.Popen(
 231                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 232                 self._output_channel = os.fdopen(master, 'rb')
 233             except OSError as ose:
 234                 if ose.errno == 2:
 235                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 236                 else:
 237                     raise
 238
 239         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 240                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 241                 and not params.get('restrictfilenames', False)):
 242             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 243             self.report_warning(
 244                 'Assuming --restrict-filenames since file system encoding '
 245                 'cannot encode all characters. '
 246                 'Set the LC_ALL environment variable to fix this.')
 247             self.params['restrictfilenames'] = True
 248
 249         if '%(stitle)s' in self.params.get('outtmpl', ''):
 250             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 251
 252         self._setup_opener()
 253
 254         if auto_init:
 255             self.print_debug_header()
 256             self.add_default_info_extractors()
 257
 258     def warn_if_short_id(self, argv):
 259         # short YouTube ID starting with dash?
 260         idxs = [
 261             i for i, a in enumerate(argv)
 262             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 263         if idxs:
 264             correct_argv = (
 265                 ['youtube-dl'] +
 266                 [a for i, a in enumerate(argv) if i not in idxs] +
 267                 ['--'] + [argv[i] for i in idxs]
 268             )
 269             self.report_warning(
 270                 'Long argument string detected. '
 271                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 272                 args_to_str(correct_argv))
 273
 274     def add_info_extractor(self, ie):
 275         """Add an InfoExtractor object to the end of the list."""
 276         self._ies.append(ie)
 277         self._ies_instances[ie.ie_key()] = ie
 278         ie.set_downloader(self)
 279
 280     def get_info_extractor(self, ie_key):
 281         """
 282         Get an instance of an IE with name ie_key, it will try to get one from
 283         the _ies list, if there's no instance it will create a new one and add
 284         it to the extractor list.
 285         """
 286         ie = self._ies_instances.get(ie_key)
 287         if ie is None:
 288             ie = get_info_extractor(ie_key)()
 289             self.add_info_extractor(ie)
 290         return ie
 291
 292     def add_default_info_extractors(self):
 293         """
 294         Add the InfoExtractors returned by gen_extractors to the end of the list
 295         """
 296         for ie in gen_extractors():
 297             self.add_info_extractor(ie)
 298
 299     def add_post_processor(self, pp):
 300         """Add a PostProcessor object to the end of the chain."""
 301         self._pps.append(pp)
 302         pp.set_downloader(self)
 303
 304     def add_progress_hook(self, ph):
 305         """Add the progress hook (currently only for the file downloader)"""
 306         self._progress_hooks.append(ph)
 307
 308     def _bidi_workaround(self, message):
 309         if not hasattr(self, '_output_channel'):
 310             return message
 311
 312         assert hasattr(self, '_output_process')
 313         assert isinstance(message, compat_str)
 314         line_count = message.count('\n') + 1
 315         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 316         self._output_process.stdin.flush()
 317         res = ''.join(self._output_channel.readline().decode('utf-8')
 318                       for _ in range(line_count))
 319         return res[:-len('\n')]
 320
 321     def to_screen(self, message, skip_eol=False):
 322         """Print message to stdout if not in quiet mode."""
 323         return self.to_stdout(message, skip_eol, check_quiet=True)
 324
 325     def _write_string(self, s, out=None):
 326         write_string(s, out=out, encoding=self.params.get('encoding'))
 327
 328     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 329         """Print message to stdout if not in quiet mode."""
 330         if self.params.get('logger'):
 331             self.params['logger'].debug(message)
 332         elif not check_quiet or not self.params.get('quiet', False):
 333             message = self._bidi_workaround(message)
 334             terminator = ['\n', ''][skip_eol]
 335             output = message + terminator
 336
 337             self._write_string(output, self._screen_file)
 338
 339     def to_stderr(self, message):
 340         """Print message to stderr."""
 341         assert isinstance(message, compat_str)
 342         if self.params.get('logger'):
 343             self.params['logger'].error(message)
 344         else:
 345             message = self._bidi_workaround(message)
 346             output = message + '\n'
 347             self._write_string(output, self._err_file)
 348
 349     def to_console_title(self, message):
 350         if not self.params.get('consoletitle', False):
 351             return
 352         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 353             # c_wchar_p() might not be necessary if `message` is
 354             # already of type unicode()
 355             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 356         elif 'TERM' in os.environ:
 357             self._write_string('\033]0;%s\007' % message, self._screen_file)
 358
 359     def save_console_title(self):
 360         if not self.params.get('consoletitle', False):
 361             return
 362         if 'TERM' in os.environ:
 363             # Save the title on stack
 364             self._write_string('\033[22;0t', self._screen_file)
 365
 366     def restore_console_title(self):
 367         if not self.params.get('consoletitle', False):
 368             return
 369         if 'TERM' in os.environ:
 370             # Restore the title from stack
 371             self._write_string('\033[23;0t', self._screen_file)
 372
 373     def __enter__(self):
 374         self.save_console_title()
 375         return self
 376
 377     def __exit__(self, *args):
 378         self.restore_console_title()
 379
 380         if self.params.get('cookiefile') is not None:
 381             self.cookiejar.save()
 382
 383     def trouble(self, message=None, tb=None):
 384         """Determine action to take when a download problem appears.
 385
 386         Depending on if the downloader has been configured to ignore
 387         download errors or not, this method may throw an exception or
 388         not when errors are found, after printing the message.
 389
 390         tb, if given, is additional traceback information.
 391         """
 392         if message is not None:
 393             self.to_stderr(message)
 394         if self.params.get('verbose'):
 395             if tb is None:
 396                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 397                     tb = ''
 398                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 399                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 400                     tb += compat_str(traceback.format_exc())
 401                 else:
 402                     tb_data = traceback.format_list(traceback.extract_stack())
 403                     tb = ''.join(tb_data)
 404             self.to_stderr(tb)
 405         if not self.params.get('ignoreerrors', False):
 406             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 407                 exc_info = sys.exc_info()[1].exc_info
 408             else:
 409                 exc_info = sys.exc_info()
 410             raise DownloadError(message, exc_info)
 411         self._download_retcode = 1
 412
 413     def report_warning(self, message):
 414         '''
 415         Print the message to stderr, it will be prefixed with 'WARNING:'
 416         If stderr is a tty file the 'WARNING:' will be colored
 417         '''
 418         if self.params.get('logger') is not None:
 419             self.params['logger'].warning(message)
 420         else:
 421             if self.params.get('no_warnings'):
 422                 return
 423             if self._err_file.isatty() and os.name != 'nt':
 424                 _msg_header = '\033[0;33mWARNING:\033[0m'
 425             else:
 426                 _msg_header = 'WARNING:'
 427             warning_message = '%s %s' % (_msg_header, message)
 428             self.to_stderr(warning_message)
 429
 430     def report_error(self, message, tb=None):
 431         '''
 432         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 433         in red if stderr is a tty file.
 434         '''
 435         if self._err_file.isatty() and os.name != 'nt':
 436             _msg_header = '\033[0;31mERROR:\033[0m'
 437         else:
 438             _msg_header = 'ERROR:'
 439         error_message = '%s %s' % (_msg_header, message)
 440         self.trouble(error_message, tb)
 441
 442     def report_file_already_downloaded(self, file_name):
 443         """Report file has already been fully downloaded."""
 444         try:
 445             self.to_screen('[download] %s has already been downloaded' % file_name)
 446         except UnicodeEncodeError:
 447             self.to_screen('[download] The file has already been downloaded')
 448
 449     def prepare_filename(self, info_dict):
 450         """Generate the output filename."""
 451         try:
 452             template_dict = dict(info_dict)
 453
 454             template_dict['epoch'] = int(time.time())
 455             autonumber_size = self.params.get('autonumber_size')
 456             if autonumber_size is None:
 457                 autonumber_size = 5
 458             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 459             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 460             if template_dict.get('playlist_index') is not None:
 461                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 462             if template_dict.get('resolution') is None:
 463                 if template_dict.get('width') and template_dict.get('height'):
 464                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 465                 elif template_dict.get('height'):
 466                     template_dict['resolution'] = '%sp' % template_dict['height']
 467                 elif template_dict.get('width'):
 468                     template_dict['resolution'] = '?x%d' % template_dict['width']
 469
 470             sanitize = lambda k, v: sanitize_filename(
 471                 compat_str(v),
 472                 restricted=self.params.get('restrictfilenames'),
 473                 is_id=(k == 'id'))
 474             template_dict = dict((k, sanitize(k, v))
 475                                  for k, v in template_dict.items()
 476                                  if v is not None)
 477             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 478
 479             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 480             tmpl = compat_expanduser(outtmpl)
 481             filename = tmpl % template_dict
 482             return filename
 483         except ValueError as err:
 484             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 485             return None
 486
 487     def _match_entry(self, info_dict):
 488         """ Returns None iff the file should be downloaded """
 489
 490         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 491         if 'title' in info_dict:
 492             # This can happen when we're just evaluating the playlist
 493             title = info_dict['title']
 494             matchtitle = self.params.get('matchtitle', False)
 495             if matchtitle:
 496                 if not re.search(matchtitle, title, re.IGNORECASE):
 497                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 498             rejecttitle = self.params.get('rejecttitle', False)
 499             if rejecttitle:
 500                 if re.search(rejecttitle, title, re.IGNORECASE):
 501                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 502         date = info_dict.get('upload_date', None)
 503         if date is not None:
 504             dateRange = self.params.get('daterange', DateRange())
 505             if date not in dateRange:
 506                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 507         view_count = info_dict.get('view_count', None)
 508         if view_count is not None:
 509             min_views = self.params.get('min_views')
 510             if min_views is not None and view_count < min_views:
 511                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 512             max_views = self.params.get('max_views')
 513             if max_views is not None and view_count > max_views:
 514                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 515         age_limit = self.params.get('age_limit')
 516         if age_limit is not None:
 517             actual_age_limit = info_dict.get('age_limit')
 518             if actual_age_limit is None:
 519                 actual_age_limit = 0
 520             if age_limit < actual_age_limit:
 521                 return 'Skipping "' + title + '" because it is age restricted'
 522         if self.in_download_archive(info_dict):
 523             return '%s has already been recorded in archive' % video_title
 524         return None
 525
 526     @staticmethod
 527     def add_extra_info(info_dict, extra_info):
 528         '''Set the keys from extra_info in info dict if they are missing'''
 529         for key, value in extra_info.items():
 530             info_dict.setdefault(key, value)
 531
 532     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 533                      process=True):
 534         '''
 535         Returns a list with a dictionary for each video we find.
 536         If 'download', also downloads the videos.
 537         extra_info is a dict containing the extra values to add to each result
 538          '''
 539
 540         if ie_key:
 541             ies = [self.get_info_extractor(ie_key)]
 542         else:
 543             ies = self._ies
 544
 545         for ie in ies:
 546             if not ie.suitable(url):
 547                 continue
 548
 549             if not ie.working():
 550                 self.report_warning('The program functionality for this site has been marked as broken, '
 551                                     'and will probably not work.')
 552
 553             try:
 554                 ie_result = ie.extract(url)
 555                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 556                     break
 557                 if isinstance(ie_result, list):
 558                     # Backwards compatibility: old IE result format
 559                     ie_result = {
 560                         '_type': 'compat_list',
 561                         'entries': ie_result,
 562                     }
 563                 self.add_default_extra_info(ie_result, ie, url)
 564                 if process:
 565                     return self.process_ie_result(ie_result, download, extra_info)
 566                 else:
 567                     return ie_result
 568             except ExtractorError as de:  # An error we somewhat expected
 569                 self.report_error(compat_str(de), de.format_traceback())
 570                 break
 571             except MaxDownloadsReached:
 572                 raise
 573             except Exception as e:
 574                 if self.params.get('ignoreerrors', False):
 575                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 576                     break
 577                 else:
 578                     raise
 579         else:
 580             self.report_error('no suitable InfoExtractor for URL %s' % url)
 581
 582     def add_default_extra_info(self, ie_result, ie, url):
 583         self.add_extra_info(ie_result, {
 584             'extractor': ie.IE_NAME,
 585             'webpage_url': url,
 586             'webpage_url_basename': url_basename(url),
 587             'extractor_key': ie.ie_key(),
 588         })
 589
 590     def process_ie_result(self, ie_result, download=True, extra_info={}):
 591         """
 592         Take the result of the ie(may be modified) and resolve all unresolved
 593         references (URLs, playlist items).
 594
 595         It will also download the videos if 'download'.
 596         Returns the resolved ie_result.
 597         """
 598
 599         result_type = ie_result.get('_type', 'video')
 600
 601         if result_type in ('url', 'url_transparent'):
 602             extract_flat = self.params.get('extract_flat', False)
 603             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 604                     extract_flat is True):
 605                 if self.params.get('forcejson', False):
 606                     self.to_stdout(json.dumps(ie_result))
 607                 return ie_result
 608
 609         if result_type == 'video':
 610             self.add_extra_info(ie_result, extra_info)
 611             return self.process_video_result(ie_result, download=download)
 612         elif result_type == 'url':
 613             # We have to add extra_info to the results because it may be
 614             # contained in a playlist
 615             return self.extract_info(ie_result['url'],
 616                                      download,
 617                                      ie_key=ie_result.get('ie_key'),
 618                                      extra_info=extra_info)
 619         elif result_type == 'url_transparent':
 620             # Use the information from the embedding page
 621             info = self.extract_info(
 622                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 623                 extra_info=extra_info, download=False, process=False)
 624
 625             new_result = ie_result.copy()
 626             for f in ('_type', 'id', 'url', 'ext', 'player_url', 'formats',
 627                       'entries', 'ie_key', 'duration',
 628                       'subtitles', 'annotations', 'format',
 629                       'thumbnail', 'thumbnails'):
 630                 if f in new_result:
 631                     del new_result[f]
 632                 if f in info:
 633                     new_result[f] = info[f]
 634
 635             assert new_result.get('_type') != 'url_transparent'
 636
 637             return self.process_ie_result(
 638                 new_result, download=download, extra_info=extra_info)
 639         elif result_type == 'playlist' or result_type == 'multi_video':
 640             # We process each entry in the playlist
 641             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 642             self.to_screen('[download] Downloading playlist: %s' % playlist)
 643
 644             playlist_results = []
 645
 646             playliststart = self.params.get('playliststart', 1) - 1
 647             playlistend = self.params.get('playlistend', None)
 648             # For backwards compatibility, interpret -1 as whole list
 649             if playlistend == -1:
 650                 playlistend = None
 651
 652             ie_entries = ie_result['entries']
 653             if isinstance(ie_entries, list):
 654                 n_all_entries = len(ie_entries)
 655                 entries = ie_entries[playliststart:playlistend]
 656                 n_entries = len(entries)
 657                 self.to_screen(
 658                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 659                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 660             elif isinstance(ie_entries, PagedList):
 661                 entries = ie_entries.getslice(
 662                     playliststart, playlistend)
 663                 n_entries = len(entries)
 664                 self.to_screen(
 665                     "[%s] playlist %s: Downloading %d videos" %
 666                     (ie_result['extractor'], playlist, n_entries))
 667             else:  # iterable
 668                 entries = list(itertools.islice(
 669                     ie_entries, playliststart, playlistend))
 670                 n_entries = len(entries)
 671                 self.to_screen(
 672                     "[%s] playlist %s: Downloading %d videos" %
 673                     (ie_result['extractor'], playlist, n_entries))
 674
 675             for i, entry in enumerate(entries, 1):
 676                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 677                 extra = {
 678                     'n_entries': n_entries,
 679                     'playlist': playlist,
 680                     'playlist_id': ie_result.get('id'),
 681                     'playlist_title': ie_result.get('title'),
 682                     'playlist_index': i + playliststart,
 683                     'extractor': ie_result['extractor'],
 684                     'webpage_url': ie_result['webpage_url'],
 685                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 686                     'extractor_key': ie_result['extractor_key'],
 687                 }
 688
 689                 reason = self._match_entry(entry)
 690                 if reason is not None:
 691                     self.to_screen('[download] ' + reason)
 692                     continue
 693
 694                 entry_result = self.process_ie_result(entry,
 695                                                       download=download,
 696                                                       extra_info=extra)
 697                 playlist_results.append(entry_result)
 698             ie_result['entries'] = playlist_results
 699             return ie_result
 700         elif result_type == 'compat_list':
 701             self.report_warning(
 702                 'Extractor %s returned a compat_list result. '
 703                 'It needs to be updated.' % ie_result.get('extractor'))
 704
 705             def _fixup(r):
 706                 self.add_extra_info(
 707                     r,
 708                     {
 709                         'extractor': ie_result['extractor'],
 710                         'webpage_url': ie_result['webpage_url'],
 711                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 712                         'extractor_key': ie_result['extractor_key'],
 713                     }
 714                 )
 715                 return r
 716             ie_result['entries'] = [
 717                 self.process_ie_result(_fixup(r), download, extra_info)
 718                 for r in ie_result['entries']
 719             ]
 720             return ie_result
 721         else:
 722             raise Exception('Invalid result type: %s' % result_type)
 723
 724     def select_format(self, format_spec, available_formats):
 725         if format_spec == 'best' or format_spec is None:
 726             return available_formats[-1]
 727         elif format_spec == 'worst':
 728             return available_formats[0]
 729         elif format_spec == 'bestaudio':
 730             audio_formats = [
 731                 f for f in available_formats
 732                 if f.get('vcodec') == 'none']
 733             if audio_formats:
 734                 return audio_formats[-1]
 735         elif format_spec == 'worstaudio':
 736             audio_formats = [
 737                 f for f in available_formats
 738                 if f.get('vcodec') == 'none']
 739             if audio_formats:
 740                 return audio_formats[0]
 741         elif format_spec == 'bestvideo':
 742             video_formats = [
 743                 f for f in available_formats
 744                 if f.get('acodec') == 'none']
 745             if video_formats:
 746                 return video_formats[-1]
 747         elif format_spec == 'worstvideo':
 748             video_formats = [
 749                 f for f in available_formats
 750                 if f.get('acodec') == 'none']
 751             if video_formats:
 752                 return video_formats[0]
 753         else:
 754             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
 755             if format_spec in extensions:
 756                 filter_f = lambda f: f['ext'] == format_spec
 757             else:
 758                 filter_f = lambda f: f['format_id'] == format_spec
 759             matches = list(filter(filter_f, available_formats))
 760             if matches:
 761                 return matches[-1]
 762         return None
 763
 764     def process_video_result(self, info_dict, download=True):
 765         assert info_dict.get('_type', 'video') == 'video'
 766
 767         if 'id' not in info_dict:
 768             raise ExtractorError('Missing "id" field in extractor result')
 769         if 'title' not in info_dict:
 770             raise ExtractorError('Missing "title" field in extractor result')
 771
 772         if 'playlist' not in info_dict:
 773             # It isn't part of a playlist
 774             info_dict['playlist'] = None
 775             info_dict['playlist_index'] = None
 776
 777         thumbnails = info_dict.get('thumbnails')
 778         if thumbnails:
 779             thumbnails.sort(key=lambda t: (
 780                 t.get('width'), t.get('height'), t.get('url')))
 781             for t in thumbnails:
 782                 if 'width' in t and 'height' in t:
 783                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 784
 785         if thumbnails and 'thumbnail' not in info_dict:
 786             info_dict['thumbnail'] = thumbnails[-1]['url']
 787
 788         if 'display_id' not in info_dict and 'id' in info_dict:
 789             info_dict['display_id'] = info_dict['id']
 790
 791         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 792             # Working around negative timestamps in Windows
 793             # (see http://bugs.python.org/issue1646728)
 794             if info_dict['timestamp'] < 0 and os.name == 'nt':
 795                 info_dict['timestamp'] = 0
 796             upload_date = datetime.datetime.utcfromtimestamp(
 797                 info_dict['timestamp'])
 798             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 799
 800         # This extractors handle format selection themselves
 801         if info_dict['extractor'] in ['Youku']:
 802             if download:
 803                 self.process_info(info_dict)
 804             return info_dict
 805
 806         # We now pick which formats have to be downloaded
 807         if info_dict.get('formats') is None:
 808             # There's only one format available
 809             formats = [info_dict]
 810         else:
 811             formats = info_dict['formats']
 812
 813         if not formats:
 814             raise ExtractorError('No video formats found!')
 815
 816         # We check that all the formats have the format and format_id fields
 817         for i, format in enumerate(formats):
 818             if 'url' not in format:
 819                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 820
 821             if format.get('format_id') is None:
 822                 format['format_id'] = compat_str(i)
 823             if format.get('format') is None:
 824                 format['format'] = '{id} - {res}{note}'.format(
 825                     id=format['format_id'],
 826                     res=self.format_resolution(format),
 827                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 828                 )
 829             # Automatically determine file extension if missing
 830             if 'ext' not in format:
 831                 format['ext'] = determine_ext(format['url']).lower()
 832
 833         format_limit = self.params.get('format_limit', None)
 834         if format_limit:
 835             formats = list(takewhile_inclusive(
 836                 lambda f: f['format_id'] != format_limit, formats
 837             ))
 838
 839         # TODO Central sorting goes here
 840
 841         if formats[0] is not info_dict:
 842             # only set the 'formats' fields if the original info_dict list them
 843             # otherwise we end up with a circular reference, the first (and unique)
 844             # element in the 'formats' field in info_dict is info_dict itself,
 845             # wich can't be exported to json
 846             info_dict['formats'] = formats
 847         if self.params.get('listformats', None):
 848             self.list_formats(info_dict)
 849             return
 850
 851         req_format = self.params.get('format')
 852         if req_format is None:
 853             req_format = 'best'
 854         formats_to_download = []
 855         # The -1 is for supporting YoutubeIE
 856         if req_format in ('-1', 'all'):
 857             formats_to_download = formats
 858         else:
 859             for rfstr in req_format.split(','):
 860                 # We can accept formats requested in the format: 34/5/best, we pick
 861                 # the first that is available, starting from left
 862                 req_formats = rfstr.split('/')
 863                 for rf in req_formats:
 864                     if re.match(r'.+?\+.+?', rf) is not None:
 865                         # Two formats have been requested like '137+139'
 866                         format_1, format_2 = rf.split('+')
 867                         formats_info = (self.select_format(format_1, formats),
 868                                         self.select_format(format_2, formats))
 869                         if all(formats_info):
 870                             # The first format must contain the video and the
 871                             # second the audio
 872                             if formats_info[0].get('vcodec') == 'none':
 873                                 self.report_error('The first format must '
 874                                                   'contain the video, try using '
 875                                                   '"-f %s+%s"' % (format_2, format_1))
 876                                 return
 877                             selected_format = {
 878                                 'requested_formats': formats_info,
 879                                 'format': rf,
 880                                 'ext': formats_info[0]['ext'],
 881                             }
 882                         else:
 883                             selected_format = None
 884                     else:
 885                         selected_format = self.select_format(rf, formats)
 886                     if selected_format is not None:
 887                         formats_to_download.append(selected_format)
 888                         break
 889         if not formats_to_download:
 890             raise ExtractorError('requested format not available',
 891                                  expected=True)
 892
 893         if download:
 894             if len(formats_to_download) > 1:
 895                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 896             for format in formats_to_download:
 897                 new_info = dict(info_dict)
 898                 new_info.update(format)
 899                 self.process_info(new_info)
 900         # We update the info dict with the best quality format (backwards compatibility)
 901         info_dict.update(formats_to_download[-1])
 902         return info_dict
 903
 904     def process_info(self, info_dict):
 905         """Process a single resolved IE result."""
 906
 907         assert info_dict.get('_type', 'video') == 'video'
 908
 909         max_downloads = self.params.get('max_downloads')
 910         if max_downloads is not None:
 911             if self._num_downloads >= int(max_downloads):
 912                 raise MaxDownloadsReached()
 913
 914         info_dict['fulltitle'] = info_dict['title']
 915         if len(info_dict['title']) > 200:
 916             info_dict['title'] = info_dict['title'][:197] + '...'
 917
 918         # Keep for backwards compatibility
 919         info_dict['stitle'] = info_dict['title']
 920
 921         if 'format' not in info_dict:
 922             info_dict['format'] = info_dict['ext']
 923
 924         reason = self._match_entry(info_dict)
 925         if reason is not None:
 926             self.to_screen('[download] ' + reason)
 927             return
 928
 929         self._num_downloads += 1
 930
 931         filename = self.prepare_filename(info_dict)
 932
 933         # Forced printings
 934         if self.params.get('forcetitle', False):
 935             self.to_stdout(info_dict['fulltitle'])
 936         if self.params.get('forceid', False):
 937             self.to_stdout(info_dict['id'])
 938         if self.params.get('forceurl', False):
 939             if info_dict.get('requested_formats') is not None:
 940                 for f in info_dict['requested_formats']:
 941                     self.to_stdout(f['url'] + f.get('play_path', ''))
 942             else:
 943                 # For RTMP URLs, also include the playpath
 944                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 945         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 946             self.to_stdout(info_dict['thumbnail'])
 947         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 948             self.to_stdout(info_dict['description'])
 949         if self.params.get('forcefilename', False) and filename is not None:
 950             self.to_stdout(filename)
 951         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 952             self.to_stdout(formatSeconds(info_dict['duration']))
 953         if self.params.get('forceformat', False):
 954             self.to_stdout(info_dict['format'])
 955         if self.params.get('forcejson', False):
 956             info_dict['_filename'] = filename
 957             self.to_stdout(json.dumps(info_dict))
 958         if self.params.get('dump_single_json', False):
 959             info_dict['_filename'] = filename
 960
 961         # Do nothing else if in simulate mode
 962         if self.params.get('simulate', False):
 963             return
 964
 965         if filename is None:
 966             return
 967
 968         try:
 969             dn = os.path.dirname(encodeFilename(filename))
 970             if dn and not os.path.exists(dn):
 971                 os.makedirs(dn)
 972         except (OSError, IOError) as err:
 973             self.report_error('unable to create directory ' + compat_str(err))
 974             return
 975
 976         if self.params.get('writedescription', False):
 977             descfn = filename + '.description'
 978             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 979                 self.to_screen('[info] Video description is already present')
 980             else:
 981                 try:
 982                     self.to_screen('[info] Writing video description to: ' + descfn)
 983                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 984                         descfile.write(info_dict['description'])
 985                 except (KeyError, TypeError):
 986                     self.report_warning('There\'s no description to write.')
 987                 except (OSError, IOError):
 988                     self.report_error('Cannot write description file ' + descfn)
 989                     return
 990
 991         if self.params.get('writeannotations', False):
 992             annofn = filename + '.annotations.xml'
 993             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 994                 self.to_screen('[info] Video annotations are already present')
 995             else:
 996                 try:
 997                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 998                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 999                         annofile.write(info_dict['annotations'])
1000                 except (KeyError, TypeError):
1001                     self.report_warning('There are no annotations to write.')
1002                 except (OSError, IOError):
1003                     self.report_error('Cannot write annotations file: ' + annofn)
1004                     return
1005
1006         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1007                                        self.params.get('writeautomaticsub')])
1008
1009         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1010             # subtitles download errors are already managed as troubles in relevant IE
1011             # that way it will silently go on when used with unsupporting IE
1012             subtitles = info_dict['subtitles']
1013             sub_format = self.params.get('subtitlesformat', 'srt')
1014             for sub_lang in subtitles.keys():
1015                 sub = subtitles[sub_lang]
1016                 if sub is None:
1017                     continue
1018                 try:
1019                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1020                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1021                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1022                     else:
1023                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1024                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1025                             subfile.write(sub)
1026                 except (OSError, IOError):
1027                     self.report_error('Cannot write subtitles file ' + sub_filename)
1028                     return
1029
1030         if self.params.get('writeinfojson', False):
1031             infofn = os.path.splitext(filename)[0] + '.info.json'
1032             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1033                 self.to_screen('[info] Video description metadata is already present')
1034             else:
1035                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1036                 try:
1037                     write_json_file(info_dict, infofn)
1038                 except (OSError, IOError):
1039                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1040                     return
1041
1042         if self.params.get('writethumbnail', False):
1043             if info_dict.get('thumbnail') is not None:
1044                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1045                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1046                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1047                     self.to_screen('[%s] %s: Thumbnail is already present' %
1048                                    (info_dict['extractor'], info_dict['id']))
1049                 else:
1050                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1051                                    (info_dict['extractor'], info_dict['id']))
1052                     try:
1053                         uf = self.urlopen(info_dict['thumbnail'])
1054                         with open(thumb_filename, 'wb') as thumbf:
1055                             shutil.copyfileobj(uf, thumbf)
1056                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1057                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1058                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1059                         self.report_warning('Unable to download thumbnail "%s": %s' %
1060                                             (info_dict['thumbnail'], compat_str(err)))
1061
1062         if not self.params.get('skip_download', False):
1063             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1064                 success = True
1065             else:
1066                 try:
1067                     def dl(name, info):
1068                         fd = get_suitable_downloader(info)(self, self.params)
1069                         for ph in self._progress_hooks:
1070                             fd.add_progress_hook(ph)
1071                         if self.params.get('verbose'):
1072                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1073                         return fd.download(name, info)
1074                     if info_dict.get('requested_formats') is not None:
1075                         downloaded = []
1076                         success = True
1077                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1078                         if not merger._executable:
1079                             postprocessors = []
1080                             self.report_warning('You have requested multiple '
1081                                                 'formats but ffmpeg or avconv are not installed.'
1082                                                 ' The formats won\'t be merged')
1083                         else:
1084                             postprocessors = [merger]
1085                         for f in info_dict['requested_formats']:
1086                             new_info = dict(info_dict)
1087                             new_info.update(f)
1088                             fname = self.prepare_filename(new_info)
1089                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1090                             downloaded.append(fname)
1091                             partial_success = dl(fname, new_info)
1092                             success = success and partial_success
1093                         info_dict['__postprocessors'] = postprocessors
1094                         info_dict['__files_to_merge'] = downloaded
1095                     else:
1096                         # Just a single file
1097                         success = dl(filename, info_dict)
1098                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1099                     self.report_error('unable to download video data: %s' % str(err))
1100                     return
1101                 except (OSError, IOError) as err:
1102                     raise UnavailableVideoError(err)
1103                 except (ContentTooShortError, ) as err:
1104                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1105                     return
1106
1107             if success:
1108                 try:
1109                     self.post_process(filename, info_dict)
1110                 except (PostProcessingError) as err:
1111                     self.report_error('postprocessing: %s' % str(err))
1112                     return
1113
1114         self.record_download_archive(info_dict)
1115
1116     def download(self, url_list):
1117         """Download a given list of URLs."""
1118         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1119         if (len(url_list) > 1 and
1120                 '%' not in outtmpl
1121                 and self.params.get('max_downloads') != 1):
1122             raise SameFileError(outtmpl)
1123
1124         for url in url_list:
1125             try:
1126                 # It also downloads the videos
1127                 res = self.extract_info(url)
1128             except UnavailableVideoError:
1129                 self.report_error('unable to download video')
1130             except MaxDownloadsReached:
1131                 self.to_screen('[info] Maximum number of downloaded files reached.')
1132                 raise
1133             else:
1134                 if self.params.get('dump_single_json', False):
1135                     self.to_stdout(json.dumps(res))
1136
1137         return self._download_retcode
1138
1139     def download_with_info_file(self, info_filename):
1140         with io.open(info_filename, 'r', encoding='utf-8') as f:
1141             info = json.load(f)
1142         try:
1143             self.process_ie_result(info, download=True)
1144         except DownloadError:
1145             webpage_url = info.get('webpage_url')
1146             if webpage_url is not None:
1147                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1148                 return self.download([webpage_url])
1149             else:
1150                 raise
1151         return self._download_retcode
1152
1153     def post_process(self, filename, ie_info):
1154         """Run all the postprocessors on the given file."""
1155         info = dict(ie_info)
1156         info['filepath'] = filename
1157         keep_video = None
1158         pps_chain = []
1159         if ie_info.get('__postprocessors') is not None:
1160             pps_chain.extend(ie_info['__postprocessors'])
1161         pps_chain.extend(self._pps)
1162         for pp in pps_chain:
1163             try:
1164                 keep_video_wish, new_info = pp.run(info)
1165                 if keep_video_wish is not None:
1166                     if keep_video_wish:
1167                         keep_video = keep_video_wish
1168                     elif keep_video is None:
1169                         # No clear decision yet, let IE decide
1170                         keep_video = keep_video_wish
1171             except PostProcessingError as e:
1172                 self.report_error(e.msg)
1173         if keep_video is False and not self.params.get('keepvideo', False):
1174             try:
1175                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1176                 os.remove(encodeFilename(filename))
1177             except (IOError, OSError):
1178                 self.report_warning('Unable to remove downloaded video file')
1179
1180     def _make_archive_id(self, info_dict):
1181         # Future-proof against any change in case
1182         # and backwards compatibility with prior versions
1183         extractor = info_dict.get('extractor_key')
1184         if extractor is None:
1185             if 'id' in info_dict:
1186                 extractor = info_dict.get('ie_key')  # key in a playlist
1187         if extractor is None:
1188             return None  # Incomplete video information
1189         return extractor.lower() + ' ' + info_dict['id']
1190
1191     def in_download_archive(self, info_dict):
1192         fn = self.params.get('download_archive')
1193         if fn is None:
1194             return False
1195
1196         vid_id = self._make_archive_id(info_dict)
1197         if vid_id is None:
1198             return False  # Incomplete video information
1199
1200         try:
1201             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1202                 for line in archive_file:
1203                     if line.strip() == vid_id:
1204                         return True
1205         except IOError as ioe:
1206             if ioe.errno != errno.ENOENT:
1207                 raise
1208         return False
1209
1210     def record_download_archive(self, info_dict):
1211         fn = self.params.get('download_archive')
1212         if fn is None:
1213             return
1214         vid_id = self._make_archive_id(info_dict)
1215         assert vid_id
1216         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1217             archive_file.write(vid_id + '\n')
1218
1219     @staticmethod
1220     def format_resolution(format, default='unknown'):
1221         if format.get('vcodec') == 'none':
1222             return 'audio only'
1223         if format.get('resolution') is not None:
1224             return format['resolution']
1225         if format.get('height') is not None:
1226             if format.get('width') is not None:
1227                 res = '%sx%s' % (format['width'], format['height'])
1228             else:
1229                 res = '%sp' % format['height']
1230         elif format.get('width') is not None:
1231             res = '?x%d' % format['width']
1232         else:
1233             res = default
1234         return res
1235
1236     def _format_note(self, fdict):
1237         res = ''
1238         if fdict.get('ext') in ['f4f', 'f4m']:
1239             res += '(unsupported) '
1240         if fdict.get('format_note') is not None:
1241             res += fdict['format_note'] + ' '
1242         if fdict.get('tbr') is not None:
1243             res += '%4dk ' % fdict['tbr']
1244         if fdict.get('container') is not None:
1245             if res:
1246                 res += ', '
1247             res += '%s container' % fdict['container']
1248         if (fdict.get('vcodec') is not None and
1249                 fdict.get('vcodec') != 'none'):
1250             if res:
1251                 res += ', '
1252             res += fdict['vcodec']
1253             if fdict.get('vbr') is not None:
1254                 res += '@'
1255         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1256             res += 'video@'
1257         if fdict.get('vbr') is not None:
1258             res += '%4dk' % fdict['vbr']
1259         if fdict.get('fps') is not None:
1260             res += ', %sfps' % fdict['fps']
1261         if fdict.get('acodec') is not None:
1262             if res:
1263                 res += ', '
1264             if fdict['acodec'] == 'none':
1265                 res += 'video only'
1266             else:
1267                 res += '%-5s' % fdict['acodec']
1268         elif fdict.get('abr') is not None:
1269             if res:
1270                 res += ', '
1271             res += 'audio'
1272         if fdict.get('abr') is not None:
1273             res += '@%3dk' % fdict['abr']
1274         if fdict.get('asr') is not None:
1275             res += ' (%5dHz)' % fdict['asr']
1276         if fdict.get('filesize') is not None:
1277             if res:
1278                 res += ', '
1279             res += format_bytes(fdict['filesize'])
1280         elif fdict.get('filesize_approx') is not None:
1281             if res:
1282                 res += ', '
1283             res += '~' + format_bytes(fdict['filesize_approx'])
1284         return res
1285
1286     def list_formats(self, info_dict):
1287         def line(format, idlen=20):
1288             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1289                 format['format_id'],
1290                 format['ext'],
1291                 self.format_resolution(format),
1292                 self._format_note(format),
1293             ))
1294
1295         formats = info_dict.get('formats', [info_dict])
1296         idlen = max(len('format code'),
1297                     max(len(f['format_id']) for f in formats))
1298         formats_s = [line(f, idlen) for f in formats]
1299         if len(formats) > 1:
1300             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1301             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1302
1303         header_line = line({
1304             'format_id': 'format code', 'ext': 'extension',
1305             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1306         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1307                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1308
1309     def urlopen(self, req):
1310         """ Start an HTTP download """
1311
1312         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1313         # always respected by websites, some tend to give out URLs with non percent-encoded
1314         # non-ASCII characters (see telemb.py, ard.py [#3412])
1315         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1316         # To work around aforementioned issue we will replace request's original URL with
1317         # percent-encoded one
1318         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1319         url = req if req_is_string else req.get_full_url()
1320         url_escaped = escape_url(url)
1321
1322         # Substitute URL if any change after escaping
1323         if url != url_escaped:
1324             if req_is_string:
1325                 req = url_escaped
1326             else:
1327                 req = compat_urllib_request.Request(
1328                     url_escaped, data=req.data, headers=req.headers,
1329                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1330
1331         return self._opener.open(req, timeout=self._socket_timeout)
1332
1333     def print_debug_header(self):
1334         if not self.params.get('verbose'):
1335             return
1336
1337         if type('') is not compat_str:
1338             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1339             self.report_warning(
1340                 'Your Python is broken! Update to a newer and supported version')
1341
1342         stdout_encoding = getattr(
1343             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1344         encoding_str = (
1345             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1346                 locale.getpreferredencoding(),
1347                 sys.getfilesystemencoding(),
1348                 stdout_encoding,
1349                 self.get_encoding()))
1350         write_string(encoding_str, encoding=None)
1351
1352         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1353         try:
1354             sp = subprocess.Popen(
1355                 ['git', 'rev-parse', '--short', 'HEAD'],
1356                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1357                 cwd=os.path.dirname(os.path.abspath(__file__)))
1358             out, err = sp.communicate()
1359             out = out.decode().strip()
1360             if re.match('[0-9a-f]+', out):
1361                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1362         except:
1363             try:
1364                 sys.exc_clear()
1365             except:
1366                 pass
1367         self._write_string('[debug] Python version %s - %s\n' % (
1368             platform.python_version(), platform_name()))
1369
1370         exe_versions = FFmpegPostProcessor.get_versions()
1371         exe_versions['rtmpdump'] = rtmpdump_version()
1372         exe_str = ', '.join(
1373             '%s %s' % (exe, v)
1374             for exe, v in sorted(exe_versions.items())
1375             if v
1376         )
1377         if not exe_str:
1378             exe_str = 'none'
1379         self._write_string('[debug] exe versions: %s\n' % exe_str)
1380
1381         proxy_map = {}
1382         for handler in self._opener.handlers:
1383             if hasattr(handler, 'proxies'):
1384                 proxy_map.update(handler.proxies)
1385         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1386
1387     def _setup_opener(self):
1388         timeout_val = self.params.get('socket_timeout')
1389         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1390
1391         opts_cookiefile = self.params.get('cookiefile')
1392         opts_proxy = self.params.get('proxy')
1393
1394         if opts_cookiefile is None:
1395             self.cookiejar = compat_cookiejar.CookieJar()
1396         else:
1397             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1398                 opts_cookiefile)
1399             if os.access(opts_cookiefile, os.R_OK):
1400                 self.cookiejar.load()
1401
1402         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1403             self.cookiejar)
1404         if opts_proxy is not None:
1405             if opts_proxy == '':
1406                 proxies = {}
1407             else:
1408                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1409         else:
1410             proxies = compat_urllib_request.getproxies()
1411             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1412             if 'http' in proxies and 'https' not in proxies:
1413                 proxies['https'] = proxies['http']
1414         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1415
1416         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1417         https_handler = make_HTTPS_handler(
1418             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1419         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1420         opener = compat_urllib_request.build_opener(
1421             https_handler, proxy_handler, cookie_processor, ydlh)
1422         # Delete the default user-agent header, which would otherwise apply in
1423         # cases where our custom HTTP handler doesn't come into play
1424         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1425         opener.addheaders = []
1426         self._opener = opener
1427
1428     def encode(self, s):
1429         if isinstance(s, bytes):
1430             return s  # Already encoded
1431
1432         try:
1433             return s.encode(self.get_encoding())
1434         except UnicodeEncodeError as err:
1435             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1436             raise
1437
1438     def get_encoding(self):
1439         encoding = self.params.get('encoding')
1440         if encoding is None:
1441             encoding = preferredencoding()
1442         return encoding