youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import json
  11 import locale
  12 import os
  13 import platform
  14 import re
  15 import shutil
  16 import subprocess
  17 import socket
  18 import sys
  19 import time
  20 import traceback
  21
  22 if os.name == 'nt':
  23     import ctypes
  24
  25 from .compat import (
  26     compat_cookiejar,
  27     compat_expanduser,
  28     compat_http_client,
  29     compat_str,
  30     compat_urllib_error,
  31     compat_urllib_request,
  32 )
  33 from .utils import (
  34     escape_url,
  35     ContentTooShortError,
  36     date_from_str,
  37     DateRange,
  38     DEFAULT_OUTTMPL,
  39     determine_ext,
  40     DownloadError,
  41     encodeFilename,
  42     ExtractorError,
  43     format_bytes,
  44     formatSeconds,
  45     get_term_width,
  46     locked_file,
  47     make_HTTPS_handler,
  48     MaxDownloadsReached,
  49     PagedList,
  50     PostProcessingError,
  51     platform_name,
  52     preferredencoding,
  53     SameFileError,
  54     sanitize_filename,
  55     subtitles_filename,
  56     takewhile_inclusive,
  57     UnavailableVideoError,
  58     url_basename,
  59     write_json_file,
  60     write_string,
  61     YoutubeDLHandler,
  62     prepend_extension,
  63 )
  64 from .cache import Cache
  65 from .extractor import get_info_extractor, gen_extractors
  66 from .downloader import get_suitable_downloader
  67 from .downloader.rtmp import rtmpdump_version
  68 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
  69 from .version import __version__
  70
  71
  72 class YoutubeDL(object):
  73     """YoutubeDL class.
  74
  75     YoutubeDL objects are the ones responsible for downloading the
  76     actual video file and writing it to disk if the user has requested
  77     it, among some other tasks. In most cases there should be one per
  78     program. As, given a video URL, the downloader doesn't know how to
  79     extract all the needed information, task that InfoExtractors do, it
  80     has to pass the URL to one of them.
  81
  82     For this, YoutubeDL objects have a method that allows
  83     InfoExtractors to be registered in a given order. When it is passed
  84     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  85     finds that reports being able to handle it. The InfoExtractor extracts
  86     all the information about the video or videos the URL refers to, and
  87     YoutubeDL process the extracted information, possibly using a File
  88     Downloader to download the video.
  89
  90     YoutubeDL objects accept a lot of parameters. In order not to saturate
  91     the object constructor with arguments, it receives a dictionary of
  92     options instead. These options are available through the params
  93     attribute for the InfoExtractors to use. The YoutubeDL also
  94     registers itself as the downloader in charge for the InfoExtractors
  95     that are added to it, so this is a "mutual registration".
  96
  97     Available options:
  98
  99     username:          Username for authentication purposes.
 100     password:          Password for authentication purposes.
 101     videopassword:     Password for accessing a video.
 102     usenetrc:          Use netrc for authentication instead.
 103     verbose:           Print additional info to stdout.
 104     quiet:             Do not print messages to stdout.
 105     no_warnings:       Do not print out anything for warnings.
 106     forceurl:          Force printing final URL.
 107     forcetitle:        Force printing title.
 108     forceid:           Force printing ID.
 109     forcethumbnail:    Force printing thumbnail URL.
 110     forcedescription:  Force printing description.
 111     forcefilename:     Force printing final filename.
 112     forceduration:     Force printing duration.
 113     forcejson:         Force printing info_dict as JSON.
 114     dump_single_json:  Force printing the info_dict of the whole playlist
 115                        (or video) as a single JSON line.
 116     simulate:          Do not download the video files.
 117     format:            Video format code.
 118     format_limit:      Highest quality format to try.
 119     outtmpl:           Template for output names.
 120     restrictfilenames: Do not allow "&" and spaces in file names
 121     ignoreerrors:      Do not stop on download errors.
 122     nooverwrites:      Prevent overwriting files.
 123     playliststart:     Playlist item to start at.
 124     playlistend:       Playlist item to end at.
 125     matchtitle:        Download only matching titles.
 126     rejecttitle:       Reject downloads for matching titles.
 127     logger:            Log messages to a logging.Logger instance.
 128     logtostderr:       Log messages to stderr instead of stdout.
 129     writedescription:  Write the video description to a .description file
 130     writeinfojson:     Write the video description to a .info.json file
 131     writeannotations:  Write the video annotations to a .annotations.xml file
 132     writethumbnail:    Write the thumbnail image to a file
 133     writesubtitles:    Write the video subtitles to a file
 134     writeautomaticsub: Write the automatic subtitles to a file
 135     allsubtitles:      Downloads all the subtitles of the video
 136                        (requires writesubtitles or writeautomaticsub)
 137     listsubtitles:     Lists all available subtitles for the video
 138     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 139     subtitleslangs:    List of languages of the subtitles to download
 140     keepvideo:         Keep the video file after post-processing
 141     daterange:         A DateRange object, download only if the upload_date is in the range.
 142     skip_download:     Skip the actual download of the video file
 143     cachedir:          Location of the cache files in the filesystem.
 144                        False to disable filesystem cache.
 145     noplaylist:        Download single video instead of a playlist if in doubt.
 146     age_limit:         An integer representing the user's age in years.
 147                        Unsuitable videos for the given age are skipped.
 148     min_views:         An integer representing the minimum view count the video
 149                        must have in order to not be skipped.
 150                        Videos without view count information are always
 151                        downloaded. None for no limit.
 152     max_views:         An integer representing the maximum view count.
 153                        Videos that are more popular than that are not
 154                        downloaded.
 155                        Videos without view count information are always
 156                        downloaded. None for no limit.
 157     download_archive:  File name of a file where all downloads are recorded.
 158                        Videos already present in the file are not downloaded
 159                        again.
 160     cookiefile:        File name where cookies should be read from and dumped to.
 161     nocheckcertificate:Do not verify SSL certificates
 162     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 163                        At the moment, this is only supported by YouTube.
 164     proxy:             URL of the proxy server to use
 165     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 166     bidi_workaround:   Work around buggy terminals without bidirectional text
 167                        support, using fribidi
 168     debug_printtraffic:Print out sent and received HTTP traffic
 169     include_ads:       Download ads as well
 170     default_search:    Prepend this string if an input url is not valid.
 171                        'auto' for elaborate guessing
 172     encoding:          Use this encoding instead of the system-specified.
 173     extract_flat:      Do not resolve URLs, return the immediate result.
 174                        Pass in 'in_playlist' to only show this behavior for
 175                        playlist items.
 176
 177     The following parameters are not used by YoutubeDL itself, they are used by
 178     the FileDownloader:
 179     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 180     noresizebuffer, retries, continuedl, noprogress, consoletitle
 181
 182     The following options are used by the post processors:
 183     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 184                        otherwise prefer avconv.
 185     exec_cmd:          Arbitrary command to run after downloading
 186     """
 187
    # Class-level defaults. __init__ rebinds every one of these names on the
    # instance, so the shared (mutable) lists below are not used by objects
    # that were created through __init__.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 194
 195     def __init__(self, params=None, auto_init=True):
 196         """Create a FileDownloader object with the given options."""
 197         if params is None:
 198             params = {}
 199         self._ies = []
 200         self._ies_instances = {}
 201         self._pps = []
 202         self._progress_hooks = []
 203         self._download_retcode = 0
 204         self._num_downloads = 0
 205         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 206         self._err_file = sys.stderr
 207         self.params = params
 208         self.cache = Cache(self)
 209
 210         if params.get('bidi_workaround', False):
 211             try:
 212                 import pty
 213                 master, slave = pty.openpty()
 214                 width = get_term_width()
 215                 if width is None:
 216                     width_args = []
 217                 else:
 218                     width_args = ['-w', str(width)]
 219                 sp_kwargs = dict(
 220                     stdin=subprocess.PIPE,
 221                     stdout=slave,
 222                     stderr=self._err_file)
 223                 try:
 224                     self._output_process = subprocess.Popen(
 225                         ['bidiv'] + width_args, **sp_kwargs
 226                     )
 227                 except OSError:
 228                     self._output_process = subprocess.Popen(
 229                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 230                 self._output_channel = os.fdopen(master, 'rb')
 231             except OSError as ose:
 232                 if ose.errno == 2:
 233                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 234                 else:
 235                     raise
 236
 237         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 238                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 239                 and not params.get('restrictfilenames', False)):
 240             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 241             self.report_warning(
 242                 'Assuming --restrict-filenames since file system encoding '
 243                 'cannot encode all characters. '
 244                 'Set the LC_ALL environment variable to fix this.')
 245             self.params['restrictfilenames'] = True
 246
 247         if '%(stitle)s' in self.params.get('outtmpl', ''):
 248             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 249
 250         self._setup_opener()
 251
 252         if auto_init:
 253             self.print_debug_header()
 254             self.add_default_info_extractors()
 255
 256     def add_info_extractor(self, ie):
 257         """Add an InfoExtractor object to the end of the list."""
 258         self._ies.append(ie)
 259         self._ies_instances[ie.ie_key()] = ie
 260         ie.set_downloader(self)
 261
 262     def get_info_extractor(self, ie_key):
 263         """
 264         Get an instance of an IE with name ie_key, it will try to get one from
 265         the _ies list, if there's no instance it will create a new one and add
 266         it to the extractor list.
 267         """
 268         ie = self._ies_instances.get(ie_key)
 269         if ie is None:
 270             ie = get_info_extractor(ie_key)()
 271             self.add_info_extractor(ie)
 272         return ie
 273
 274     def add_default_info_extractors(self):
 275         """
 276         Add the InfoExtractors returned by gen_extractors to the end of the list
 277         """
 278         for ie in gen_extractors():
 279             self.add_info_extractor(ie)
 280
 281     def add_post_processor(self, pp):
 282         """Add a PostProcessor object to the end of the chain."""
 283         self._pps.append(pp)
 284         pp.set_downloader(self)
 285
 286     def add_progress_hook(self, ph):
 287         """Add the progress hook (currently only for the file downloader)"""
 288         self._progress_hooks.append(ph)
 289
 290     def _bidi_workaround(self, message):
 291         if not hasattr(self, '_output_channel'):
 292             return message
 293
 294         assert hasattr(self, '_output_process')
 295         assert isinstance(message, compat_str)
 296         line_count = message.count('\n') + 1
 297         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 298         self._output_process.stdin.flush()
 299         res = ''.join(self._output_channel.readline().decode('utf-8')
 300                        for _ in range(line_count))
 301         return res[:-len('\n')]
 302
 303     def to_screen(self, message, skip_eol=False):
 304         """Print message to stdout if not in quiet mode."""
 305         return self.to_stdout(message, skip_eol, check_quiet=True)
 306
 307     def _write_string(self, s, out=None):
 308         write_string(s, out=out, encoding=self.params.get('encoding'))
 309
 310     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 311         """Print message to stdout if not in quiet mode."""
 312         if self.params.get('logger'):
 313             self.params['logger'].debug(message)
 314         elif not check_quiet or not self.params.get('quiet', False):
 315             message = self._bidi_workaround(message)
 316             terminator = ['\n', ''][skip_eol]
 317             output = message + terminator
 318
 319             self._write_string(output, self._screen_file)
 320
 321     def to_stderr(self, message):
 322         """Print message to stderr."""
 323         assert isinstance(message, compat_str)
 324         if self.params.get('logger'):
 325             self.params['logger'].error(message)
 326         else:
 327             message = self._bidi_workaround(message)
 328             output = message + '\n'
 329             self._write_string(output, self._err_file)
 330
 331     def to_console_title(self, message):
 332         if not self.params.get('consoletitle', False):
 333             return
 334         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 335             # c_wchar_p() might not be necessary if `message` is
 336             # already of type unicode()
 337             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 338         elif 'TERM' in os.environ:
 339             self._write_string('\033]0;%s\007' % message, self._screen_file)
 340
 341     def save_console_title(self):
 342         if not self.params.get('consoletitle', False):
 343             return
 344         if 'TERM' in os.environ:
 345             # Save the title on stack
 346             self._write_string('\033[22;0t', self._screen_file)
 347
 348     def restore_console_title(self):
 349         if not self.params.get('consoletitle', False):
 350             return
 351         if 'TERM' in os.environ:
 352             # Restore the title from stack
 353             self._write_string('\033[23;0t', self._screen_file)
 354
 355     def __enter__(self):
 356         self.save_console_title()
 357         return self
 358
 359     def __exit__(self, *args):
 360         self.restore_console_title()
 361
 362         if self.params.get('cookiefile') is not None:
 363             self.cookiejar.save()
 364
 365     def trouble(self, message=None, tb=None):
 366         """Determine action to take when a download problem appears.
 367
 368         Depending on if the downloader has been configured to ignore
 369         download errors or not, this method may throw an exception or
 370         not when errors are found, after printing the message.
 371
 372         tb, if given, is additional traceback information.
 373         """
 374         if message is not None:
 375             self.to_stderr(message)
 376         if self.params.get('verbose'):
 377             if tb is None:
 378                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 379                     tb = ''
 380                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 381                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 382                     tb += compat_str(traceback.format_exc())
 383                 else:
 384                     tb_data = traceback.format_list(traceback.extract_stack())
 385                     tb = ''.join(tb_data)
 386             self.to_stderr(tb)
 387         if not self.params.get('ignoreerrors', False):
 388             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 389                 exc_info = sys.exc_info()[1].exc_info
 390             else:
 391                 exc_info = sys.exc_info()
 392             raise DownloadError(message, exc_info)
 393         self._download_retcode = 1
 394
 395     def report_warning(self, message):
 396         '''
 397         Print the message to stderr, it will be prefixed with 'WARNING:'
 398         If stderr is a tty file the 'WARNING:' will be colored
 399         '''
 400         if self.params.get('logger') is not None:
 401             self.params['logger'].warning(message)
 402         else:
 403             if self.params.get('no_warnings'):
 404                 return
 405             if self._err_file.isatty() and os.name != 'nt':
 406                 _msg_header = '\033[0;33mWARNING:\033[0m'
 407             else:
 408                 _msg_header = 'WARNING:'
 409             warning_message = '%s %s' % (_msg_header, message)
 410             self.to_stderr(warning_message)
 411
 412     def report_error(self, message, tb=None):
 413         '''
 414         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 415         in red if stderr is a tty file.
 416         '''
 417         if self._err_file.isatty() and os.name != 'nt':
 418             _msg_header = '\033[0;31mERROR:\033[0m'
 419         else:
 420             _msg_header = 'ERROR:'
 421         error_message = '%s %s' % (_msg_header, message)
 422         self.trouble(error_message, tb)
 423
 424     def report_file_already_downloaded(self, file_name):
 425         """Report file has already been fully downloaded."""
 426         try:
 427             self.to_screen('[download] %s has already been downloaded' % file_name)
 428         except UnicodeEncodeError:
 429             self.to_screen('[download] The file has already been downloaded')
 430
 431     def prepare_filename(self, info_dict):
 432         """Generate the output filename."""
 433         try:
 434             template_dict = dict(info_dict)
 435
 436             template_dict['epoch'] = int(time.time())
 437             autonumber_size = self.params.get('autonumber_size')
 438             if autonumber_size is None:
 439                 autonumber_size = 5
 440             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 441             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 442             if template_dict.get('playlist_index') is not None:
 443                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 444             if template_dict.get('resolution') is None:
 445                 if template_dict.get('width') and template_dict.get('height'):
 446                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 447                 elif template_dict.get('height'):
 448                     template_dict['resolution'] = '%sp' % template_dict['height']
 449                 elif template_dict.get('width'):
 450                     template_dict['resolution'] = '?x%d' % template_dict['width']
 451
 452             sanitize = lambda k, v: sanitize_filename(
 453                 compat_str(v),
 454                 restricted=self.params.get('restrictfilenames'),
 455                 is_id=(k == 'id'))
 456             template_dict = dict((k, sanitize(k, v))
 457                                  for k, v in template_dict.items()
 458                                  if v is not None)
 459             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 460
 461             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 462             tmpl = compat_expanduser(outtmpl)
 463             filename = tmpl % template_dict
 464             return filename
 465         except ValueError as err:
 466             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 467             return None
 468
 469     def _match_entry(self, info_dict):
 470         """ Returns None iff the file should be downloaded """
 471
 472         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 473         if 'title' in info_dict:
 474             # This can happen when we're just evaluating the playlist
 475             title = info_dict['title']
 476             matchtitle = self.params.get('matchtitle', False)
 477             if matchtitle:
 478                 if not re.search(matchtitle, title, re.IGNORECASE):
 479                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 480             rejecttitle = self.params.get('rejecttitle', False)
 481             if rejecttitle:
 482                 if re.search(rejecttitle, title, re.IGNORECASE):
 483                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 484         date = info_dict.get('upload_date', None)
 485         if date is not None:
 486             dateRange = self.params.get('daterange', DateRange())
 487             if date not in dateRange:
 488                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 489         view_count = info_dict.get('view_count', None)
 490         if view_count is not None:
 491             min_views = self.params.get('min_views')
 492             if min_views is not None and view_count < min_views:
 493                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 494             max_views = self.params.get('max_views')
 495             if max_views is not None and view_count > max_views:
 496                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 497         age_limit = self.params.get('age_limit')
 498         if age_limit is not None:
 499             actual_age_limit = info_dict.get('age_limit')
 500             if actual_age_limit is None:
 501                 actual_age_limit = 0
 502             if age_limit < actual_age_limit:
 503                 return 'Skipping "' + title + '" because it is age restricted'
 504         if self.in_download_archive(info_dict):
 505             return '%s has already been recorded in archive' % video_title
 506         return None
 507
 508     @staticmethod
 509     def add_extra_info(info_dict, extra_info):
 510         '''Set the keys from extra_info in info dict if they are missing'''
 511         for key, value in extra_info.items():
 512             info_dict.setdefault(key, value)
 513
 514     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 515                      process=True):
 516         '''
 517         Returns a list with a dictionary for each video we find.
 518         If 'download', also downloads the videos.
 519         extra_info is a dict containing the extra values to add to each result
 520          '''
 521
 522         if ie_key:
 523             ies = [self.get_info_extractor(ie_key)]
 524         else:
 525             ies = self._ies
 526
 527         for ie in ies:
 528             if not ie.suitable(url):
 529                 continue
 530
 531             if not ie.working():
 532                 self.report_warning('The program functionality for this site has been marked as broken, '
 533                                     'and will probably not work.')
 534
 535             try:
 536                 ie_result = ie.extract(url)
 537                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 538                     break
 539                 if isinstance(ie_result, list):
 540                     # Backwards compatibility: old IE result format
 541                     ie_result = {
 542                         '_type': 'compat_list',
 543                         'entries': ie_result,
 544                     }
 545                 self.add_default_extra_info(ie_result, ie, url)
 546                 if process:
 547                     return self.process_ie_result(ie_result, download, extra_info)
 548                 else:
 549                     return ie_result
 550             except ExtractorError as de: # An error we somewhat expected
 551                 self.report_error(compat_str(de), de.format_traceback())
 552                 break
 553             except MaxDownloadsReached:
 554                 raise
 555             except Exception as e:
 556                 if self.params.get('ignoreerrors', False):
 557                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 558                     break
 559                 else:
 560                     raise
 561         else:
 562             self.report_error('no suitable InfoExtractor for URL %s' % url)
 563
 564     def add_default_extra_info(self, ie_result, ie, url):
 565         self.add_extra_info(ie_result, {
 566             'extractor': ie.IE_NAME,
 567             'webpage_url': url,
 568             'webpage_url_basename': url_basename(url),
 569             'extractor_key': ie.ie_key(),
 570         })
 571
 572     def process_ie_result(self, ie_result, download=True, extra_info={}):
 573         """
 574         Take the result of the ie(may be modified) and resolve all unresolved
 575         references (URLs, playlist items).
 576
 577         It will also download the videos if 'download'.
 578         Returns the resolved ie_result.
 579         """
 580
 581         result_type = ie_result.get('_type', 'video')
 582
 583         if result_type in ('url', 'url_transparent'):
 584             extract_flat = self.params.get('extract_flat', False)
 585             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 586                     extract_flat is True):
 587                 if self.params.get('forcejson', False):
 588                     self.to_stdout(json.dumps(ie_result))
 589                 return ie_result
 590
 591         if result_type == 'video':
 592             self.add_extra_info(ie_result, extra_info)
 593             return self.process_video_result(ie_result, download=download)
 594         elif result_type == 'url':
 595             # We have to add extra_info to the results because it may be
 596             # contained in a playlist
 597             return self.extract_info(ie_result['url'],
 598                                      download,
 599                                      ie_key=ie_result.get('ie_key'),
 600                                      extra_info=extra_info)
 601         elif result_type == 'url_transparent':
 602             # Use the information from the embedding page
 603             info = self.extract_info(
 604                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 605                 extra_info=extra_info, download=False, process=False)
 606
 607             def make_result(embedded_info):
 608                 new_result = ie_result.copy()
 609                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 610                           'entries', 'ie_key', 'duration',
 611                           'subtitles', 'annotations', 'format',
 612                           'thumbnail', 'thumbnails'):
 613                     if f in new_result:
 614                         del new_result[f]
 615                     if f in embedded_info:
 616                         new_result[f] = embedded_info[f]
 617                 return new_result
 618             new_result = make_result(info)
 619
 620             assert new_result.get('_type') != 'url_transparent'
 621             if new_result.get('_type') == 'compat_list':
 622                 new_result['entries'] = [
 623                     make_result(e) for e in new_result['entries']]
 624
 625             return self.process_ie_result(
 626                 new_result, download=download, extra_info=extra_info)
 627         elif result_type == 'playlist' or playlist == 'multi_video':
 628             # We process each entry in the playlist
 629             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 630             self.to_screen('[download] Downloading playlist: %s' % playlist)
 631
 632             playlist_results = []
 633
 634             playliststart = self.params.get('playliststart', 1) - 1
 635             playlistend = self.params.get('playlistend', None)
 636             # For backwards compatibility, interpret -1 as whole list
 637             if playlistend == -1:
 638                 playlistend = None
 639
 640             if isinstance(ie_result['entries'], list):
 641                 n_all_entries = len(ie_result['entries'])
 642                 entries = ie_result['entries'][playliststart:playlistend]
 643                 n_entries = len(entries)
 644                 self.to_screen(
 645                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 646                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 647             else:
 648                 assert isinstance(ie_result['entries'], PagedList)
 649                 entries = ie_result['entries'].getslice(
 650                     playliststart, playlistend)
 651                 n_entries = len(entries)
 652                 self.to_screen(
 653                     "[%s] playlist %s: Downloading %d videos" %
 654                     (ie_result['extractor'], playlist, n_entries))
 655
 656             for i, entry in enumerate(entries, 1):
 657                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 658                 extra = {
 659                     'n_entries': n_entries,
 660                     'playlist': playlist,
 661                     'playlist_id': ie_result.get('id'),
 662                     'playlist_title': ie_result.get('title'),
 663                     'playlist_index': i + playliststart,
 664                     'extractor': ie_result['extractor'],
 665                     'webpage_url': ie_result['webpage_url'],
 666                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 667                     'extractor_key': ie_result['extractor_key'],
 668                 }
 669
 670                 reason = self._match_entry(entry)
 671                 if reason is not None:
 672                     self.to_screen('[download] ' + reason)
 673                     continue
 674
 675                 entry_result = self.process_ie_result(entry,
 676                                                       download=download,
 677                                                       extra_info=extra)
 678                 playlist_results.append(entry_result)
 679             ie_result['entries'] = playlist_results
 680             return ie_result
 681         elif result_type == 'compat_list':
 682             self.report_warning(
 683                 'Extractor %s returned a compat_list result. '
 684                 'It needs to be updated.' % ie_result.get('extractor'))
 685             def _fixup(r):
 686                 self.add_extra_info(r,
 687                     {
 688                         'extractor': ie_result['extractor'],
 689                         'webpage_url': ie_result['webpage_url'],
 690                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 691                         'extractor_key': ie_result['extractor_key'],
 692                     })
 693                 return r
 694             ie_result['entries'] = [
 695                 self.process_ie_result(_fixup(r), download, extra_info)
 696                 for r in ie_result['entries']
 697             ]
 698             return ie_result
 699         else:
 700             raise Exception('Invalid result type: %s' % result_type)
 701
 702     def select_format(self, format_spec, available_formats):
 703         if format_spec == 'best' or format_spec is None:
 704             return available_formats[-1]
 705         elif format_spec == 'worst':
 706             return available_formats[0]
 707         elif format_spec == 'bestaudio':
 708             audio_formats = [
 709                 f for f in available_formats
 710                 if f.get('vcodec') == 'none']
 711             if audio_formats:
 712                 return audio_formats[-1]
 713         elif format_spec == 'worstaudio':
 714             audio_formats = [
 715                 f for f in available_formats
 716                 if f.get('vcodec') == 'none']
 717             if audio_formats:
 718                 return audio_formats[0]
 719         elif format_spec == 'bestvideo':
 720             video_formats = [
 721                 f for f in available_formats
 722                 if f.get('acodec') == 'none']
 723             if video_formats:
 724                 return video_formats[-1]
 725         elif format_spec == 'worstvideo':
 726             video_formats = [
 727                 f for f in available_formats
 728                 if f.get('acodec') == 'none']
 729             if video_formats:
 730                 return video_formats[0]
 731         else:
 732             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
 733             if format_spec in extensions:
 734                 filter_f = lambda f: f['ext'] == format_spec
 735             else:
 736                 filter_f = lambda f: f['format_id'] == format_spec
 737             matches = list(filter(filter_f, available_formats))
 738             if matches:
 739                 return matches[-1]
 740         return None
 741
 742     def process_video_result(self, info_dict, download=True):
 743         assert info_dict.get('_type', 'video') == 'video'
 744
 745         if 'id' not in info_dict:
 746             raise ExtractorError('Missing "id" field in extractor result')
 747         if 'title' not in info_dict:
 748             raise ExtractorError('Missing "title" field in extractor result')
 749
 750         if 'playlist' not in info_dict:
 751             # It isn't part of a playlist
 752             info_dict['playlist'] = None
 753             info_dict['playlist_index'] = None
 754
 755         thumbnails = info_dict.get('thumbnails')
 756         if thumbnails:
 757             thumbnails.sort(key=lambda t: (
 758                 t.get('width'), t.get('height'), t.get('url')))
 759             for t in thumbnails:
 760                 if 'width' in t and 'height' in t:
 761                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 762
 763         if thumbnails and 'thumbnail' not in info_dict:
 764             info_dict['thumbnail'] = thumbnails[-1]['url']
 765
 766         if 'display_id' not in info_dict and 'id' in info_dict:
 767             info_dict['display_id'] = info_dict['id']
 768
 769         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 770             upload_date = datetime.datetime.utcfromtimestamp(
 771                 info_dict['timestamp'])
 772             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 773
 774         # This extractors handle format selection themselves
 775         if info_dict['extractor'] in ['Youku']:
 776             if download:
 777                 self.process_info(info_dict)
 778             return info_dict
 779
 780         # We now pick which formats have to be downloaded
 781         if info_dict.get('formats') is None:
 782             # There's only one format available
 783             formats = [info_dict]
 784         else:
 785             formats = info_dict['formats']
 786
 787         if not formats:
 788             raise ExtractorError('No video formats found!')
 789
 790         # We check that all the formats have the format and format_id fields
 791         for i, format in enumerate(formats):
 792             if 'url' not in format:
 793                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 794
 795             if format.get('format_id') is None:
 796                 format['format_id'] = compat_str(i)
 797             if format.get('format') is None:
 798                 format['format'] = '{id} - {res}{note}'.format(
 799                     id=format['format_id'],
 800                     res=self.format_resolution(format),
 801                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 802                 )
 803             # Automatically determine file extension if missing
 804             if 'ext' not in format:
 805                 format['ext'] = determine_ext(format['url']).lower()
 806
 807         format_limit = self.params.get('format_limit', None)
 808         if format_limit:
 809             formats = list(takewhile_inclusive(
 810                 lambda f: f['format_id'] != format_limit, formats
 811             ))
 812
 813         # TODO Central sorting goes here
 814
 815         if formats[0] is not info_dict:
 816             # only set the 'formats' fields if the original info_dict list them
 817             # otherwise we end up with a circular reference, the first (and unique)
 818             # element in the 'formats' field in info_dict is info_dict itself,
 819             # wich can't be exported to json
 820             info_dict['formats'] = formats
 821         if self.params.get('listformats', None):
 822             self.list_formats(info_dict)
 823             return
 824
 825         req_format = self.params.get('format')
 826         if req_format is None:
 827             req_format = 'best'
 828         formats_to_download = []
 829         # The -1 is for supporting YoutubeIE
 830         if req_format in ('-1', 'all'):
 831             formats_to_download = formats
 832         else:
 833             for rfstr in req_format.split(','):
 834                 # We can accept formats requested in the format: 34/5/best, we pick
 835                 # the first that is available, starting from left
 836                 req_formats = rfstr.split('/')
 837                 for rf in req_formats:
 838                     if re.match(r'.+?\+.+?', rf) is not None:
 839                         # Two formats have been requested like '137+139'
 840                         format_1, format_2 = rf.split('+')
 841                         formats_info = (self.select_format(format_1, formats),
 842                             self.select_format(format_2, formats))
 843                         if all(formats_info):
 844                             # The first format must contain the video and the
 845                             # second the audio
 846                             if formats_info[0].get('vcodec') == 'none':
 847                                 self.report_error('The first format must '
 848                                     'contain the video, try using '
 849                                     '"-f %s+%s"' % (format_2, format_1))
 850                                 return
 851                             selected_format = {
 852                                 'requested_formats': formats_info,
 853                                 'format': rf,
 854                                 'ext': formats_info[0]['ext'],
 855                             }
 856                         else:
 857                             selected_format = None
 858                     else:
 859                         selected_format = self.select_format(rf, formats)
 860                     if selected_format is not None:
 861                         formats_to_download.append(selected_format)
 862                         break
 863         if not formats_to_download:
 864             raise ExtractorError('requested format not available',
 865                                  expected=True)
 866
 867         if download:
 868             if len(formats_to_download) > 1:
 869                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 870             for format in formats_to_download:
 871                 new_info = dict(info_dict)
 872                 new_info.update(format)
 873                 self.process_info(new_info)
 874         # We update the info dict with the best quality format (backwards compatibility)
 875         info_dict.update(formats_to_download[-1])
 876         return info_dict
 877
 878     def process_info(self, info_dict):
 879         """Process a single resolved IE result."""
 880
 881         assert info_dict.get('_type', 'video') == 'video'
 882
 883         max_downloads = self.params.get('max_downloads')
 884         if max_downloads is not None:
 885             if self._num_downloads >= int(max_downloads):
 886                 raise MaxDownloadsReached()
 887
 888         info_dict['fulltitle'] = info_dict['title']
 889         if len(info_dict['title']) > 200:
 890             info_dict['title'] = info_dict['title'][:197] + '...'
 891
 892         # Keep for backwards compatibility
 893         info_dict['stitle'] = info_dict['title']
 894
 895         if 'format' not in info_dict:
 896             info_dict['format'] = info_dict['ext']
 897
 898         reason = self._match_entry(info_dict)
 899         if reason is not None:
 900             self.to_screen('[download] ' + reason)
 901             return
 902
 903         self._num_downloads += 1
 904
 905         filename = self.prepare_filename(info_dict)
 906
 907         # Forced printings
 908         if self.params.get('forcetitle', False):
 909             self.to_stdout(info_dict['fulltitle'])
 910         if self.params.get('forceid', False):
 911             self.to_stdout(info_dict['id'])
 912         if self.params.get('forceurl', False):
 913             # For RTMP URLs, also include the playpath
 914             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 915         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 916             self.to_stdout(info_dict['thumbnail'])
 917         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 918             self.to_stdout(info_dict['description'])
 919         if self.params.get('forcefilename', False) and filename is not None:
 920             self.to_stdout(filename)
 921         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 922             self.to_stdout(formatSeconds(info_dict['duration']))
 923         if self.params.get('forceformat', False):
 924             self.to_stdout(info_dict['format'])
 925         if self.params.get('forcejson', False):
 926             info_dict['_filename'] = filename
 927             self.to_stdout(json.dumps(info_dict))
 928         if self.params.get('dump_single_json', False):
 929             info_dict['_filename'] = filename
 930
 931         # Do nothing else if in simulate mode
 932         if self.params.get('simulate', False):
 933             return
 934
 935         if filename is None:
 936             return
 937
 938         try:
 939             dn = os.path.dirname(encodeFilename(filename))
 940             if dn and not os.path.exists(dn):
 941                 os.makedirs(dn)
 942         except (OSError, IOError) as err:
 943             self.report_error('unable to create directory ' + compat_str(err))
 944             return
 945
 946         if self.params.get('writedescription', False):
 947             descfn = filename + '.description'
 948             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 949                 self.to_screen('[info] Video description is already present')
 950             else:
 951                 try:
 952                     self.to_screen('[info] Writing video description to: ' + descfn)
 953                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 954                         descfile.write(info_dict['description'])
 955                 except (KeyError, TypeError):
 956                     self.report_warning('There\'s no description to write.')
 957                 except (OSError, IOError):
 958                     self.report_error('Cannot write description file ' + descfn)
 959                     return
 960
 961         if self.params.get('writeannotations', False):
 962             annofn = filename + '.annotations.xml'
 963             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 964                 self.to_screen('[info] Video annotations are already present')
 965             else:
 966                 try:
 967                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 968                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 969                         annofile.write(info_dict['annotations'])
 970                 except (KeyError, TypeError):
 971                     self.report_warning('There are no annotations to write.')
 972                 except (OSError, IOError):
 973                     self.report_error('Cannot write annotations file: ' + annofn)
 974                     return
 975
 976         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 977                                        self.params.get('writeautomaticsub')])
 978
 979         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 980             # subtitles download errors are already managed as troubles in relevant IE
 981             # that way it will silently go on when used with unsupporting IE
 982             subtitles = info_dict['subtitles']
 983             sub_format = self.params.get('subtitlesformat', 'srt')
 984             for sub_lang in subtitles.keys():
 985                 sub = subtitles[sub_lang]
 986                 if sub is None:
 987                     continue
 988                 try:
 989                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 990                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 991                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
 992                     else:
 993                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 994                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 995                                 subfile.write(sub)
 996                 except (OSError, IOError):
 997                     self.report_error('Cannot write subtitles file ' + sub_filename)
 998                     return
 999
1000         if self.params.get('writeinfojson', False):
1001             infofn = os.path.splitext(filename)[0] + '.info.json'
1002             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1003                 self.to_screen('[info] Video description metadata is already present')
1004             else:
1005                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1006                 try:
1007                     write_json_file(info_dict, infofn)
1008                 except (OSError, IOError):
1009                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1010                     return
1011
1012         if self.params.get('writethumbnail', False):
1013             if info_dict.get('thumbnail') is not None:
1014                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1015                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1016                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1017                     self.to_screen('[%s] %s: Thumbnail is already present' %
1018                                    (info_dict['extractor'], info_dict['id']))
1019                 else:
1020                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1021                                    (info_dict['extractor'], info_dict['id']))
1022                     try:
1023                         uf = self.urlopen(info_dict['thumbnail'])
1024                         with open(thumb_filename, 'wb') as thumbf:
1025                             shutil.copyfileobj(uf, thumbf)
1026                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1027                             (info_dict['extractor'], info_dict['id'], thumb_filename))
1028                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1029                         self.report_warning('Unable to download thumbnail "%s": %s' %
1030                             (info_dict['thumbnail'], compat_str(err)))
1031
1032         if not self.params.get('skip_download', False):
1033             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1034                 success = True
1035             else:
1036                 try:
1037                     def dl(name, info):
1038                         fd = get_suitable_downloader(info)(self, self.params)
1039                         for ph in self._progress_hooks:
1040                             fd.add_progress_hook(ph)
1041                         if self.params.get('verbose'):
1042                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1043                         return fd.download(name, info)
1044                     if info_dict.get('requested_formats') is not None:
1045                         downloaded = []
1046                         success = True
1047                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1048                         if not merger._executable:
1049                             postprocessors = []
1050                             self.report_warning('You have requested multiple '
1051                                 'formats but ffmpeg or avconv are not installed.'
1052                                 ' The formats won\'t be merged')
1053                         else:
1054                             postprocessors = [merger]
1055                         for f in info_dict['requested_formats']:
1056                             new_info = dict(info_dict)
1057                             new_info.update(f)
1058                             fname = self.prepare_filename(new_info)
1059                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1060                             downloaded.append(fname)
1061                             partial_success = dl(fname, new_info)
1062                             success = success and partial_success
1063                         info_dict['__postprocessors'] = postprocessors
1064                         info_dict['__files_to_merge'] = downloaded
1065                     else:
1066                         # Just a single file
1067                         success = dl(filename, info_dict)
1068                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1069                     self.report_error('unable to download video data: %s' % str(err))
1070                     return
1071                 except (OSError, IOError) as err:
1072                     raise UnavailableVideoError(err)
1073                 except (ContentTooShortError, ) as err:
1074                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1075                     return
1076
1077             if success:
1078                 try:
1079                     self.post_process(filename, info_dict)
1080                 except (PostProcessingError) as err:
1081                     self.report_error('postprocessing: %s' % str(err))
1082                     return
1083
1084         self.record_download_archive(info_dict)
1085
1086     def download(self, url_list):
1087         """Download a given list of URLs."""
1088         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1089         if (len(url_list) > 1 and
1090                 '%' not in outtmpl
1091                 and self.params.get('max_downloads') != 1):
1092             raise SameFileError(outtmpl)
1093
1094         for url in url_list:
1095             try:
1096                 #It also downloads the videos
1097                 res = self.extract_info(url)
1098             except UnavailableVideoError:
1099                 self.report_error('unable to download video')
1100             except MaxDownloadsReached:
1101                 self.to_screen('[info] Maximum number of downloaded files reached.')
1102                 raise
1103             else:
1104                 if self.params.get('dump_single_json', False):
1105                     self.to_stdout(json.dumps(res))
1106
1107         return self._download_retcode
1108
1109     def download_with_info_file(self, info_filename):
1110         with io.open(info_filename, 'r', encoding='utf-8') as f:
1111             info = json.load(f)
1112         try:
1113             self.process_ie_result(info, download=True)
1114         except DownloadError:
1115             webpage_url = info.get('webpage_url')
1116             if webpage_url is not None:
1117                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1118                 return self.download([webpage_url])
1119             else:
1120                 raise
1121         return self._download_retcode
1122
1123     def post_process(self, filename, ie_info):
1124         """Run all the postprocessors on the given file."""
1125         info = dict(ie_info)
1126         info['filepath'] = filename
1127         keep_video = None
1128         pps_chain = []
1129         if ie_info.get('__postprocessors') is not None:
1130             pps_chain.extend(ie_info['__postprocessors'])
1131         pps_chain.extend(self._pps)
1132         for pp in pps_chain:
1133             try:
1134                 keep_video_wish, new_info = pp.run(info)
1135                 if keep_video_wish is not None:
1136                     if keep_video_wish:
1137                         keep_video = keep_video_wish
1138                     elif keep_video is None:
1139                         # No clear decision yet, let IE decide
1140                         keep_video = keep_video_wish
1141             except PostProcessingError as e:
1142                 self.report_error(e.msg)
1143         if keep_video is False and not self.params.get('keepvideo', False):
1144             try:
1145                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1146                 os.remove(encodeFilename(filename))
1147             except (IOError, OSError):
1148                 self.report_warning('Unable to remove downloaded video file')
1149
1150     def _make_archive_id(self, info_dict):
1151         # Future-proof against any change in case
1152         # and backwards compatibility with prior versions
1153         extractor = info_dict.get('extractor_key')
1154         if extractor is None:
1155             if 'id' in info_dict:
1156                 extractor = info_dict.get('ie_key')  # key in a playlist
1157         if extractor is None:
1158             return None  # Incomplete video information
1159         return extractor.lower() + ' ' + info_dict['id']
1160
1161     def in_download_archive(self, info_dict):
1162         fn = self.params.get('download_archive')
1163         if fn is None:
1164             return False
1165
1166         vid_id = self._make_archive_id(info_dict)
1167         if vid_id is None:
1168             return False  # Incomplete video information
1169
1170         try:
1171             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1172                 for line in archive_file:
1173                     if line.strip() == vid_id:
1174                         return True
1175         except IOError as ioe:
1176             if ioe.errno != errno.ENOENT:
1177                 raise
1178         return False
1179
1180     def record_download_archive(self, info_dict):
1181         fn = self.params.get('download_archive')
1182         if fn is None:
1183             return
1184         vid_id = self._make_archive_id(info_dict)
1185         assert vid_id
1186         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1187             archive_file.write(vid_id + '\n')
1188
1189     @staticmethod
1190     def format_resolution(format, default='unknown'):
1191         if format.get('vcodec') == 'none':
1192             return 'audio only'
1193         if format.get('resolution') is not None:
1194             return format['resolution']
1195         if format.get('height') is not None:
1196             if format.get('width') is not None:
1197                 res = '%sx%s' % (format['width'], format['height'])
1198             else:
1199                 res = '%sp' % format['height']
1200         elif format.get('width') is not None:
1201             res = '?x%d' % format['width']
1202         else:
1203             res = default
1204         return res
1205
1206     def _format_note(self, fdict):
1207         res = ''
1208         if fdict.get('ext') in ['f4f', 'f4m']:
1209             res += '(unsupported) '
1210         if fdict.get('format_note') is not None:
1211             res += fdict['format_note'] + ' '
1212         if fdict.get('tbr') is not None:
1213             res += '%4dk ' % fdict['tbr']
1214         if fdict.get('container') is not None:
1215             if res:
1216                 res += ', '
1217             res += '%s container' % fdict['container']
1218         if (fdict.get('vcodec') is not None and
1219                 fdict.get('vcodec') != 'none'):
1220             if res:
1221                 res += ', '
1222             res += fdict['vcodec']
1223             if fdict.get('vbr') is not None:
1224                 res += '@'
1225         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1226             res += 'video@'
1227         if fdict.get('vbr') is not None:
1228             res += '%4dk' % fdict['vbr']
1229         if fdict.get('fps') is not None:
1230             res += ', %sfps' % fdict['fps']
1231         if fdict.get('acodec') is not None:
1232             if res:
1233                 res += ', '
1234             if fdict['acodec'] == 'none':
1235                 res += 'video only'
1236             else:
1237                 res += '%-5s' % fdict['acodec']
1238         elif fdict.get('abr') is not None:
1239             if res:
1240                 res += ', '
1241             res += 'audio'
1242         if fdict.get('abr') is not None:
1243             res += '@%3dk' % fdict['abr']
1244         if fdict.get('asr') is not None:
1245             res += ' (%5dHz)' % fdict['asr']
1246         if fdict.get('filesize') is not None:
1247             if res:
1248                 res += ', '
1249             res += format_bytes(fdict['filesize'])
1250         elif fdict.get('filesize_approx') is not None:
1251             if res:
1252                 res += ', '
1253             res += '~' + format_bytes(fdict['filesize_approx'])
1254         return res
1255
1256     def list_formats(self, info_dict):
1257         def line(format, idlen=20):
1258             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1259                 format['format_id'],
1260                 format['ext'],
1261                 self.format_resolution(format),
1262                 self._format_note(format),
1263             ))
1264
1265         formats = info_dict.get('formats', [info_dict])
1266         idlen = max(len('format code'),
1267                     max(len(f['format_id']) for f in formats))
1268         formats_s = [line(f, idlen) for f in formats]
1269         if len(formats) > 1:
1270             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1271             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1272
1273         header_line = line({
1274             'format_id': 'format code', 'ext': 'extension',
1275             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1276         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1277                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1278
1279     def urlopen(self, req):
1280         """ Start an HTTP download """
1281
1282         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1283         # always respected by websites, some tend to give out URLs with non percent-encoded
1284         # non-ASCII characters (see telemb.py, ard.py [#3412])
1285         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1286         # To work around aforementioned issue we will replace request's original URL with
1287         # percent-encoded one
1288         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1289         url = req if req_is_string else req.get_full_url()
1290         url_escaped = escape_url(url)
1291
1292         # Substitute URL if any change after escaping
1293         if url != url_escaped:
1294             if req_is_string:
1295                 req = url_escaped
1296             else:
1297                 req = compat_urllib_request.Request(
1298                     url_escaped, data=req.data, headers=req.headers,
1299                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1300
1301         return self._opener.open(req, timeout=self._socket_timeout)
1302
1303     def print_debug_header(self):
1304         if not self.params.get('verbose'):
1305             return
1306
1307         if type('') is not compat_str:
1308             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1309             self.report_warning(
1310                 'Your Python is broken! Update to a newer and supported version')
1311
1312         stdout_encoding = getattr(
1313             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1314         encoding_str = (
1315             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1316                 locale.getpreferredencoding(),
1317                 sys.getfilesystemencoding(),
1318                 stdout_encoding,
1319                 self.get_encoding()))
1320         write_string(encoding_str, encoding=None)
1321
1322         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1323         try:
1324             sp = subprocess.Popen(
1325                 ['git', 'rev-parse', '--short', 'HEAD'],
1326                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1327                 cwd=os.path.dirname(os.path.abspath(__file__)))
1328             out, err = sp.communicate()
1329             out = out.decode().strip()
1330             if re.match('[0-9a-f]+', out):
1331                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1332         except:
1333             try:
1334                 sys.exc_clear()
1335             except:
1336                 pass
1337         self._write_string('[debug] Python version %s - %s\n' % (
1338             platform.python_version(), platform_name()))
1339
1340         exe_versions = FFmpegPostProcessor.get_versions()
1341         exe_versions['rtmpdump'] = rtmpdump_version()
1342         exe_str = ', '.join(
1343             '%s %s' % (exe, v)
1344             for exe, v in sorted(exe_versions.items())
1345             if v
1346         )
1347         if not exe_str:
1348             exe_str = 'none'
1349         self._write_string('[debug] exe versions: %s\n' % exe_str)
1350
1351         proxy_map = {}
1352         for handler in self._opener.handlers:
1353             if hasattr(handler, 'proxies'):
1354                 proxy_map.update(handler.proxies)
1355         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1356
1357     def _setup_opener(self):
1358         timeout_val = self.params.get('socket_timeout')
1359         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1360
1361         opts_cookiefile = self.params.get('cookiefile')
1362         opts_proxy = self.params.get('proxy')
1363
1364         if opts_cookiefile is None:
1365             self.cookiejar = compat_cookiejar.CookieJar()
1366         else:
1367             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1368                 opts_cookiefile)
1369             if os.access(opts_cookiefile, os.R_OK):
1370                 self.cookiejar.load()
1371
1372         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1373             self.cookiejar)
1374         if opts_proxy is not None:
1375             if opts_proxy == '':
1376                 proxies = {}
1377             else:
1378                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1379         else:
1380             proxies = compat_urllib_request.getproxies()
1381             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1382             if 'http' in proxies and 'https' not in proxies:
1383                 proxies['https'] = proxies['http']
1384         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1385
1386         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1387         https_handler = make_HTTPS_handler(
1388             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1389         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1390         opener = compat_urllib_request.build_opener(
1391             https_handler, proxy_handler, cookie_processor, ydlh)
1392         # Delete the default user-agent header, which would otherwise apply in
1393         # cases where our custom HTTP handler doesn't come into play
1394         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1395         opener.addheaders = []
1396         self._opener = opener
1397
1398     def encode(self, s):
1399         if isinstance(s, bytes):
1400             return s  # Already encoded
1401
1402         try:
1403             return s.encode(self.get_encoding())
1404         except UnicodeEncodeError as err:
1405             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1406             raise
1407
1408     def get_encoding(self):
1409         encoding = self.params.get('encoding')
1410         if encoding is None:
1411             encoding = preferredencoding()
1412         return encoding