youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import json
  11 import locale
  12 import os
  13 import platform
  14 import re
  15 import shutil
  16 import subprocess
  17 import socket
  18 import sys
  19 import time
  20 import traceback
  21
  22 if os.name == 'nt':
  23     import ctypes
  24
  25 from .utils import (
  26     compat_cookiejar,
  27     compat_http_client,
  28     compat_str,
  29     compat_urllib_error,
  30     compat_urllib_request,
  31     ContentTooShortError,
  32     date_from_str,
  33     DateRange,
  34     DEFAULT_OUTTMPL,
  35     determine_ext,
  36     DownloadError,
  37     encodeFilename,
  38     ExtractorError,
  39     format_bytes,
  40     formatSeconds,
  41     get_term_width,
  42     locked_file,
  43     make_HTTPS_handler,
  44     MaxDownloadsReached,
  45     PagedList,
  46     PostProcessingError,
  47     platform_name,
  48     preferredencoding,
  49     SameFileError,
  50     sanitize_filename,
  51     subtitles_filename,
  52     takewhile_inclusive,
  53     UnavailableVideoError,
  54     url_basename,
  55     write_json_file,
  56     write_string,
  57     YoutubeDLHandler,
  58     prepend_extension,
  59 )
  60 from .extractor import get_info_extractor, gen_extractors
  61 from .downloader import get_suitable_downloader
  62 from .postprocessor import FFmpegMergerPP
  63 from .version import __version__
  64
  65
  66 class YoutubeDL(object):
  67     """YoutubeDL class.
  68
   69     YoutubeDL objects are the ones responsible for downloading the
  70     actual video file and writing it to disk if the user has requested
  71     it, among some other tasks. In most cases there should be one per
  72     program. As, given a video URL, the downloader doesn't know how to
  73     extract all the needed information, task that InfoExtractors do, it
  74     has to pass the URL to one of them.
  75
  76     For this, YoutubeDL objects have a method that allows
  77     InfoExtractors to be registered in a given order. When it is passed
   78     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  79     finds that reports being able to handle it. The InfoExtractor extracts
  80     all the information about the video or videos the URL refers to, and
  81     YoutubeDL process the extracted information, possibly using a File
  82     Downloader to download the video.
  83
  84     YoutubeDL objects accept a lot of parameters. In order not to saturate
  85     the object constructor with arguments, it receives a dictionary of
  86     options instead. These options are available through the params
  87     attribute for the InfoExtractors to use. The YoutubeDL also
  88     registers itself as the downloader in charge for the InfoExtractors
  89     that are added to it, so this is a "mutual registration".
  90
  91     Available options:
  92
  93     username:          Username for authentication purposes.
  94     password:          Password for authentication purposes.
   95     videopassword:     Password for accessing a video.
  96     usenetrc:          Use netrc for authentication instead.
  97     verbose:           Print additional info to stdout.
  98     quiet:             Do not print messages to stdout.
  99     no_warnings:       Do not print out anything for warnings.
 100     forceurl:          Force printing final URL.
 101     forcetitle:        Force printing title.
 102     forceid:           Force printing ID.
 103     forcethumbnail:    Force printing thumbnail URL.
 104     forcedescription:  Force printing description.
 105     forcefilename:     Force printing final filename.
 106     forceduration:     Force printing duration.
 107     forcejson:         Force printing info_dict as JSON.
 108     simulate:          Do not download the video files.
 109     format:            Video format code.
 110     format_limit:      Highest quality format to try.
 111     outtmpl:           Template for output names.
 112     restrictfilenames: Do not allow "&" and spaces in file names
 113     ignoreerrors:      Do not stop on download errors.
 114     nooverwrites:      Prevent overwriting files.
 115     playliststart:     Playlist item to start at.
 116     playlistend:       Playlist item to end at.
 117     matchtitle:        Download only matching titles.
 118     rejecttitle:       Reject downloads for matching titles.
 119     logger:            Log messages to a logging.Logger instance.
 120     logtostderr:       Log messages to stderr instead of stdout.
 121     writedescription:  Write the video description to a .description file
 122     writeinfojson:     Write the video description to a .info.json file
 123     writeannotations:  Write the video annotations to a .annotations.xml file
 124     writethumbnail:    Write the thumbnail image to a file
 125     writesubtitles:    Write the video subtitles to a file
 126     writeautomaticsub: Write the automatic subtitles to a file
 127     allsubtitles:      Downloads all the subtitles of the video
 128                        (requires writesubtitles or writeautomaticsub)
 129     listsubtitles:     Lists all available subtitles for the video
 130     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 131     subtitleslangs:    List of languages of the subtitles to download
 132     keepvideo:         Keep the video file after post-processing
 133     daterange:         A DateRange object, download only if the upload_date is in the range.
 134     skip_download:     Skip the actual download of the video file
 135     cachedir:          Location of the cache files in the filesystem.
 136                        None to disable filesystem cache.
 137     noplaylist:        Download single video instead of a playlist if in doubt.
 138     age_limit:         An integer representing the user's age in years.
 139                        Unsuitable videos for the given age are skipped.
 140     min_views:         An integer representing the minimum view count the video
 141                        must have in order to not be skipped.
 142                        Videos without view count information are always
 143                        downloaded. None for no limit.
 144     max_views:         An integer representing the maximum view count.
 145                        Videos that are more popular than that are not
 146                        downloaded.
 147                        Videos without view count information are always
 148                        downloaded. None for no limit.
 149     download_archive:  File name of a file where all downloads are recorded.
 150                        Videos already present in the file are not downloaded
 151                        again.
 152     cookiefile:        File name where cookies should be read from and dumped to.
 153     nocheckcertificate:Do not verify SSL certificates
 154     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 155                        At the moment, this is only supported by YouTube.
 156     proxy:             URL of the proxy server to use
 157     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 158     bidi_workaround:   Work around buggy terminals without bidirectional text
  159                        support, using fribidi
 160     debug_printtraffic:Print out sent and received HTTP traffic
 161     include_ads:       Download ads as well
 162     default_search:    Prepend this string if an input url is not valid.
 163                        'auto' for elaborate guessing
 164     encoding:          Use this encoding instead of the system-specified.
 165     extract_flat:      Do not resolve URLs, return the immediate result.
 166
 167     The following parameters are not used by YoutubeDL itself, they are used by
 168     the FileDownloader:
 169     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 170     noresizebuffer, retries, continuedl, noprogress, consoletitle
 171
 172     The following options are used by the post processors:
 173     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 174                        otherwise prefer avconv.
 175     """
 176
 177     params = None
 178     _ies = []
 179     _pps = []
 180     _download_retcode = None
 181     _num_downloads = None
 182     _screen_file = None
 183
 184     def __init__(self, params=None):
 185         """Create a FileDownloader object with the given options."""
 186         if params is None:
 187             params = {}
 188         self._ies = []
 189         self._ies_instances = {}
 190         self._pps = []
 191         self._progress_hooks = []
 192         self._download_retcode = 0
 193         self._num_downloads = 0
 194         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 195         self._err_file = sys.stderr
 196         self.params = params
 197
 198         if params.get('bidi_workaround', False):
 199             try:
 200                 import pty
 201                 master, slave = pty.openpty()
 202                 width = get_term_width()
 203                 if width is None:
 204                     width_args = []
 205                 else:
 206                     width_args = ['-w', str(width)]
 207                 sp_kwargs = dict(
 208                     stdin=subprocess.PIPE,
 209                     stdout=slave,
 210                     stderr=self._err_file)
 211                 try:
 212                     self._output_process = subprocess.Popen(
 213                         ['bidiv'] + width_args, **sp_kwargs
 214                     )
 215                 except OSError:
 216                     self._output_process = subprocess.Popen(
 217                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 218                 self._output_channel = os.fdopen(master, 'rb')
 219             except OSError as ose:
 220                 if ose.errno == 2:
 221                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 222                 else:
 223                     raise
 224
 225         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 226                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 227                 and not params['restrictfilenames']):
 228             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 229             self.report_warning(
 230                 'Assuming --restrict-filenames since file system encoding '
 231                 'cannot encode all charactes. '
 232                 'Set the LC_ALL environment variable to fix this.')
 233             self.params['restrictfilenames'] = True
 234
 235         if '%(stitle)s' in self.params.get('outtmpl', ''):
 236             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 237
 238         self._setup_opener()
 239
 240     def add_info_extractor(self, ie):
 241         """Add an InfoExtractor object to the end of the list."""
 242         self._ies.append(ie)
 243         self._ies_instances[ie.ie_key()] = ie
 244         ie.set_downloader(self)
 245
 246     def get_info_extractor(self, ie_key):
 247         """
 248         Get an instance of an IE with name ie_key, it will try to get one from
 249         the _ies list, if there's no instance it will create a new one and add
 250         it to the extractor list.
 251         """
 252         ie = self._ies_instances.get(ie_key)
 253         if ie is None:
 254             ie = get_info_extractor(ie_key)()
 255             self.add_info_extractor(ie)
 256         return ie
 257
 258     def add_default_info_extractors(self):
 259         """
 260         Add the InfoExtractors returned by gen_extractors to the end of the list
 261         """
 262         for ie in gen_extractors():
 263             self.add_info_extractor(ie)
 264
 265     def add_post_processor(self, pp):
 266         """Add a PostProcessor object to the end of the chain."""
 267         self._pps.append(pp)
 268         pp.set_downloader(self)
 269
 270     def add_progress_hook(self, ph):
 271         """Add the progress hook (currently only for the file downloader)"""
 272         self._progress_hooks.append(ph)
 273
 274     def _bidi_workaround(self, message):
 275         if not hasattr(self, '_output_channel'):
 276             return message
 277
 278         assert hasattr(self, '_output_process')
 279         assert isinstance(message, compat_str)
 280         line_count = message.count('\n') + 1
 281         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 282         self._output_process.stdin.flush()
 283         res = ''.join(self._output_channel.readline().decode('utf-8')
 284                        for _ in range(line_count))
 285         return res[:-len('\n')]
 286
 287     def to_screen(self, message, skip_eol=False):
 288         """Print message to stdout if not in quiet mode."""
 289         return self.to_stdout(message, skip_eol, check_quiet=True)
 290
 291     def _write_string(self, s, out=None):
 292         write_string(s, out=out, encoding=self.params.get('encoding'))
 293
 294     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 295         """Print message to stdout if not in quiet mode."""
 296         if self.params.get('logger'):
 297             self.params['logger'].debug(message)
 298         elif not check_quiet or not self.params.get('quiet', False):
 299             message = self._bidi_workaround(message)
 300             terminator = ['\n', ''][skip_eol]
 301             output = message + terminator
 302
 303             self._write_string(output, self._screen_file)
 304
 305     def to_stderr(self, message):
 306         """Print message to stderr."""
 307         assert isinstance(message, compat_str)
 308         if self.params.get('logger'):
 309             self.params['logger'].error(message)
 310         else:
 311             message = self._bidi_workaround(message)
 312             output = message + '\n'
 313             self._write_string(output, self._err_file)
 314
 315     def to_console_title(self, message):
 316         if not self.params.get('consoletitle', False):
 317             return
 318         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 319             # c_wchar_p() might not be necessary if `message` is
 320             # already of type unicode()
 321             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 322         elif 'TERM' in os.environ:
 323             self._write_string('\033]0;%s\007' % message, self._screen_file)
 324
 325     def save_console_title(self):
 326         if not self.params.get('consoletitle', False):
 327             return
 328         if 'TERM' in os.environ:
 329             # Save the title on stack
 330             self._write_string('\033[22;0t', self._screen_file)
 331
 332     def restore_console_title(self):
 333         if not self.params.get('consoletitle', False):
 334             return
 335         if 'TERM' in os.environ:
 336             # Restore the title from stack
 337             self._write_string('\033[23;0t', self._screen_file)
 338
 339     def __enter__(self):
 340         self.save_console_title()
 341         return self
 342
 343     def __exit__(self, *args):
 344         self.restore_console_title()
 345
 346         if self.params.get('cookiefile') is not None:
 347             self.cookiejar.save()
 348
 349     def trouble(self, message=None, tb=None):
 350         """Determine action to take when a download problem appears.
 351
 352         Depending on if the downloader has been configured to ignore
 353         download errors or not, this method may throw an exception or
 354         not when errors are found, after printing the message.
 355
 356         tb, if given, is additional traceback information.
 357         """
 358         if message is not None:
 359             self.to_stderr(message)
 360         if self.params.get('verbose'):
 361             if tb is None:
 362                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 363                     tb = ''
 364                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 365                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 366                     tb += compat_str(traceback.format_exc())
 367                 else:
 368                     tb_data = traceback.format_list(traceback.extract_stack())
 369                     tb = ''.join(tb_data)
 370             self.to_stderr(tb)
 371         if not self.params.get('ignoreerrors', False):
 372             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 373                 exc_info = sys.exc_info()[1].exc_info
 374             else:
 375                 exc_info = sys.exc_info()
 376             raise DownloadError(message, exc_info)
 377         self._download_retcode = 1
 378
 379     def report_warning(self, message):
 380         '''
 381         Print the message to stderr, it will be prefixed with 'WARNING:'
 382         If stderr is a tty file the 'WARNING:' will be colored
 383         '''
 384         if self.params.get('logger') is not None:
 385             self.params['logger'].warning(message)
 386         else:
 387             if self.params.get('no_warnings'):
 388                 return
 389             if self._err_file.isatty() and os.name != 'nt':
 390                 _msg_header = '\033[0;33mWARNING:\033[0m'
 391             else:
 392                 _msg_header = 'WARNING:'
 393             warning_message = '%s %s' % (_msg_header, message)
 394             self.to_stderr(warning_message)
 395
 396     def report_error(self, message, tb=None):
 397         '''
 398         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 399         in red if stderr is a tty file.
 400         '''
 401         if self._err_file.isatty() and os.name != 'nt':
 402             _msg_header = '\033[0;31mERROR:\033[0m'
 403         else:
 404             _msg_header = 'ERROR:'
 405         error_message = '%s %s' % (_msg_header, message)
 406         self.trouble(error_message, tb)
 407
 408     def report_file_already_downloaded(self, file_name):
 409         """Report file has already been fully downloaded."""
 410         try:
 411             self.to_screen('[download] %s has already been downloaded' % file_name)
 412         except UnicodeEncodeError:
 413             self.to_screen('[download] The file has already been downloaded')
 414
 415     def prepare_filename(self, info_dict):
 416         """Generate the output filename."""
 417         try:
 418             template_dict = dict(info_dict)
 419
 420             template_dict['epoch'] = int(time.time())
 421             autonumber_size = self.params.get('autonumber_size')
 422             if autonumber_size is None:
 423                 autonumber_size = 5
 424             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 425             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 426             if template_dict.get('playlist_index') is not None:
 427                 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
 428             if template_dict.get('resolution') is None:
 429                 if template_dict.get('width') and template_dict.get('height'):
 430                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 431                 elif template_dict.get('height'):
 432                     template_dict['resolution'] = '%sp' % template_dict['height']
 433                 elif template_dict.get('width'):
 434                     template_dict['resolution'] = '?x%d' % template_dict['width']
 435
 436             sanitize = lambda k, v: sanitize_filename(
 437                 compat_str(v),
 438                 restricted=self.params.get('restrictfilenames'),
 439                 is_id=(k == 'id'))
 440             template_dict = dict((k, sanitize(k, v))
 441                                  for k, v in template_dict.items()
 442                                  if v is not None)
 443             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 444
 445             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 446             tmpl = os.path.expanduser(outtmpl)
 447             filename = tmpl % template_dict
 448             return filename
 449         except ValueError as err:
 450             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 451             return None
 452
 453     def _match_entry(self, info_dict):
 454         """ Returns None iff the file should be downloaded """
 455
 456         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 457         if 'title' in info_dict:
 458             # This can happen when we're just evaluating the playlist
 459             title = info_dict['title']
 460             matchtitle = self.params.get('matchtitle', False)
 461             if matchtitle:
 462                 if not re.search(matchtitle, title, re.IGNORECASE):
 463                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 464             rejecttitle = self.params.get('rejecttitle', False)
 465             if rejecttitle:
 466                 if re.search(rejecttitle, title, re.IGNORECASE):
 467                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 468         date = info_dict.get('upload_date', None)
 469         if date is not None:
 470             dateRange = self.params.get('daterange', DateRange())
 471             if date not in dateRange:
 472                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 473         view_count = info_dict.get('view_count', None)
 474         if view_count is not None:
 475             min_views = self.params.get('min_views')
 476             if min_views is not None and view_count < min_views:
 477                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 478             max_views = self.params.get('max_views')
 479             if max_views is not None and view_count > max_views:
 480                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 481         age_limit = self.params.get('age_limit')
 482         if age_limit is not None:
 483             actual_age_limit = info_dict.get('age_limit')
 484             if actual_age_limit is None:
 485                 actual_age_limit = 0
 486             if age_limit < actual_age_limit:
 487                 return 'Skipping "' + title + '" because it is age restricted'
 488         if self.in_download_archive(info_dict):
 489             return '%s has already been recorded in archive' % video_title
 490         return None
 491
 492     @staticmethod
 493     def add_extra_info(info_dict, extra_info):
 494         '''Set the keys from extra_info in info dict if they are missing'''
 495         for key, value in extra_info.items():
 496             info_dict.setdefault(key, value)
 497
 498     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 499                      process=True):
 500         '''
 501         Returns a list with a dictionary for each video we find.
 502         If 'download', also downloads the videos.
 503         extra_info is a dict containing the extra values to add to each result
 504          '''
 505
 506         if ie_key:
 507             ies = [self.get_info_extractor(ie_key)]
 508         else:
 509             ies = self._ies
 510
 511         for ie in ies:
 512             if not ie.suitable(url):
 513                 continue
 514
 515             if not ie.working():
 516                 self.report_warning('The program functionality for this site has been marked as broken, '
 517                                     'and will probably not work.')
 518
 519             try:
 520                 ie_result = ie.extract(url)
 521                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 522                     break
 523                 if isinstance(ie_result, list):
 524                     # Backwards compatibility: old IE result format
 525                     ie_result = {
 526                         '_type': 'compat_list',
 527                         'entries': ie_result,
 528                     }
 529                 self.add_default_extra_info(ie_result, ie, url)
 530                 if process:
 531                     return self.process_ie_result(ie_result, download, extra_info)
 532                 else:
 533                     return ie_result
 534             except ExtractorError as de: # An error we somewhat expected
 535                 self.report_error(compat_str(de), de.format_traceback())
 536                 break
 537             except MaxDownloadsReached:
 538                 raise
 539             except Exception as e:
 540                 if self.params.get('ignoreerrors', False):
 541                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 542                     break
 543                 else:
 544                     raise
 545         else:
 546             self.report_error('no suitable InfoExtractor for URL %s' % url)
 547
 548     def add_default_extra_info(self, ie_result, ie, url):
 549         self.add_extra_info(ie_result, {
 550             'extractor': ie.IE_NAME,
 551             'webpage_url': url,
 552             'webpage_url_basename': url_basename(url),
 553             'extractor_key': ie.ie_key(),
 554         })
 555
 556     def process_ie_result(self, ie_result, download=True, extra_info={}):
 557         """
 558         Take the result of the ie(may be modified) and resolve all unresolved
 559         references (URLs, playlist items).
 560
 561         It will also download the videos if 'download'.
 562         Returns the resolved ie_result.
 563         """
 564
 565         result_type = ie_result.get('_type', 'video')
 566
 567         if self.params.get('extract_flat', False):
 568             if result_type in ('url', 'url_transparent'):
 569                 return ie_result
 570
 571         if result_type == 'video':
 572             self.add_extra_info(ie_result, extra_info)
 573             return self.process_video_result(ie_result, download=download)
 574         elif result_type == 'url':
 575             # We have to add extra_info to the results because it may be
 576             # contained in a playlist
 577             return self.extract_info(ie_result['url'],
 578                                      download,
 579                                      ie_key=ie_result.get('ie_key'),
 580                                      extra_info=extra_info)
 581         elif result_type == 'url_transparent':
 582             # Use the information from the embedding page
 583             info = self.extract_info(
 584                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 585                 extra_info=extra_info, download=False, process=False)
 586
 587             def make_result(embedded_info):
 588                 new_result = ie_result.copy()
 589                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 590                           'entries', 'ie_key', 'duration',
 591                           'subtitles', 'annotations', 'format',
 592                           'thumbnail', 'thumbnails'):
 593                     if f in new_result:
 594                         del new_result[f]
 595                     if f in embedded_info:
 596                         new_result[f] = embedded_info[f]
 597                 return new_result
 598             new_result = make_result(info)
 599
 600             assert new_result.get('_type') != 'url_transparent'
 601             if new_result.get('_type') == 'compat_list':
 602                 new_result['entries'] = [
 603                     make_result(e) for e in new_result['entries']]
 604
 605             return self.process_ie_result(
 606                 new_result, download=download, extra_info=extra_info)
 607         elif result_type == 'playlist':
 608             # We process each entry in the playlist
 609             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 610             self.to_screen('[download] Downloading playlist: %s' % playlist)
 611
 612             playlist_results = []
 613
 614             playliststart = self.params.get('playliststart', 1) - 1
 615             playlistend = self.params.get('playlistend', None)
 616             # For backwards compatibility, interpret -1 as whole list
 617             if playlistend == -1:
 618                 playlistend = None
 619
 620             if isinstance(ie_result['entries'], list):
 621                 n_all_entries = len(ie_result['entries'])
 622                 entries = ie_result['entries'][playliststart:playlistend]
 623                 n_entries = len(entries)
 624                 self.to_screen(
 625                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 626                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 627             else:
 628                 assert isinstance(ie_result['entries'], PagedList)
 629                 entries = ie_result['entries'].getslice(
 630                     playliststart, playlistend)
 631                 n_entries = len(entries)
 632                 self.to_screen(
 633                     "[%s] playlist %s: Downloading %d videos" %
 634                     (ie_result['extractor'], playlist, n_entries))
 635
 636             for i, entry in enumerate(entries, 1):
 637                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 638                 extra = {
 639                     'playlist': playlist,
 640                     'playlist_index': i + playliststart,
 641                     'extractor': ie_result['extractor'],
 642                     'webpage_url': ie_result['webpage_url'],
 643                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 644                     'extractor_key': ie_result['extractor_key'],
 645                 }
 646
 647                 reason = self._match_entry(entry)
 648                 if reason is not None:
 649                     self.to_screen('[download] ' + reason)
 650                     continue
 651
 652                 entry_result = self.process_ie_result(entry,
 653                                                       download=download,
 654                                                       extra_info=extra)
 655                 playlist_results.append(entry_result)
 656             ie_result['entries'] = playlist_results
 657             return ie_result
 658         elif result_type == 'compat_list':
 659             def _fixup(r):
 660                 self.add_extra_info(r,
 661                     {
 662                         'extractor': ie_result['extractor'],
 663                         'webpage_url': ie_result['webpage_url'],
 664                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 665                         'extractor_key': ie_result['extractor_key'],
 666                     })
 667                 return r
 668             ie_result['entries'] = [
 669                 self.process_ie_result(_fixup(r), download, extra_info)
 670                 for r in ie_result['entries']
 671             ]
 672             return ie_result
 673         else:
 674             raise Exception('Invalid result type: %s' % result_type)
 675
 676     def select_format(self, format_spec, available_formats):
 677         if format_spec == 'best' or format_spec is None:
 678             return available_formats[-1]
 679         elif format_spec == 'worst':
 680             return available_formats[0]
 681         elif format_spec == 'bestaudio':
 682             audio_formats = [
 683                 f for f in available_formats
 684                 if f.get('vcodec') == 'none']
 685             if audio_formats:
 686                 return audio_formats[-1]
 687         elif format_spec == 'worstaudio':
 688             audio_formats = [
 689                 f for f in available_formats
 690                 if f.get('vcodec') == 'none']
 691             if audio_formats:
 692                 return audio_formats[0]
 693         elif format_spec == 'bestvideo':
 694             video_formats = [
 695                 f for f in available_formats
 696                 if f.get('acodec') == 'none']
 697             if video_formats:
 698                 return video_formats[-1]
 699         elif format_spec == 'worstvideo':
 700             video_formats = [
 701                 f for f in available_formats
 702                 if f.get('acodec') == 'none']
 703             if video_formats:
 704                 return video_formats[0]
 705         else:
 706             extensions = ['mp4', 'flv', 'webm', '3gp']
 707             if format_spec in extensions:
 708                 filter_f = lambda f: f['ext'] == format_spec
 709             else:
 710                 filter_f = lambda f: f['format_id'] == format_spec
 711             matches = list(filter(filter_f, available_formats))
 712             if matches:
 713                 return matches[-1]
 714         return None
 715
 716     def process_video_result(self, info_dict, download=True):
 717         assert info_dict.get('_type', 'video') == 'video'
 718
 719         if 'id' not in info_dict:
 720             raise ExtractorError('Missing "id" field in extractor result')
 721         if 'title' not in info_dict:
 722             raise ExtractorError('Missing "title" field in extractor result')
 723
 724         if 'playlist' not in info_dict:
 725             # It isn't part of a playlist
 726             info_dict['playlist'] = None
 727             info_dict['playlist_index'] = None
 728
 729         thumbnails = info_dict.get('thumbnails')
 730         if thumbnails:
 731             thumbnails.sort(key=lambda t: (
 732                 t.get('width'), t.get('height'), t.get('url')))
 733             for t in thumbnails:
 734                 if 'width' in t and 'height' in t:
 735                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 736
 737         if thumbnails and 'thumbnail' not in info_dict:
 738             info_dict['thumbnail'] = thumbnails[-1]['url']
 739
 740         if 'display_id' not in info_dict and 'id' in info_dict:
 741             info_dict['display_id'] = info_dict['id']
 742
 743         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 744             upload_date = datetime.datetime.utcfromtimestamp(
 745                 info_dict['timestamp'])
 746             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 747
 748         # This extractors handle format selection themselves
 749         if info_dict['extractor'] in ['Youku']:
 750             if download:
 751                 self.process_info(info_dict)
 752             return info_dict
 753
 754         # We now pick which formats have to be downloaded
 755         if info_dict.get('formats') is None:
 756             # There's only one format available
 757             formats = [info_dict]
 758         else:
 759             formats = info_dict['formats']
 760
 761         if not formats:
 762             raise ExtractorError('No video formats found!')
 763
 764         # We check that all the formats have the format and format_id fields
 765         for i, format in enumerate(formats):
 766             if 'url' not in format:
 767                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 768
 769             if format.get('format_id') is None:
 770                 format['format_id'] = compat_str(i)
 771             if format.get('format') is None:
 772                 format['format'] = '{id} - {res}{note}'.format(
 773                     id=format['format_id'],
 774                     res=self.format_resolution(format),
 775                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 776                 )
 777             # Automatically determine file extension if missing
 778             if 'ext' not in format:
 779                 format['ext'] = determine_ext(format['url']).lower()
 780
 781         format_limit = self.params.get('format_limit', None)
 782         if format_limit:
 783             formats = list(takewhile_inclusive(
 784                 lambda f: f['format_id'] != format_limit, formats
 785             ))
 786
 787         # TODO Central sorting goes here
 788
 789         if formats[0] is not info_dict:
 790             # only set the 'formats' fields if the original info_dict list them
 791             # otherwise we end up with a circular reference, the first (and unique)
 792             # element in the 'formats' field in info_dict is info_dict itself,
 793             # wich can't be exported to json
 794             info_dict['formats'] = formats
 795         if self.params.get('listformats', None):
 796             self.list_formats(info_dict)
 797             return
 798
 799         req_format = self.params.get('format')
 800         if req_format is None:
 801             req_format = 'best'
 802         formats_to_download = []
 803         # The -1 is for supporting YoutubeIE
 804         if req_format in ('-1', 'all'):
 805             formats_to_download = formats
 806         else:
 807             # We can accept formats requested in the format: 34/5/best, we pick
 808             # the first that is available, starting from left
 809             req_formats = req_format.split('/')
 810             for rf in req_formats:
 811                 if re.match(r'.+?\+.+?', rf) is not None:
 812                     # Two formats have been requested like '137+139'
 813                     format_1, format_2 = rf.split('+')
 814                     formats_info = (self.select_format(format_1, formats),
 815                         self.select_format(format_2, formats))
 816                     if all(formats_info):
 817                         selected_format = {
 818                             'requested_formats': formats_info,
 819                             'format': rf,
 820                             'ext': formats_info[0]['ext'],
 821                         }
 822                     else:
 823                         selected_format = None
 824                 else:
 825                     selected_format = self.select_format(rf, formats)
 826                 if selected_format is not None:
 827                     formats_to_download = [selected_format]
 828                     break
 829         if not formats_to_download:
 830             raise ExtractorError('requested format not available',
 831                                  expected=True)
 832
 833         if download:
 834             if len(formats_to_download) > 1:
 835                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 836             for format in formats_to_download:
 837                 new_info = dict(info_dict)
 838                 new_info.update(format)
 839                 self.process_info(new_info)
 840         # We update the info dict with the best quality format (backwards compatibility)
 841         info_dict.update(formats_to_download[-1])
 842         return info_dict
 843
 844     def process_info(self, info_dict):
 845         """Process a single resolved IE result."""
 846
 847         assert info_dict.get('_type', 'video') == 'video'
 848
 849         max_downloads = self.params.get('max_downloads')
 850         if max_downloads is not None:
 851             if self._num_downloads >= int(max_downloads):
 852                 raise MaxDownloadsReached()
 853
 854         info_dict['fulltitle'] = info_dict['title']
 855         if len(info_dict['title']) > 200:
 856             info_dict['title'] = info_dict['title'][:197] + '...'
 857
 858         # Keep for backwards compatibility
 859         info_dict['stitle'] = info_dict['title']
 860
 861         if 'format' not in info_dict:
 862             info_dict['format'] = info_dict['ext']
 863
 864         reason = self._match_entry(info_dict)
 865         if reason is not None:
 866             self.to_screen('[download] ' + reason)
 867             return
 868
 869         self._num_downloads += 1
 870
 871         filename = self.prepare_filename(info_dict)
 872
 873         # Forced printings
 874         if self.params.get('forcetitle', False):
 875             self.to_stdout(info_dict['fulltitle'])
 876         if self.params.get('forceid', False):
 877             self.to_stdout(info_dict['id'])
 878         if self.params.get('forceurl', False):
 879             # For RTMP URLs, also include the playpath
 880             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 881         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 882             self.to_stdout(info_dict['thumbnail'])
 883         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 884             self.to_stdout(info_dict['description'])
 885         if self.params.get('forcefilename', False) and filename is not None:
 886             self.to_stdout(filename)
 887         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 888             self.to_stdout(formatSeconds(info_dict['duration']))
 889         if self.params.get('forceformat', False):
 890             self.to_stdout(info_dict['format'])
 891         if self.params.get('forcejson', False):
 892             info_dict['_filename'] = filename
 893             self.to_stdout(json.dumps(info_dict))
 894
 895         # Do nothing else if in simulate mode
 896         if self.params.get('simulate', False):
 897             return
 898
 899         if filename is None:
 900             return
 901
 902         try:
 903             dn = os.path.dirname(encodeFilename(filename))
 904             if dn and not os.path.exists(dn):
 905                 os.makedirs(dn)
 906         except (OSError, IOError) as err:
 907             self.report_error('unable to create directory ' + compat_str(err))
 908             return
 909
 910         if self.params.get('writedescription', False):
 911             descfn = filename + '.description'
 912             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 913                 self.to_screen('[info] Video description is already present')
 914             else:
 915                 try:
 916                     self.to_screen('[info] Writing video description to: ' + descfn)
 917                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 918                         descfile.write(info_dict['description'])
 919                 except (KeyError, TypeError):
 920                     self.report_warning('There\'s no description to write.')
 921                 except (OSError, IOError):
 922                     self.report_error('Cannot write description file ' + descfn)
 923                     return
 924
 925         if self.params.get('writeannotations', False):
 926             annofn = filename + '.annotations.xml'
 927             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 928                 self.to_screen('[info] Video annotations are already present')
 929             else:
 930                 try:
 931                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 932                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 933                         annofile.write(info_dict['annotations'])
 934                 except (KeyError, TypeError):
 935                     self.report_warning('There are no annotations to write.')
 936                 except (OSError, IOError):
 937                     self.report_error('Cannot write annotations file: ' + annofn)
 938                     return
 939
 940         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 941                                        self.params.get('writeautomaticsub')])
 942
 943         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 944             # subtitles download errors are already managed as troubles in relevant IE
 945             # that way it will silently go on when used with unsupporting IE
 946             subtitles = info_dict['subtitles']
 947             sub_format = self.params.get('subtitlesformat', 'srt')
 948             for sub_lang in subtitles.keys():
 949                 sub = subtitles[sub_lang]
 950                 if sub is None:
 951                     continue
 952                 try:
 953                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 954                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 955                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
 956                     else:
 957                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 958                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 959                                 subfile.write(sub)
 960                 except (OSError, IOError):
 961                     self.report_error('Cannot write subtitles file ' + sub_filename)
 962                     return
 963
 964         if self.params.get('writeinfojson', False):
 965             infofn = os.path.splitext(filename)[0] + '.info.json'
 966             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
 967                 self.to_screen('[info] Video description metadata is already present')
 968             else:
 969                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
 970                 try:
 971                     write_json_file(info_dict, encodeFilename(infofn))
 972                 except (OSError, IOError):
 973                     self.report_error('Cannot write metadata to JSON file ' + infofn)
 974                     return
 975
 976         if self.params.get('writethumbnail', False):
 977             if info_dict.get('thumbnail') is not None:
 978                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
 979                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
 980                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
 981                     self.to_screen('[%s] %s: Thumbnail is already present' %
 982                                    (info_dict['extractor'], info_dict['id']))
 983                 else:
 984                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
 985                                    (info_dict['extractor'], info_dict['id']))
 986                     try:
 987                         uf = self.urlopen(info_dict['thumbnail'])
 988                         with open(thumb_filename, 'wb') as thumbf:
 989                             shutil.copyfileobj(uf, thumbf)
 990                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
 991                             (info_dict['extractor'], info_dict['id'], thumb_filename))
 992                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 993                         self.report_warning('Unable to download thumbnail "%s": %s' %
 994                             (info_dict['thumbnail'], compat_str(err)))
 995
 996         if not self.params.get('skip_download', False):
 997             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 998                 success = True
 999             else:
1000                 try:
1001                     def dl(name, info):
1002                         fd = get_suitable_downloader(info)(self, self.params)
1003                         for ph in self._progress_hooks:
1004                             fd.add_progress_hook(ph)
1005                         if self.params.get('verbose'):
1006                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1007                         return fd.download(name, info)
1008                     if info_dict.get('requested_formats') is not None:
1009                         downloaded = []
1010                         success = True
1011                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1012                         if not merger._get_executable():
1013                             postprocessors = []
1014                             self.report_warning('You have requested multiple '
1015                                 'formats but ffmpeg or avconv are not installed.'
1016                                 ' The formats won\'t be merged')
1017                         else:
1018                             postprocessors = [merger]
1019                         for f in info_dict['requested_formats']:
1020                             new_info = dict(info_dict)
1021                             new_info.update(f)
1022                             fname = self.prepare_filename(new_info)
1023                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1024                             downloaded.append(fname)
1025                             partial_success = dl(fname, new_info)
1026                             success = success and partial_success
1027                         info_dict['__postprocessors'] = postprocessors
1028                         info_dict['__files_to_merge'] = downloaded
1029                     else:
1030                         # Just a single file
1031                         success = dl(filename, info_dict)
1032                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1033                     self.report_error('unable to download video data: %s' % str(err))
1034                     return
1035                 except (OSError, IOError) as err:
1036                     raise UnavailableVideoError(err)
1037                 except (ContentTooShortError, ) as err:
1038                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1039                     return
1040
1041             if success:
1042                 try:
1043                     self.post_process(filename, info_dict)
1044                 except (PostProcessingError) as err:
1045                     self.report_error('postprocessing: %s' % str(err))
1046                     return
1047
1048         self.record_download_archive(info_dict)
1049
1050     def download(self, url_list):
1051         """Download a given list of URLs."""
1052         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1053         if (len(url_list) > 1 and
1054                 '%' not in outtmpl
1055                 and self.params.get('max_downloads') != 1):
1056             raise SameFileError(outtmpl)
1057
1058         for url in url_list:
1059             try:
1060                 #It also downloads the videos
1061                 self.extract_info(url)
1062             except UnavailableVideoError:
1063                 self.report_error('unable to download video')
1064             except MaxDownloadsReached:
1065                 self.to_screen('[info] Maximum number of downloaded files reached.')
1066                 raise
1067
1068         return self._download_retcode
1069
1070     def download_with_info_file(self, info_filename):
1071         with io.open(info_filename, 'r', encoding='utf-8') as f:
1072             info = json.load(f)
1073         try:
1074             self.process_ie_result(info, download=True)
1075         except DownloadError:
1076             webpage_url = info.get('webpage_url')
1077             if webpage_url is not None:
1078                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1079                 return self.download([webpage_url])
1080             else:
1081                 raise
1082         return self._download_retcode
1083
1084     def post_process(self, filename, ie_info):
1085         """Run all the postprocessors on the given file."""
1086         info = dict(ie_info)
1087         info['filepath'] = filename
1088         keep_video = None
1089         pps_chain = []
1090         if ie_info.get('__postprocessors') is not None:
1091             pps_chain.extend(ie_info['__postprocessors'])
1092         pps_chain.extend(self._pps)
1093         for pp in pps_chain:
1094             try:
1095                 keep_video_wish, new_info = pp.run(info)
1096                 if keep_video_wish is not None:
1097                     if keep_video_wish:
1098                         keep_video = keep_video_wish
1099                     elif keep_video is None:
1100                         # No clear decision yet, let IE decide
1101                         keep_video = keep_video_wish
1102             except PostProcessingError as e:
1103                 self.report_error(e.msg)
1104         if keep_video is False and not self.params.get('keepvideo', False):
1105             try:
1106                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1107                 os.remove(encodeFilename(filename))
1108             except (IOError, OSError):
1109                 self.report_warning('Unable to remove downloaded video file')
1110
1111     def _make_archive_id(self, info_dict):
1112         # Future-proof against any change in case
1113         # and backwards compatibility with prior versions
1114         extractor = info_dict.get('extractor_key')
1115         if extractor is None:
1116             if 'id' in info_dict:
1117                 extractor = info_dict.get('ie_key')  # key in a playlist
1118         if extractor is None:
1119             return None  # Incomplete video information
1120         return extractor.lower() + ' ' + info_dict['id']
1121
1122     def in_download_archive(self, info_dict):
1123         fn = self.params.get('download_archive')
1124         if fn is None:
1125             return False
1126
1127         vid_id = self._make_archive_id(info_dict)
1128         if vid_id is None:
1129             return False  # Incomplete video information
1130
1131         try:
1132             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1133                 for line in archive_file:
1134                     if line.strip() == vid_id:
1135                         return True
1136         except IOError as ioe:
1137             if ioe.errno != errno.ENOENT:
1138                 raise
1139         return False
1140
1141     def record_download_archive(self, info_dict):
1142         fn = self.params.get('download_archive')
1143         if fn is None:
1144             return
1145         vid_id = self._make_archive_id(info_dict)
1146         assert vid_id
1147         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1148             archive_file.write(vid_id + '\n')
1149
1150     @staticmethod
1151     def format_resolution(format, default='unknown'):
1152         if format.get('vcodec') == 'none':
1153             return 'audio only'
1154         if format.get('resolution') is not None:
1155             return format['resolution']
1156         if format.get('height') is not None:
1157             if format.get('width') is not None:
1158                 res = '%sx%s' % (format['width'], format['height'])
1159             else:
1160                 res = '%sp' % format['height']
1161         elif format.get('width') is not None:
1162             res = '?x%d' % format['width']
1163         else:
1164             res = default
1165         return res
1166
1167     def _format_note(self, fdict):
1168         res = ''
1169         if fdict.get('ext') in ['f4f', 'f4m']:
1170             res += '(unsupported) '
1171         if fdict.get('format_note') is not None:
1172             res += fdict['format_note'] + ' '
1173         if fdict.get('tbr') is not None:
1174             res += '%4dk ' % fdict['tbr']
1175         if fdict.get('container') is not None:
1176             if res:
1177                 res += ', '
1178             res += '%s container' % fdict['container']
1179         if (fdict.get('vcodec') is not None and
1180                 fdict.get('vcodec') != 'none'):
1181             if res:
1182                 res += ', '
1183             res += fdict['vcodec']
1184             if fdict.get('vbr') is not None:
1185                 res += '@'
1186         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1187             res += 'video@'
1188         if fdict.get('vbr') is not None:
1189             res += '%4dk' % fdict['vbr']
1190         if fdict.get('acodec') is not None:
1191             if res:
1192                 res += ', '
1193             if fdict['acodec'] == 'none':
1194                 res += 'video only'
1195             else:
1196                 res += '%-5s' % fdict['acodec']
1197         elif fdict.get('abr') is not None:
1198             if res:
1199                 res += ', '
1200             res += 'audio'
1201         if fdict.get('abr') is not None:
1202             res += '@%3dk' % fdict['abr']
1203         if fdict.get('asr') is not None:
1204             res += ' (%5dHz)' % fdict['asr']
1205         if fdict.get('filesize') is not None:
1206             if res:
1207                 res += ', '
1208             res += format_bytes(fdict['filesize'])
1209         elif fdict.get('filesize_approx') is not None:
1210             if res:
1211                 res += ', '
1212             res += '~' + format_bytes(fdict['filesize_approx'])
1213         return res
1214
1215     def list_formats(self, info_dict):
1216         def line(format, idlen=20):
1217             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1218                 format['format_id'],
1219                 format['ext'],
1220                 self.format_resolution(format),
1221                 self._format_note(format),
1222             ))
1223
1224         formats = info_dict.get('formats', [info_dict])
1225         idlen = max(len('format code'),
1226                     max(len(f['format_id']) for f in formats))
1227         formats_s = [line(f, idlen) for f in formats]
1228         if len(formats) > 1:
1229             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1230             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1231
1232         header_line = line({
1233             'format_id': 'format code', 'ext': 'extension',
1234             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1235         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1236                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1237
1238     def urlopen(self, req):
1239         """ Start an HTTP download """
1240         return self._opener.open(req, timeout=self._socket_timeout)
1241
1242     def print_debug_header(self):
1243         if not self.params.get('verbose'):
1244             return
1245
1246         if type('') is not compat_str:
1247             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1248             self.report_warning(
1249                 'Your Python is broken! Update to a newer and supported version')
1250
1251         encoding_str = (
1252             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1253                 locale.getpreferredencoding(),
1254                 sys.getfilesystemencoding(),
1255                 sys.stdout.encoding,
1256                 self.get_encoding()))
1257         write_string(encoding_str, encoding=None)
1258
1259         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1260         try:
1261             sp = subprocess.Popen(
1262                 ['git', 'rev-parse', '--short', 'HEAD'],
1263                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1264                 cwd=os.path.dirname(os.path.abspath(__file__)))
1265             out, err = sp.communicate()
1266             out = out.decode().strip()
1267             if re.match('[0-9a-f]+', out):
1268                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1269         except:
1270             try:
1271                 sys.exc_clear()
1272             except:
1273                 pass
1274         self._write_string('[debug] Python version %s - %s' %
1275                      (platform.python_version(), platform_name()) + '\n')
1276
1277         proxy_map = {}
1278         for handler in self._opener.handlers:
1279             if hasattr(handler, 'proxies'):
1280                 proxy_map.update(handler.proxies)
1281         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1282
1283     def _setup_opener(self):
1284         timeout_val = self.params.get('socket_timeout')
1285         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1286
1287         opts_cookiefile = self.params.get('cookiefile')
1288         opts_proxy = self.params.get('proxy')
1289
1290         if opts_cookiefile is None:
1291             self.cookiejar = compat_cookiejar.CookieJar()
1292         else:
1293             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1294                 opts_cookiefile)
1295             if os.access(opts_cookiefile, os.R_OK):
1296                 self.cookiejar.load()
1297
1298         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1299             self.cookiejar)
1300         if opts_proxy is not None:
1301             if opts_proxy == '':
1302                 proxies = {}
1303             else:
1304                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1305         else:
1306             proxies = compat_urllib_request.getproxies()
1307             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1308             if 'http' in proxies and 'https' not in proxies:
1309                 proxies['https'] = proxies['http']
1310         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1311
1312         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1313         https_handler = make_HTTPS_handler(
1314             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1315         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1316         opener = compat_urllib_request.build_opener(
1317             https_handler, proxy_handler, cookie_processor, ydlh)
1318         # Delete the default user-agent header, which would otherwise apply in
1319         # cases where our custom HTTP handler doesn't come into play
1320         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1321         opener.addheaders = []
1322         self._opener = opener
1323
1324     def encode(self, s):
1325         if isinstance(s, bytes):
1326             return s  # Already encoded
1327
1328         try:
1329             return s.encode(self.get_encoding())
1330         except UnicodeEncodeError as err:
1331             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1332             raise
1333
1334     def get_encoding(self):
1335         encoding = self.params.get('encoding')
1336         if encoding is None:
1337             encoding = preferredencoding()
1338         return encoding