2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
53 UnavailableVideoError,
60 from .extractor import get_info_extractor, gen_extractors
61 from .downloader import get_suitable_downloader
62 from .postprocessor import FFmpegMergerPP
63 from .version import __version__
66 class YoutubeDL(object):
69 YoutubeDL objects are responsible for downloading the
70 actual video file and writing it to disk if the user has requested
71 it, among other tasks. In most cases there should be one per
72 program. Given a video URL, the downloader does not know how to
73 extract all the needed information; that is the task of the
74 InfoExtractors, so it has to pass the URL to one of them.
76 For this, YoutubeDL objects have a method that allows
77 InfoExtractors to be registered in a given order. When it is passed
78 a URL, the YoutubeDL object hands it to the first InfoExtractor it
79 finds that reports being able to handle it. The InfoExtractor extracts
80 all the information about the video or videos the URL refers to, and
81 YoutubeDL processes the extracted information, possibly using a File
82 Downloader to download the video.
84 YoutubeDL objects accept a lot of parameters. In order not to saturate
85 the object constructor with arguments, it receives a dictionary of
86 options instead. These options are available through the params
87 attribute for the InfoExtractors to use. The YoutubeDL also
88 registers itself as the downloader in charge of the InfoExtractors
89 that are added to it, so this is a "mutual registration".
93 username: Username for authentication purposes.
94 password: Password for authentication purposes.
95 videopassword: Password for accessing a video.
96 usenetrc: Use netrc for authentication instead.
97 verbose: Print additional info to stdout.
98 quiet: Do not print messages to stdout.
99 no_warnings: Do not print out anything for warnings.
100 forceurl: Force printing final URL.
101 forcetitle: Force printing title.
102 forceid: Force printing ID.
103 forcethumbnail: Force printing thumbnail URL.
104 forcedescription: Force printing description.
105 forcefilename: Force printing final filename.
106 forceduration: Force printing duration.
107 forcejson: Force printing info_dict as JSON.
108 simulate: Do not download the video files.
109 format: Video format code.
110 format_limit: Highest quality format to try.
111 outtmpl: Template for output names.
112 restrictfilenames: Do not allow "&" and spaces in file names
113 ignoreerrors: Do not stop on download errors.
114 nooverwrites: Prevent overwriting files.
115 playliststart: Playlist item to start at.
116 playlistend: Playlist item to end at.
117 matchtitle: Download only matching titles.
118 rejecttitle: Reject downloads for matching titles.
119 logger: Log messages to a logging.Logger instance.
120 logtostderr: Log messages to stderr instead of stdout.
121 writedescription: Write the video description to a .description file
122 writeinfojson: Write the video metadata to a .info.json file
123 writeannotations: Write the video annotations to a .annotations.xml file
124 writethumbnail: Write the thumbnail image to a file
125 writesubtitles: Write the video subtitles to a file
126 writeautomaticsub: Write the automatic subtitles to a file
127 allsubtitles: Downloads all the subtitles of the video
128 (requires writesubtitles or writeautomaticsub)
129 listsubtitles: Lists all available subtitles for the video
130 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
131 subtitleslangs: List of languages of the subtitles to download
132 keepvideo: Keep the video file after post-processing
133 daterange: A DateRange object, download only if the upload_date is in the range.
134 skip_download: Skip the actual download of the video file
135 cachedir: Location of the cache files in the filesystem.
136 None to disable filesystem cache.
137 noplaylist: Download single video instead of a playlist if in doubt.
138 age_limit: An integer representing the user's age in years.
139 Videos unsuitable for the given age are skipped.
140 min_views: An integer representing the minimum view count the video
141 must have in order to not be skipped.
142 Videos without view count information are always
143 downloaded. None for no limit.
144 max_views: An integer representing the maximum view count.
145 Videos that are more popular than that are not downloaded.
147 Videos without view count information are always
148 downloaded. None for no limit.
149 download_archive: File name of a file where all downloads are recorded.
150 Videos already present in the file are not downloaded again.
152 cookiefile: File name where cookies should be read from and dumped to.
153 nocheckcertificate:Do not verify SSL certificates
154 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
155 At the moment, this is only supported by YouTube.
156 proxy: URL of the proxy server to use
157 socket_timeout: Time to wait for unresponsive hosts, in seconds
158 bidi_workaround: Work around buggy terminals without bidirectional text
159 support, using fribidi
160 debug_printtraffic:Print out sent and received HTTP traffic
161 include_ads: Download ads as well
162 default_search: Prepend this string if an input URL is not valid.
163 'auto' for elaborate guessing
164 encoding: Use this encoding instead of the system-specified one.
165 extract_flat: Do not resolve URLs, return the immediate result.
167 The following parameters are not used by YoutubeDL itself; they are used by
169 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
170 noresizebuffer, retries, continuedl, noprogress, consoletitle
172 The following options are used by the post processors:
173 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
174 otherwise prefer avconv.
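
A minimal usage sketch (the options are the ones documented above; the
import path assumes the package is importable as youtube_dl, and the
URL is just a placeholder):

    from youtube_dl import YoutubeDL

    ydl_opts = {
        'format': 'best',
        'outtmpl': '%(title)s-%(id)s.%(ext)s',
        'ignoreerrors': True,
    }
    with YoutubeDL(ydl_opts) as ydl:
        # Register all known InfoExtractors, then hand over the URL(s).
        ydl.add_default_info_extractors()
        ydl.download(['http://www.youtube.com/watch?v=PLACEHOLDER'])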
180 _download_retcode = None
181 _num_downloads = None
184 def __init__(self, params=None):
185 """Create a FileDownloader object with the given options."""
189 self._ies_instances = {}
191 self._progress_hooks = []
192 self._download_retcode = 0
193 self._num_downloads = 0
194 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
195 self._err_file = sys.stderr
198 if params.get('bidi_workaround', False):
201 master, slave = pty.openpty()
202 width = get_term_width()
206 width_args = ['-w', str(width)]
208 stdin=subprocess.PIPE,
210 stderr=self._err_file)
212 self._output_process = subprocess.Popen(
213 ['bidiv'] + width_args, **sp_kwargs
216 self._output_process = subprocess.Popen(
217 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
218 self._output_channel = os.fdopen(master, 'rb')
219 except OSError as ose:
221 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
225 if (sys.version_info >= (3,) and sys.platform != 'win32' and
226 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
227 and not params.get('restrictfilenames', False)):
228 # On Python 3, the Unicode filesystem API will throw errors (#1474)
230 'Assuming --restrict-filenames since file system encoding '
231 'cannot encode all characters. '
232 'Set the LC_ALL environment variable to fix this.')
233 self.params['restrictfilenames'] = True
235 if '%(stitle)s' in self.params.get('outtmpl', ''):
236 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag (which also secures %(uploader)s et al) instead.')
240 def add_info_extractor(self, ie):
241 """Add an InfoExtractor object to the end of the list."""
243 self._ies_instances[ie.ie_key()] = ie
244 ie.set_downloader(self)
246 def get_info_extractor(self, ie_key):
248 Get an instance of an IE with name ie_key. It will first try to get one from
249 the _ies list; if there is no instance, it will create a new one and add
250 it to the extractor list.
252 ie = self._ies_instances.get(ie_key)
254 ie = get_info_extractor(ie_key)()
255 self.add_info_extractor(ie)
258 def add_default_info_extractors(self):
260 Add the InfoExtractors returned by gen_extractors to the end of the list
262 for ie in gen_extractors():
263 self.add_info_extractor(ie)
265 def add_post_processor(self, pp):
266 """Add a PostProcessor object to the end of the chain."""
268 pp.set_downloader(self)
270 def add_progress_hook(self, ph):
271 """Add the progress hook (currently only for the file downloader)"""
272 self._progress_hooks.append(ph)
274 def _bidi_workaround(self, message):
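# Pipe the message through the bidi helper process (bidiv or fribidi)
# started in __init__, then read back exactly as many lines as were
# written, so right-to-left text renders correctly when the
# 'bidi_workaround' option is set.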
275 if not hasattr(self, '_output_channel'):
278 assert hasattr(self, '_output_process')
279 assert isinstance(message, compat_str)
280 line_count = message.count('\n') + 1
281 self._output_process.stdin.write((message + '\n').encode('utf-8'))
282 self._output_process.stdin.flush()
283 res = ''.join(self._output_channel.readline().decode('utf-8')
284 for _ in range(line_count))
285 return res[:-len('\n')]
287 def to_screen(self, message, skip_eol=False):
288 """Print message to stdout if not in quiet mode."""
289 return self.to_stdout(message, skip_eol, check_quiet=True)
291 def _write_string(self, s, out=None):
292 write_string(s, out=out, encoding=self.params.get('encoding'))
294 def to_stdout(self, message, skip_eol=False, check_quiet=False):
295 """Print message to stdout if not in quiet mode."""
296 if self.params.get('logger'):
297 self.params['logger'].debug(message)
298 elif not check_quiet or not self.params.get('quiet', False):
299 message = self._bidi_workaround(message)
300 terminator = ['\n', ''][skip_eol]
301 output = message + terminator
303 self._write_string(output, self._screen_file)
305 def to_stderr(self, message):
306 """Print message to stderr."""
307 assert isinstance(message, compat_str)
308 if self.params.get('logger'):
309 self.params['logger'].error(message)
311 message = self._bidi_workaround(message)
312 output = message + '\n'
313 self._write_string(output, self._err_file)
315 def to_console_title(self, message):
316 if not self.params.get('consoletitle', False):
318 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
319 # c_wchar_p() might not be necessary if `message` is
320 # already of type unicode()
321 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
322 elif 'TERM' in os.environ:
323 self._write_string('\033]0;%s\007' % message, self._screen_file)
325 def save_console_title(self):
326 if not self.params.get('consoletitle', False):
328 if 'TERM' in os.environ:
329 # Save the title on stack
330 self._write_string('\033[22;0t', self._screen_file)
332 def restore_console_title(self):
333 if not self.params.get('consoletitle', False):
335 if 'TERM' in os.environ:
336 # Restore the title from stack
337 self._write_string('\033[23;0t', self._screen_file)
340 self.save_console_title()
343 def __exit__(self, *args):
344 self.restore_console_title()
346 if self.params.get('cookiefile') is not None:
347 self.cookiejar.save()
349 def trouble(self, message=None, tb=None):
350 """Determine action to take when a download problem appears.
352 Depending on whether the downloader has been configured to ignore
353 download errors, this method may raise an exception
354 when errors are found, after printing the message.
356 tb, if given, is additional traceback information.
358 if message is not None:
359 self.to_stderr(message)
360 if self.params.get('verbose'):
362 if sys.exc_info()[0]: # if .trouble has been called from an except block
364 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
365 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
366 tb += compat_str(traceback.format_exc())
368 tb_data = traceback.format_list(traceback.extract_stack())
369 tb = ''.join(tb_data)
371 if not self.params.get('ignoreerrors', False):
372 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
373 exc_info = sys.exc_info()[1].exc_info
375 exc_info = sys.exc_info()
376 raise DownloadError(message, exc_info)
377 self._download_retcode = 1
379 def report_warning(self, message):
381 Print the message to stderr; it will be prefixed with 'WARNING:'.
382 If stderr is a tty file, the 'WARNING:' prefix will be colored.
384 if self.params.get('logger') is not None:
385 self.params['logger'].warning(message)
387 if self.params.get('no_warnings'):
389 if self._err_file.isatty() and os.name != 'nt':
390 _msg_header = '\033[0;33mWARNING:\033[0m'
392 _msg_header = 'WARNING:'
393 warning_message = '%s %s' % (_msg_header, message)
394 self.to_stderr(warning_message)
396 def report_error(self, message, tb=None):
398 Do the same as trouble, but prefixes the message with 'ERROR:', colored
399 in red if stderr is a tty file.
401 if self._err_file.isatty() and os.name != 'nt':
402 _msg_header = '\033[0;31mERROR:\033[0m'
404 _msg_header = 'ERROR:'
405 error_message = '%s %s' % (_msg_header, message)
406 self.trouble(error_message, tb)
408 def report_file_already_downloaded(self, file_name):
409 """Report file has already been fully downloaded."""
411 self.to_screen('[download] %s has already been downloaded' % file_name)
412 except UnicodeEncodeError:
413 self.to_screen('[download] The file has already been downloaded')
415 def prepare_filename(self, info_dict):
416 """Generate the output filename."""
418 template_dict = dict(info_dict)
420 template_dict['epoch'] = int(time.time())
421 autonumber_size = self.params.get('autonumber_size')
422 if autonumber_size is None:
424 autonumber_templ = '%0' + str(autonumber_size) + 'd'
425 template_dict['autonumber'] = autonumber_templ % self._num_downloads
426 if template_dict.get('playlist_index') is not None:
427 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
428 if template_dict.get('resolution') is None:
429 if template_dict.get('width') and template_dict.get('height'):
430 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
431 elif template_dict.get('height'):
432 template_dict['resolution'] = '%sp' % template_dict['height']
433 elif template_dict.get('width'):
434 template_dict['resolution'] = '%dx?' % template_dict['width']
436 sanitize = lambda k, v: sanitize_filename(
438 restricted=self.params.get('restrictfilenames'),
440 template_dict = dict((k, sanitize(k, v))
441 for k, v in template_dict.items()
443 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
445 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
446 tmpl = os.path.expanduser(outtmpl)
447 filename = tmpl % template_dict
449 except ValueError as err:
450 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
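# Illustrative only: with a template such as '%(title)s-%(id)s.%(ext)s'
# and an info_dict like {'title': 'Test video', 'id': 'abc123', 'ext': 'mp4'},
# this method yields 'Test video-abc123.mp4' (all field values pass through
# sanitize_filename, more aggressively when 'restrictfilenames' is set).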
453 def _match_entry(self, info_dict):
454 """ Returns None iff the file should be downloaded """
456 video_title = info_dict.get('title', info_dict.get('id', 'video'))
457 if 'title' in info_dict:
458 # This can happen when we're just evaluating the playlist
459 title = info_dict['title']
460 matchtitle = self.params.get('matchtitle', False)
462 if not re.search(matchtitle, title, re.IGNORECASE):
463 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
464 rejecttitle = self.params.get('rejecttitle', False)
466 if re.search(rejecttitle, title, re.IGNORECASE):
467 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
468 date = info_dict.get('upload_date', None)
470 dateRange = self.params.get('daterange', DateRange())
471 if date not in dateRange:
472 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
473 view_count = info_dict.get('view_count', None)
474 if view_count is not None:
475 min_views = self.params.get('min_views')
476 if min_views is not None and view_count < min_views:
477 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
478 max_views = self.params.get('max_views')
479 if max_views is not None and view_count > max_views:
480 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
481 age_limit = self.params.get('age_limit')
482 if age_limit is not None:
483 if age_limit < info_dict.get('age_limit', 0):
484 return 'Skipping "' + video_title + '" because it is age restricted'
485 if self.in_download_archive(info_dict):
486 return '%s has already been recorded in archive' % video_title
490 def add_extra_info(info_dict, extra_info):
491 '''Set the keys from extra_info in info dict if they are missing'''
492 for key, value in extra_info.items():
493 info_dict.setdefault(key, value)
495 def extract_info(self, url, download=True, ie_key=None, extra_info={},
498 Returns a list with a dictionary for each video we find.
499 If 'download', also downloads the videos.
500 extra_info is a dict containing the extra values to add to each result
504 ies = [self.get_info_extractor(ie_key)]
509 if not ie.suitable(url):
513 self.report_warning('The program functionality for this site has been marked as broken, '
514 'and will probably not work.')
517 ie_result = ie.extract(url)
518 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
520 if isinstance(ie_result, list):
521 # Backwards compatibility: old IE result format
523 '_type': 'compat_list',
524 'entries': ie_result,
526 self.add_default_extra_info(ie_result, ie, url)
528 return self.process_ie_result(ie_result, download, extra_info)
531 except ExtractorError as de: # An error we somewhat expected
532 self.report_error(compat_str(de), de.format_traceback())
534 except MaxDownloadsReached:
536 except Exception as e:
537 if self.params.get('ignoreerrors', False):
538 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
543 self.report_error('no suitable InfoExtractor for URL %s' % url)
545 def add_default_extra_info(self, ie_result, ie, url):
546 self.add_extra_info(ie_result, {
547 'extractor': ie.IE_NAME,
549 'webpage_url_basename': url_basename(url),
550 'extractor_key': ie.ie_key(),
553 def process_ie_result(self, ie_result, download=True, extra_info={}):
555 Take the result of the ie (may be modified) and resolve all unresolved
556 references (URLs, playlist items).
558 It will also download the videos if 'download'.
559 Returns the resolved ie_result.
562 result_type = ie_result.get('_type', 'video')
564 if self.params.get('extract_flat', False):
565 if result_type in ('url', 'url_transparent'):
568 if result_type == 'video':
569 self.add_extra_info(ie_result, extra_info)
570 return self.process_video_result(ie_result, download=download)
571 elif result_type == 'url':
572 # We have to add extra_info to the results because it may be
573 # contained in a playlist
574 return self.extract_info(ie_result['url'],
576 ie_key=ie_result.get('ie_key'),
577 extra_info=extra_info)
578 elif result_type == 'url_transparent':
579 # Use the information from the embedding page
580 info = self.extract_info(
581 ie_result['url'], ie_key=ie_result.get('ie_key'),
582 extra_info=extra_info, download=False, process=False)
584 def make_result(embedded_info):
585 new_result = ie_result.copy()
586 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
587 'entries', 'ie_key', 'duration',
588 'subtitles', 'annotations', 'format',
589 'thumbnail', 'thumbnails'):
592 if f in embedded_info:
593 new_result[f] = embedded_info[f]
595 new_result = make_result(info)
597 assert new_result.get('_type') != 'url_transparent'
598 if new_result.get('_type') == 'compat_list':
599 new_result['entries'] = [
600 make_result(e) for e in new_result['entries']]
602 return self.process_ie_result(
603 new_result, download=download, extra_info=extra_info)
604 elif result_type == 'playlist':
605 # We process each entry in the playlist
606 playlist = ie_result.get('title', None) or ie_result.get('id', None)
607 self.to_screen('[download] Downloading playlist: %s' % playlist)
609 playlist_results = []
611 playliststart = self.params.get('playliststart', 1) - 1
612 playlistend = self.params.get('playlistend', None)
613 # For backwards compatibility, interpret -1 as whole list
614 if playlistend == -1:
617 if isinstance(ie_result['entries'], list):
618 n_all_entries = len(ie_result['entries'])
619 entries = ie_result['entries'][playliststart:playlistend]
620 n_entries = len(entries)
622 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
623 (ie_result['extractor'], playlist, n_all_entries, n_entries))
625 assert isinstance(ie_result['entries'], PagedList)
626 entries = ie_result['entries'].getslice(
627 playliststart, playlistend)
628 n_entries = len(entries)
630 "[%s] playlist %s: Downloading %d videos" %
631 (ie_result['extractor'], playlist, n_entries))
633 for i, entry in enumerate(entries, 1):
634 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
636 'playlist': playlist,
637 'playlist_index': i + playliststart,
638 'extractor': ie_result['extractor'],
639 'webpage_url': ie_result['webpage_url'],
640 'webpage_url_basename': url_basename(ie_result['webpage_url']),
641 'extractor_key': ie_result['extractor_key'],
644 reason = self._match_entry(entry)
645 if reason is not None:
646 self.to_screen('[download] ' + reason)
649 entry_result = self.process_ie_result(entry,
652 playlist_results.append(entry_result)
653 ie_result['entries'] = playlist_results
655 elif result_type == 'compat_list':
657 self.add_extra_info(r,
659 'extractor': ie_result['extractor'],
660 'webpage_url': ie_result['webpage_url'],
661 'webpage_url_basename': url_basename(ie_result['webpage_url']),
662 'extractor_key': ie_result['extractor_key'],
665 ie_result['entries'] = [
666 self.process_ie_result(_fixup(r), download, extra_info)
667 for r in ie_result['entries']
671 raise Exception('Invalid result type: %s' % result_type)
673 def select_format(self, format_spec, available_formats):
674 if format_spec == 'best' or format_spec is None:
675 return available_formats[-1]
676 elif format_spec == 'worst':
677 return available_formats[0]
678 elif format_spec == 'bestaudio':
680 f for f in available_formats
681 if f.get('vcodec') == 'none']
683 return audio_formats[-1]
684 elif format_spec == 'worstaudio':
686 f for f in available_formats
687 if f.get('vcodec') == 'none']
689 return audio_formats[0]
690 elif format_spec == 'bestvideo':
692 f for f in available_formats
693 if f.get('acodec') == 'none']
695 return video_formats[-1]
696 elif format_spec == 'worstvideo':
698 f for f in available_formats
699 if f.get('acodec') == 'none']
701 return video_formats[0]
703 extensions = ['mp4', 'flv', 'webm', '3gp']
704 if format_spec in extensions:
705 filter_f = lambda f: f['ext'] == format_spec
707 filter_f = lambda f: f['format_id'] == format_spec
708 matches = list(filter(filter_f, available_formats))
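# Note: the branches above rely on available_formats being sorted from
# worst to best quality; list_formats() below makes the same assumption
# when it labels the first entry '(worst)' and the last '(best)'.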
713 def process_video_result(self, info_dict, download=True):
714 assert info_dict.get('_type', 'video') == 'video'
716 if 'id' not in info_dict:
717 raise ExtractorError('Missing "id" field in extractor result')
718 if 'title' not in info_dict:
719 raise ExtractorError('Missing "title" field in extractor result')
721 if 'playlist' not in info_dict:
722 # It isn't part of a playlist
723 info_dict['playlist'] = None
724 info_dict['playlist_index'] = None
726 thumbnails = info_dict.get('thumbnails')
728 thumbnails.sort(key=lambda t: (
729 t.get('width'), t.get('height'), t.get('url')))
731 if 'width' in t and 'height' in t:
732 t['resolution'] = '%dx%d' % (t['width'], t['height'])
734 if thumbnails and 'thumbnail' not in info_dict:
735 info_dict['thumbnail'] = thumbnails[-1]['url']
737 if 'display_id' not in info_dict and 'id' in info_dict:
738 info_dict['display_id'] = info_dict['id']
740 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
741 upload_date = datetime.datetime.utcfromtimestamp(
742 info_dict['timestamp'])
743 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
745 # These extractors handle format selection themselves
746 if info_dict['extractor'] in ['Youku']:
748 self.process_info(info_dict)
751 # We now pick which formats have to be downloaded
752 if info_dict.get('formats') is None:
753 # There's only one format available
754 formats = [info_dict]
756 formats = info_dict['formats']
759 raise ExtractorError('No video formats found!')
761 # We check that all the formats have the format and format_id fields
762 for i, format in enumerate(formats):
763 if 'url' not in format:
764 raise ExtractorError('Missing "url" key in result (index %d)' % i)
766 if format.get('format_id') is None:
767 format['format_id'] = compat_str(i)
768 if format.get('format') is None:
769 format['format'] = '{id} - {res}{note}'.format(
770 id=format['format_id'],
771 res=self.format_resolution(format),
772 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
774 # Automatically determine file extension if missing
775 if 'ext' not in format:
776 format['ext'] = determine_ext(format['url']).lower()
778 format_limit = self.params.get('format_limit', None)
780 formats = list(takewhile_inclusive(
781 lambda f: f['format_id'] != format_limit, formats
784 # TODO Central sorting goes here
786 if formats[0] is not info_dict:
787 # only set the 'formats' field if the original info_dict lists them
788 # otherwise we end up with a circular reference, the first (and unique)
789 # element in the 'formats' field in info_dict is info_dict itself,
790 # which can't be exported to json
791 info_dict['formats'] = formats
792 if self.params.get('listformats', None):
793 self.list_formats(info_dict)
796 req_format = self.params.get('format')
797 if req_format is None:
799 formats_to_download = []
800 # The -1 is for supporting YoutubeIE
801 if req_format in ('-1', 'all'):
802 formats_to_download = formats
804 # We can accept formats requested in the form '34/5/best': we pick
805 # the first one that is available, starting from the left
806 req_formats = req_format.split('/')
807 for rf in req_formats:
808 if re.match(r'.+?\+.+?', rf) is not None:
809 # Two formats have been requested like '137+139'
810 format_1, format_2 = rf.split('+')
811 formats_info = (self.select_format(format_1, formats),
812 self.select_format(format_2, formats))
813 if all(formats_info):
815 'requested_formats': formats_info,
817 'ext': formats_info[0]['ext'],
820 selected_format = None
822 selected_format = self.select_format(rf, formats)
823 if selected_format is not None:
824 formats_to_download = [selected_format]
826 if not formats_to_download:
827 raise ExtractorError('requested format not available',
831 if len(formats_to_download) > 1:
832 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
833 for format in formats_to_download:
834 new_info = dict(info_dict)
835 new_info.update(format)
836 self.process_info(new_info)
837 # We update the info dict with the best quality format (backwards compatibility)
838 info_dict.update(formats_to_download[-1])
841 def process_info(self, info_dict):
842 """Process a single resolved IE result."""
844 assert info_dict.get('_type', 'video') == 'video'
846 max_downloads = self.params.get('max_downloads')
847 if max_downloads is not None:
848 if self._num_downloads >= int(max_downloads):
849 raise MaxDownloadsReached()
851 info_dict['fulltitle'] = info_dict['title']
852 if len(info_dict['title']) > 200:
853 info_dict['title'] = info_dict['title'][:197] + '...'
855 # Keep for backwards compatibility
856 info_dict['stitle'] = info_dict['title']
858 if 'format' not in info_dict:
859 info_dict['format'] = info_dict['ext']
861 reason = self._match_entry(info_dict)
862 if reason is not None:
863 self.to_screen('[download] ' + reason)
866 self._num_downloads += 1
868 filename = self.prepare_filename(info_dict)
871 if self.params.get('forcetitle', False):
872 self.to_stdout(info_dict['fulltitle'])
873 if self.params.get('forceid', False):
874 self.to_stdout(info_dict['id'])
875 if self.params.get('forceurl', False):
876 # For RTMP URLs, also include the playpath
877 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
878 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
879 self.to_stdout(info_dict['thumbnail'])
880 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
881 self.to_stdout(info_dict['description'])
882 if self.params.get('forcefilename', False) and filename is not None:
883 self.to_stdout(filename)
884 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
885 self.to_stdout(formatSeconds(info_dict['duration']))
886 if self.params.get('forceformat', False):
887 self.to_stdout(info_dict['format'])
888 if self.params.get('forcejson', False):
889 info_dict['_filename'] = filename
890 self.to_stdout(json.dumps(info_dict))
892 # Do nothing else if in simulate mode
893 if self.params.get('simulate', False):
900 dn = os.path.dirname(encodeFilename(filename))
901 if dn and not os.path.exists(dn):
903 except (OSError, IOError) as err:
904 self.report_error('unable to create directory ' + compat_str(err))
907 if self.params.get('writedescription', False):
908 descfn = filename + '.description'
909 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
910 self.to_screen('[info] Video description is already present')
913 self.to_screen('[info] Writing video description to: ' + descfn)
914 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
915 descfile.write(info_dict['description'])
916 except (KeyError, TypeError):
917 self.report_warning('There\'s no description to write.')
918 except (OSError, IOError):
919 self.report_error('Cannot write description file ' + descfn)
922 if self.params.get('writeannotations', False):
923 annofn = filename + '.annotations.xml'
924 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
925 self.to_screen('[info] Video annotations are already present')
928 self.to_screen('[info] Writing video annotations to: ' + annofn)
929 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
930 annofile.write(info_dict['annotations'])
931 except (KeyError, TypeError):
932 self.report_warning('There are no annotations to write.')
933 except (OSError, IOError):
934 self.report_error('Cannot write annotations file: ' + annofn)
937 subtitles_are_requested = any([self.params.get('writesubtitles', False),
938 self.params.get('writeautomaticsub')])
940 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
941 # subtitle download errors are already managed as troubles in the relevant IE,
942 # so this silently goes on when used with an IE that does not support them
943 subtitles = info_dict['subtitles']
944 sub_format = self.params.get('subtitlesformat', 'srt')
945 for sub_lang in subtitles.keys():
946 sub = subtitles[sub_lang]
950 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
951 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
952 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
954 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
955 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
957 except (OSError, IOError):
958 self.report_error('Cannot write subtitles file ' + sub_filename)
961 if self.params.get('writeinfojson', False):
962 infofn = os.path.splitext(filename)[0] + '.info.json'
963 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
964 self.to_screen('[info] Video description metadata is already present')
966 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
968 write_json_file(info_dict, encodeFilename(infofn))
969 except (OSError, IOError):
970 self.report_error('Cannot write metadata to JSON file ' + infofn)
973 if self.params.get('writethumbnail', False):
974 if info_dict.get('thumbnail') is not None:
975 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
976 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
977 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
978 self.to_screen('[%s] %s: Thumbnail is already present' %
979 (info_dict['extractor'], info_dict['id']))
981 self.to_screen('[%s] %s: Downloading thumbnail ...' %
982 (info_dict['extractor'], info_dict['id']))
984 uf = self.urlopen(info_dict['thumbnail'])
985 with open(thumb_filename, 'wb') as thumbf:
986 shutil.copyfileobj(uf, thumbf)
987 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
988 (info_dict['extractor'], info_dict['id'], thumb_filename))
989 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
990 self.report_warning('Unable to download thumbnail "%s": %s' %
991 (info_dict['thumbnail'], compat_str(err)))
993 if not self.params.get('skip_download', False):
994 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
999 fd = get_suitable_downloader(info)(self, self.params)
1000 for ph in self._progress_hooks:
1001 fd.add_progress_hook(ph)
1002 if self.params.get('verbose'):
1003 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1004 return fd.download(name, info)
1005 if info_dict.get('requested_formats') is not None:
1008 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1009 if not merger._get_executable():
1011 self.report_warning('You have requested multiple '
1012 'formats but ffmpeg or avconv are not installed.'
1013 ' The formats won\'t be merged')
1015 postprocessors = [merger]
1016 for f in info_dict['requested_formats']:
1017 new_info = dict(info_dict)
1019 fname = self.prepare_filename(new_info)
1020 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1021 downloaded.append(fname)
1022 partial_success = dl(fname, new_info)
1023 success = success and partial_success
1024 info_dict['__postprocessors'] = postprocessors
1025 info_dict['__files_to_merge'] = downloaded
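# At this point each requested format has been downloaded to its own
# 'f<format_id>'-prefixed file; FFmpegMergerPP (added above as a
# post-processor) is expected to merge them into the final filename.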
1027 # Just a single file
1028 success = dl(filename, info_dict)
1029 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1030 self.report_error('unable to download video data: %s' % str(err))
1032 except (OSError, IOError) as err:
1033 raise UnavailableVideoError(err)
1034 except (ContentTooShortError, ) as err:
1035 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1040 self.post_process(filename, info_dict)
1041 except (PostProcessingError) as err:
1042 self.report_error('postprocessing: %s' % str(err))
1045 self.record_download_archive(info_dict)
1047 def download(self, url_list):
1048 """Download a given list of URLs."""
1049 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1050 if (len(url_list) > 1 and
1052 and self.params.get('max_downloads') != 1):
1053 raise SameFileError(outtmpl)
1055 for url in url_list:
1057 # It also downloads the videos
1058 self.extract_info(url)
1059 except UnavailableVideoError:
1060 self.report_error('unable to download video')
1061 except MaxDownloadsReached:
1062 self.to_screen('[info] Maximum number of downloaded files reached.')
1065 return self._download_retcode
1067 def download_with_info_file(self, info_filename):
1068 with io.open(info_filename, 'r', encoding='utf-8') as f:
1071 self.process_ie_result(info, download=True)
1072 except DownloadError:
1073 webpage_url = info.get('webpage_url')
1074 if webpage_url is not None:
1075 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1076 return self.download([webpage_url])
1079 return self._download_retcode
1081 def post_process(self, filename, ie_info):
1082 """Run all the postprocessors on the given file."""
1083 info = dict(ie_info)
1084 info['filepath'] = filename
1087 if ie_info.get('__postprocessors') is not None:
1088 pps_chain.extend(ie_info['__postprocessors'])
1089 pps_chain.extend(self._pps)
1090 for pp in pps_chain:
1092 keep_video_wish, new_info = pp.run(info)
1093 if keep_video_wish is not None:
1095 keep_video = keep_video_wish
1096 elif keep_video is None:
1097 # No clear decision yet, let IE decide
1098 keep_video = keep_video_wish
1099 except PostProcessingError as e:
1100 self.report_error(e.msg)
1101 if keep_video is False and not self.params.get('keepvideo', False):
1103 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1104 os.remove(encodeFilename(filename))
1105 except (IOError, OSError):
1106 self.report_warning('Unable to remove downloaded video file')
1108 def _make_archive_id(self, info_dict):
1109 # Future-proof against any change in case
1110 # and backwards compatibility with prior versions
1111 extractor = info_dict.get('extractor_key')
1112 if extractor is None:
1113 if 'id' in info_dict:
1114 extractor = info_dict.get('ie_key') # key in a playlist
1115 if extractor is None:
1116 return None # Incomplete video information
1117 return extractor.lower() + ' ' + info_dict['id']
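# Illustrative only: for a video extracted by the YouTube extractor with
# id 'abc123', the resulting archive entry would be the line
# 'youtube abc123' (lowercased extractor key, a space, then the video id).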
1119 def in_download_archive(self, info_dict):
1120 fn = self.params.get('download_archive')
1124 vid_id = self._make_archive_id(info_dict)
1126 return False # Incomplete video information
1129 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1130 for line in archive_file:
1131 if line.strip() == vid_id:
1133 except IOError as ioe:
1134 if ioe.errno != errno.ENOENT:
1138 def record_download_archive(self, info_dict):
1139 fn = self.params.get('download_archive')
1142 vid_id = self._make_archive_id(info_dict)
1144 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1145 archive_file.write(vid_id + '\n')
1148 def format_resolution(format, default='unknown'):
1149 if format.get('vcodec') == 'none':
1151 if format.get('resolution') is not None:
1152 return format['resolution']
1153 if format.get('height') is not None:
1154 if format.get('width') is not None:
1155 res = '%sx%s' % (format['width'], format['height'])
1157 res = '%sp' % format['height']
1158 elif format.get('width') is not None:
1159 res = '%dx?' % format['width']
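# Illustrative only: a format with {'width': 1280, 'height': 720} is
# reported as '1280x720', while one with only {'height': 720} is
# reported as '720p'.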
1164 def _format_note(self, fdict):
1166 if fdict.get('ext') in ['f4f', 'f4m']:
1167 res += '(unsupported) '
1168 if fdict.get('format_note') is not None:
1169 res += fdict['format_note'] + ' '
1170 if fdict.get('tbr') is not None:
1171 res += '%4dk ' % fdict['tbr']
1172 if fdict.get('container') is not None:
1175 res += '%s container' % fdict['container']
1176 if (fdict.get('vcodec') is not None and
1177 fdict.get('vcodec') != 'none'):
1180 res += fdict['vcodec']
1181 if fdict.get('vbr') is not None:
1183 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1185 if fdict.get('vbr') is not None:
1186 res += '%4dk' % fdict['vbr']
1187 if fdict.get('acodec') is not None:
1190 if fdict['acodec'] == 'none':
1193 res += '%-5s' % fdict['acodec']
1194 elif fdict.get('abr') is not None:
1198 if fdict.get('abr') is not None:
1199 res += '@%3dk' % fdict['abr']
1200 if fdict.get('asr') is not None:
1201 res += ' (%5dHz)' % fdict['asr']
1202 if fdict.get('filesize') is not None:
1205 res += format_bytes(fdict['filesize'])
1206 elif fdict.get('filesize_approx') is not None:
1209 res += '~' + format_bytes(fdict['filesize_approx'])
1212 def list_formats(self, info_dict):
1213 def line(format, idlen=20):
1214 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1215 format['format_id'],
1217 self.format_resolution(format),
1218 self._format_note(format),
1221 formats = info_dict.get('formats', [info_dict])
1222 idlen = max(len('format code'),
1223 max(len(f['format_id']) for f in formats))
1224 formats_s = [line(f, idlen) for f in formats]
1225 if len(formats) > 1:
1226 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1227 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1229 header_line = line({
1230 'format_id': 'format code', 'ext': 'extension',
1231 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1232 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1233 (info_dict['id'], header_line, '\n'.join(formats_s)))
1235 def urlopen(self, req):
1236 """ Start an HTTP download """
1237 return self._opener.open(req, timeout=self._socket_timeout)
1239 def print_debug_header(self):
1240 if not self.params.get('verbose'):
1243 if type('') is not compat_str:
1244 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1245 self.report_warning(
1246 'Your Python is broken! Update to a newer and supported version')
1249 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1250 locale.getpreferredencoding(),
1251 sys.getfilesystemencoding(),
1252 sys.stdout.encoding,
1253 self.get_encoding()))
1254 write_string(encoding_str, encoding=None)
1256 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1258 sp = subprocess.Popen(
1259 ['git', 'rev-parse', '--short', 'HEAD'],
1260 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1261 cwd=os.path.dirname(os.path.abspath(__file__)))
1262 out, err = sp.communicate()
1263 out = out.decode().strip()
1264 if re.match('[0-9a-f]+', out):
1265 self._write_string('[debug] Git HEAD: ' + out + '\n')
1271 self._write_string('[debug] Python version %s - %s' %
1272 (platform.python_version(), platform_name()) + '\n')
1275 for handler in self._opener.handlers:
1276 if hasattr(handler, 'proxies'):
1277 proxy_map.update(handler.proxies)
1278 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1280 def _setup_opener(self):
1281 timeout_val = self.params.get('socket_timeout')
1282 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1284 opts_cookiefile = self.params.get('cookiefile')
1285 opts_proxy = self.params.get('proxy')
1287 if opts_cookiefile is None:
1288 self.cookiejar = compat_cookiejar.CookieJar()
1290 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1292 if os.access(opts_cookiefile, os.R_OK):
1293 self.cookiejar.load()
1295 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1297 if opts_proxy is not None:
1298 if opts_proxy == '':
1301 proxies = {'http': opts_proxy, 'https': opts_proxy}
1303 proxies = compat_urllib_request.getproxies()
1304 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1305 if 'http' in proxies and 'https' not in proxies:
1306 proxies['https'] = proxies['http']
1307 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1309 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1310 https_handler = make_HTTPS_handler(
1311 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1312 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1313 opener = compat_urllib_request.build_opener(
1314 https_handler, proxy_handler, cookie_processor, ydlh)
1315 # Delete the default user-agent header, which would otherwise apply in
1316 # cases where our custom HTTP handler doesn't come into play
1317 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1318 opener.addheaders = []
1319 self._opener = opener
1321 def encode(self, s):
1322 if isinstance(s, bytes):
1323 return s # Already encoded
1326 return s.encode(self.get_encoding())
1327 except UnicodeEncodeError as err:
1328 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1331 def get_encoding(self):
1332 encoding = self.params.get('encoding')
1333 if encoding is None:
1334 encoding = preferredencoding()