2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_urllib_request,
55 UnavailableVideoError,
62 from .cache import Cache
63 from .extractor import get_info_extractor, gen_extractors
64 from .downloader import get_suitable_downloader
65 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
66 from .version import __version__
69 class YoutubeDL(object):
72 YoutubeDL objects are the ones responsible of downloading the
73 actual video file and writing it to disk if the user has requested
74 it, among some other tasks. In most cases there should be one per
75 program. As, given a video URL, the downloader doesn't know how to
76 extract all the needed information, a task that InfoExtractors do, it
77 has to pass the URL to one of them.
79 For this, YoutubeDL objects have a method that allows
80 InfoExtractors to be registered in a given order. When it is passed
81 a URL, the YoutubeDL object hands it to the first InfoExtractor it
82 finds that reports being able to handle it. The InfoExtractor extracts
83 all the information about the video or videos the URL refers to, and
84 YoutubeDL processes the extracted information, possibly using a File
85 Downloader to download the video.
87 YoutubeDL objects accept a lot of parameters. In order not to saturate
88 the object constructor with arguments, it receives a dictionary of
89 options instead. These options are available through the params
90 attribute for the InfoExtractors to use. The YoutubeDL also
91 registers itself as the downloader in charge for the InfoExtractors
92 that are added to it, so this is a "mutual registration".
96 username: Username for authentication purposes.
97 password: Password for authentication purposes.
98 videopassword: Password for accessing a video.
99 usenetrc: Use netrc for authentication instead.
100 verbose: Print additional info to stdout.
101 quiet: Do not print messages to stdout.
102 no_warnings: Do not print out anything for warnings.
103 forceurl: Force printing final URL.
104 forcetitle: Force printing title.
105 forceid: Force printing ID.
106 forcethumbnail: Force printing thumbnail URL.
107 forcedescription: Force printing description.
108 forcefilename: Force printing final filename.
109 forceduration: Force printing duration.
110 forcejson: Force printing info_dict as JSON.
111 dump_single_json: Force printing the info_dict of the whole playlist
112 (or video) as a single JSON line.
113 simulate: Do not download the video files.
114 format: Video format code.
115 format_limit: Highest quality format to try.
116 outtmpl: Template for output names.
117 restrictfilenames: Do not allow "&" and spaces in file names
118 ignoreerrors: Do not stop on download errors.
119 nooverwrites: Prevent overwriting files.
120 playliststart: Playlist item to start at.
121 playlistend: Playlist item to end at.
122 matchtitle: Download only matching titles.
123 rejecttitle: Reject downloads for matching titles.
124 logger: Log messages to a logging.Logger instance.
125 logtostderr: Log messages to stderr instead of stdout.
126 writedescription: Write the video description to a .description file
127 writeinfojson: Write the video description to a .info.json file
128 writeannotations: Write the video annotations to a .annotations.xml file
129 writethumbnail: Write the thumbnail image to a file
130 writesubtitles: Write the video subtitles to a file
131 writeautomaticsub: Write the automatic subtitles to a file
132 allsubtitles: Downloads all the subtitles of the video
133 (requires writesubtitles or writeautomaticsub)
134 listsubtitles: Lists all available subtitles for the video
135 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
136 subtitleslangs: List of languages of the subtitles to download
137 keepvideo: Keep the video file after post-processing
138 daterange: A DateRange object, download only if the upload_date is in the range.
139 skip_download: Skip the actual download of the video file
140 cachedir: Location of the cache files in the filesystem.
141 False to disable filesystem cache.
142 noplaylist: Download single video instead of a playlist if in doubt.
143 age_limit: An integer representing the user's age in years.
144 Unsuitable videos for the given age are skipped.
145 min_views: An integer representing the minimum view count the video
146 must have in order to not be skipped.
147 Videos without view count information are always
148 downloaded. None for no limit.
149 max_views: An integer representing the maximum view count.
150 Videos that are more popular than that are not
152 Videos without view count information are always
153 downloaded. None for no limit.
154 download_archive: File name of a file where all downloads are recorded.
155 Videos already present in the file are not downloaded
157 cookiefile: File name where cookies should be read from and dumped to.
158 nocheckcertificate:Do not verify SSL certificates
159 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
160 At the moment, this is only supported by YouTube.
161 proxy: URL of the proxy server to use
162 socket_timeout: Time to wait for unresponsive hosts, in seconds
163 bidi_workaround: Work around buggy terminals without bidirectional text
164 support, using fribidi
165 debug_printtraffic:Print out sent and received HTTP traffic
166 include_ads: Download ads as well
167 default_search: Prepend this string if an input url is not valid.
168 'auto' for elaborate guessing
169 encoding: Use this encoding instead of the system-specified.
170 extract_flat: Do not resolve URLs, return the immediate result.
171 Pass in 'in_playlist' to only show this behavior for
174 The following parameters are not used by YoutubeDL itself, they are used by
176 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
177 noresizebuffer, retries, continuedl, noprogress, consoletitle
179 The following options are used by the post processors:
180 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
181 otherwise prefer avconv.
182 exec_cmd: Arbitrary command to run after downloading
# Class-level defaults; both are re-initialised per instance in __init__.
# _download_retcode: process exit code accumulated across downloads.
# _num_downloads: counter used e.g. by the %(autonumber)s template field.
188 _download_retcode = None
189 _num_downloads = None
# Constructor: wires up output streams, the cache, the optional
# bidi-text workaround subprocess, and filename-restriction fallbacks.
# NOTE(review): gaps in the embedded numbering (194-196, 198, 204, 206,
# 208-209, 212-214, 216, 218, 220, 223-224, 229, 231-233, 238, 243)
# show original lines are missing from this listing.
192 def __init__(self, params=None):
193 """Create a FileDownloader object with the given options."""
197 self._ies_instances = {}
199 self._progress_hooks = []
200 self._download_retcode = 0
201 self._num_downloads = 0
# Screen output goes to stderr instead of stdout when 'logtostderr' is set.
202 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
203 self._err_file = sys.stderr
205 self.cache = Cache(self)
207 if params.get('bidi_workaround', False):
# A pty pair feeds output through an external bidi filter so RTL text
# renders correctly on terminals lacking bidirectional support.
210 master, slave = pty.openpty()
211 width = get_term_width()
215 width_args = ['-w', str(width)]
217 stdin=subprocess.PIPE,
219 stderr=self._err_file)
221 self._output_process = subprocess.Popen(
222 ['bidiv'] + width_args, **sp_kwargs
# Fallback: when 'bidiv' is unavailable, try 'fribidi' instead.
225 self._output_process = subprocess.Popen(
226 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
227 self._output_channel = os.fdopen(master, 'rb')
228 except OSError as ose:
230 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# On ASCII-locale Python 3 the filesystem API cannot encode arbitrary
# titles, so force --restrict-filenames (see issue referenced below).
234 if (sys.version_info >= (3,) and sys.platform != 'win32' and
235 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
236 and not params.get('restrictfilenames', False)):
237 # On Python 3, the Unicode filesystem API will throw errors (#1474)
239 'Assuming --restrict-filenames since file system encoding '
240 'cannot encode all characters. '
241 'Set the LC_ALL environment variable to fix this.')
242 self.params['restrictfilenames'] = True
# Warn about the long-deprecated %(stitle)s output-template field.
244 if '%(stitle)s' in self.params.get('outtmpl', ''):
245 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
# Register an InfoExtractor instance and give it a back-reference to us.
# NOTE(review): the original line between 250 and 252 (presumably the one
# appending `ie` to the ordered extractor list) is missing from this listing.
249 def add_info_extractor(self, ie):
250 """Add an InfoExtractor object to the end of the list."""
# Cache by key for O(1) lookup in get_info_extractor().
252 self._ies_instances[ie.ie_key()] = ie
253 ie.set_downloader(self)
# Lazily instantiate and cache an InfoExtractor identified by *ie_key*.
255 def get_info_extractor(self, ie_key):
257 Get an instance of an IE with name ie_key, it will try to get one from
258 the _ies list, if there's no instance it will create a new one and add
259 it to the extractor list.
261 ie = self._ies_instances.get(ie_key)
# NOTE(review): the guard (likely `if ie is None:`) and the trailing
# `return ie` are missing from this gappy listing.
263 ie = get_info_extractor(ie_key)()
264 self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """Register every extractor produced by gen_extractors(), in order."""
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
# Append a post-processor to the chain and point it back at this downloader.
# NOTE(review): line 276 (presumably `self._pps.append(pp)`) is absent
# from this listing.
274 def add_post_processor(self, pp):
275 """Add a PostProcessor object to the end of the chain."""
277 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register *ph*; the file downloader calls it with progress updates."""
    self._progress_hooks.append(ph)
# Route *message* through the external bidi process (set up in __init__)
# and return the reordered text for terminals without bidi support.
283 def _bidi_workaround(self, message):
284 if not hasattr(self, '_output_channel'):
# NOTE(review): the early-exit branch body (likely `return message`)
# is missing from this listing.
287 assert hasattr(self, '_output_process')
288 assert isinstance(message, compat_str)
# One readline() per input line keeps the pipe in lockstep.
289 line_count = message.count('\n') + 1
290 self._output_process.stdin.write((message + '\n').encode('utf-8'))
291 self._output_process.stdin.flush()
292 res = ''.join(self._output_channel.readline().decode('utf-8')
293 for _ in range(line_count))
# Strip the extra newline that was appended before writing.
294 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Show *message* on the screen, suppressed when quiet mode is on."""
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write *s* to *out*, honouring the user-selected output encoding."""
    enc = self.params.get('encoding')
    write_string(s, out=out, encoding=enc)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print *message* to the screen file unless quiet mode suppresses it."""
    logger = self.params.get('logger')
    if logger:
        # A configured logger takes over all screen output.
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    message = self._bidi_workaround(message)
    terminator = '' if skip_eol else '\n'
    self._write_string(message + terminator, self._screen_file)
# Emit *message* on stderr, or via the configured logger when present.
314 def to_stderr(self, message):
315 """Print message to stderr."""
316 assert isinstance(message, compat_str)
317 if self.params.get('logger'):
318 self.params['logger'].error(message)
# NOTE(review): the `else:` introducing this branch is missing from the
# listing; the lines below run only when no logger is configured.
320 message = self._bidi_workaround(message)
321 output = message + '\n'
322 self._write_string(output, self._err_file)
# Set the terminal/console window title to *message* when enabled.
324 def to_console_title(self, message):
325 if not self.params.get('consoletitle', False):
# NOTE(review): the guard's early `return` is missing from this listing.
327 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
328 # c_wchar_p() might not be necessary if `message` is
329 # already of type unicode()
330 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
331 elif 'TERM' in os.environ:
# xterm escape sequence OSC 0: set window and icon title.
332 self._write_string('\033]0;%s\007' % message, self._screen_file)
# Push the current console title onto the terminal's title stack.
334 def save_console_title(self):
335 if not self.params.get('consoletitle', False):
# NOTE(review): the guard's early `return` is missing from this listing.
337 if 'TERM' in os.environ:
338 # Save the title on stack
339 self._write_string('\033[22;0t', self._screen_file)
# Pop the previously saved console title from the terminal's title stack.
341 def restore_console_title(self):
342 if not self.params.get('consoletitle', False):
# NOTE(review): the guard's early `return` is missing from this listing.
344 if 'TERM' in os.environ:
345 # Restore the title from stack
346 self._write_string('\033[23;0t', self._screen_file)
# Context-manager support. The first fragment belongs to __enter__ —
# its `def` line (348) and `return self` are missing from this listing.
349 self.save_console_title()
352 def __exit__(self, *args):
353 self.restore_console_title()
# Persist cookies on exit when a cookie jar file is configured.
355 if self.params.get('cookiefile') is not None:
356 self.cookiejar.save()
# Central error handler: print the message, optionally a traceback, and
# either raise DownloadError or just record a non-zero return code
# depending on the 'ignoreerrors' option.
358 def trouble(self, message=None, tb=None):
359 """Determine action to take when a download problem appears.
361 Depending on if the downloader has been configured to ignore
362 download errors or not, this method may throw an exception or
363 not when errors are found, after printing the message.
365 tb, if given, is additional traceback information.
367 if message is not None:
368 self.to_stderr(message)
369 if self.params.get('verbose'):
# Build a traceback string when verbose and no tb was supplied.
371 if sys.exc_info()[0]: # if .trouble has been called from an except block
373 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
# ExtractorError wraps the original exc_info; include it first.
374 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
375 tb += compat_str(traceback.format_exc())
377 tb_data = traceback.format_list(traceback.extract_stack())
378 tb = ''.join(tb_data)
380 if not self.params.get('ignoreerrors', False):
381 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
382 exc_info = sys.exc_info()[1].exc_info
384 exc_info = sys.exc_info()
385 raise DownloadError(message, exc_info)
386 self._download_retcode = 1
# Print a warning, coloured on ttys, honouring 'logger' and 'no_warnings'.
388 def report_warning(self, message):
390 Print the message to stderr, it will be prefixed with 'WARNING:'
391 If stderr is a tty file the 'WARNING:' will be colored
393 if self.params.get('logger') is not None:
394 self.params['logger'].warning(message)
396 if self.params.get('no_warnings'):
# NOTE(review): the early `return`s following the two guards above are
# missing from this gappy listing.
398 if self._err_file.isatty() and os.name != 'nt':
# ANSI yellow for the WARNING prefix on capable terminals.
399 _msg_header = '\033[0;33mWARNING:\033[0m'
401 _msg_header = 'WARNING:'
402 warning_message = '%s %s' % (_msg_header, message)
403 self.to_stderr(warning_message)
# Like trouble(), but prefixes the message with a (possibly red) 'ERROR:'.
405 def report_error(self, message, tb=None):
407 Do the same as trouble, but prefixes the message with 'ERROR:', colored
408 in red if stderr is a tty file.
410 if self._err_file.isatty() and os.name != 'nt':
# ANSI red for the ERROR prefix on capable terminals.
411 _msg_header = '\033[0;31mERROR:\033[0m'
413 _msg_header = 'ERROR:'
414 error_message = '%s %s' % (_msg_header, message)
415 self.trouble(error_message, tb)
# Tell the user the target file already exists on disk; fall back to a
# generic message when the filename cannot be encoded for the console.
417 def report_file_already_downloaded(self, file_name):
418 """Report file has already been fully downloaded."""
# NOTE(review): the `try:` opening this block is missing from the listing.
420 self.to_screen('[download] %s has already been downloaded' % file_name)
421 except UnicodeEncodeError:
422 self.to_screen('[download] The file has already been downloaded')
# Expand the output template with fields from *info_dict*, sanitising
# each value so the result is a usable file name.
424 def prepare_filename(self, info_dict):
425 """Generate the output filename."""
427 template_dict = dict(info_dict)
429 template_dict['epoch'] = int(time.time())
430 autonumber_size = self.params.get('autonumber_size')
431 if autonumber_size is None:
# NOTE(review): the default assignment for autonumber_size is missing
# from this gappy listing.
433 autonumber_templ = '%0' + str(autonumber_size) + 'd'
434 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Zero-pad playlist_index to the width of the playlist size.
435 if template_dict.get('playlist_index') is not None:
436 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
# Derive a human-readable 'resolution' field when absent.
437 if template_dict.get('resolution') is None:
438 if template_dict.get('width') and template_dict.get('height'):
439 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
440 elif template_dict.get('height'):
441 template_dict['resolution'] = '%sp' % template_dict['height']
442 elif template_dict.get('width'):
443 template_dict['resolution'] = '?x%d' % template_dict['width']
445 sanitize = lambda k, v: sanitize_filename(
447 restricted=self.params.get('restrictfilenames'),
449 template_dict = dict((k, sanitize(k, v))
450 for k, v in template_dict.items()
# Any field missing from the template dict renders as 'NA'.
452 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
454 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
455 tmpl = compat_expanduser(outtmpl)
456 filename = tmpl % template_dict
458 except ValueError as err:
459 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
# Apply the user's filters (title regexes, date range, view counts, age
# limit, download archive). Returns a human-readable skip reason, or
# None when the entry should be downloaded.
462 def _match_entry(self, info_dict):
463 """ Returns None iff the file should be downloaded """
465 video_title = info_dict.get('title', info_dict.get('id', 'video'))
466 if 'title' in info_dict:
467 # This can happen when we're just evaluating the playlist
468 title = info_dict['title']
469 matchtitle = self.params.get('matchtitle', False)
471 if not re.search(matchtitle, title, re.IGNORECASE):
472 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
473 rejecttitle = self.params.get('rejecttitle', False)
475 if re.search(rejecttitle, title, re.IGNORECASE):
476 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
477 date = info_dict.get('upload_date', None)
479 dateRange = self.params.get('daterange', DateRange())
480 if date not in dateRange:
481 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
482 view_count = info_dict.get('view_count', None)
483 if view_count is not None:
484 min_views = self.params.get('min_views')
485 if min_views is not None and view_count < min_views:
486 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
487 max_views = self.params.get('max_views')
488 if max_views is not None and view_count > max_views:
489 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
490 age_limit = self.params.get('age_limit')
491 if age_limit is not None:
492 actual_age_limit = info_dict.get('age_limit')
493 if actual_age_limit is None:
# NOTE(review): the fallback assignment for a missing age_limit is
# absent from this gappy listing.
495 if age_limit < actual_age_limit:
496 return 'Skipping "' + title + '" because it is age restricted'
497 if self.in_download_archive(info_dict):
498 return '%s has already been recorded in archive' % video_title
# NOTE(review): the final `return None` is not visible in this listing.
def add_extra_info(info_dict, extra_info):
    """Copy entries of *extra_info* into *info_dict*, keeping existing keys."""
    # NOTE(review): upstream decorates this with @staticmethod; the
    # decorator line appears to be missing from this listing.
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
# Resolve *url* with a suitable InfoExtractor and hand the result to
# process_ie_result(); extraction errors are reported and, depending on
# 'ignoreerrors', possibly re-raised as DownloadError.
# NOTE(review): numbering gaps (508-509, 513-515, 517-524, 527-528, 531,
# 534, 537, 539, 541-542, 545, 547, 551-554) show missing lines,
# including the extractor loop header and the try/raise plumbing.
507 def extract_info(self, url, download=True, ie_key=None, extra_info={},
510 Returns a list with a dictionary for each video we find.
511 If 'download', also downloads the videos.
512 extra_info is a dict containing the extra values to add to each result
# When an explicit ie_key is given, only that extractor is tried.
516 ies = [self.get_info_extractor(ie_key)]
521 if not ie.suitable(url):
525 self.report_warning('The program functionality for this site has been marked as broken, '
526 'and will probably not work.')
529 ie_result = ie.extract(url)
530 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
532 if isinstance(ie_result, list):
533 # Backwards compatibility: old IE result format
535 '_type': 'compat_list',
536 'entries': ie_result,
538 self.add_default_extra_info(ie_result, ie, url)
540 return self.process_ie_result(ie_result, download, extra_info)
543 except ExtractorError as de: # An error we somewhat expected
544 self.report_error(compat_str(de), de.format_traceback())
546 except MaxDownloadsReached:
548 except Exception as e:
549 if self.params.get('ignoreerrors', False):
550 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
# Reached when no registered extractor accepted the URL.
555 self.report_error('no suitable InfoExtractor for URL %s' % url)
# Attach extractor identity and URL-derived fields to an IE result
# without overwriting values the extractor already set.
# NOTE(review): line 560 (presumably 'webpage_url': url,) and the
# closing of the dict literal are missing from this listing.
557 def add_default_extra_info(self, ie_result, ie, url):
558 self.add_extra_info(ie_result, {
559 'extractor': ie.IE_NAME,
561 'webpage_url_basename': url_basename(url),
562 'extractor_key': ie.ie_key(),
# Dispatch on the IE result's '_type': videos go to
# process_video_result(), 'url'/'url_transparent' results are re-fed to
# extract_info(), playlists are sliced and each entry recursively
# processed, and legacy 'compat_list' results are normalised.
# NOTE(review): many original lines are missing from this gappy listing
# (e.g. 566, 569, 572-573, 575, 582-583, 591, 599, 606-607, 610, 612,
# 617, 624, 626, 631-632, 637, 640, 645, 648, 651, 659-660, 664-665,
# 667-668, 671, 673, 675, 680-681, 685-687).
565 def process_ie_result(self, ie_result, download=True, extra_info={}):
567 Take the result of the ie(may be modified) and resolve all unresolved
568 references (URLs, playlist items).
570 It will also download the videos if 'download'.
571 Returns the resolved ie_result.
574 result_type = ie_result.get('_type', 'video')
# extract_flat short-circuits URL resolution (optionally only inside playlists).
576 if result_type in ('url', 'url_transparent'):
577 extract_flat = self.params.get('extract_flat', False)
578 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
579 extract_flat is True):
580 if self.params.get('forcejson', False):
581 self.to_stdout(json.dumps(ie_result))
584 if result_type == 'video':
585 self.add_extra_info(ie_result, extra_info)
586 return self.process_video_result(ie_result, download=download)
587 elif result_type == 'url':
588 # We have to add extra_info to the results because it may be
589 # contained in a playlist
590 return self.extract_info(ie_result['url'],
592 ie_key=ie_result.get('ie_key'),
593 extra_info=extra_info)
594 elif result_type == 'url_transparent':
595 # Use the information from the embedding page
596 info = self.extract_info(
597 ie_result['url'], ie_key=ie_result.get('ie_key'),
598 extra_info=extra_info, download=False, process=False)
# Merge selected fields of the embedded info over a copy of the
# original result.
600 def make_result(embedded_info):
601 new_result = ie_result.copy()
602 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
603 'entries', 'ie_key', 'duration',
604 'subtitles', 'annotations', 'format',
605 'thumbnail', 'thumbnails'):
608 if f in embedded_info:
609 new_result[f] = embedded_info[f]
611 new_result = make_result(info)
# A url_transparent result must not resolve to another one.
613 assert new_result.get('_type') != 'url_transparent'
614 if new_result.get('_type') == 'compat_list':
615 new_result['entries'] = [
616 make_result(e) for e in new_result['entries']]
618 return self.process_ie_result(
619 new_result, download=download, extra_info=extra_info)
620 elif result_type == 'playlist':
621 # We process each entry in the playlist
622 playlist = ie_result.get('title', None) or ie_result.get('id', None)
623 self.to_screen('[download] Downloading playlist: %s' % playlist)
625 playlist_results = []
# Convert the user's 1-based --playlist-start to a 0-based slice index.
627 playliststart = self.params.get('playliststart', 1) - 1
628 playlistend = self.params.get('playlistend', None)
629 # For backwards compatibility, interpret -1 as whole list
630 if playlistend == -1:
633 if isinstance(ie_result['entries'], list):
634 n_all_entries = len(ie_result['entries'])
635 entries = ie_result['entries'][playliststart:playlistend]
636 n_entries = len(entries)
638 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
639 (ie_result['extractor'], playlist, n_all_entries, n_entries))
# Lazily-paged playlists are sliced through PagedList.getslice().
641 assert isinstance(ie_result['entries'], PagedList)
642 entries = ie_result['entries'].getslice(
643 playliststart, playlistend)
644 n_entries = len(entries)
646 "[%s] playlist %s: Downloading %d videos" %
647 (ie_result['extractor'], playlist, n_entries))
649 for i, entry in enumerate(entries, 1):
650 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
652 'n_entries': n_entries,
653 'playlist': playlist,
654 'playlist_index': i + playliststart,
655 'extractor': ie_result['extractor'],
656 'webpage_url': ie_result['webpage_url'],
657 'webpage_url_basename': url_basename(ie_result['webpage_url']),
658 'extractor_key': ie_result['extractor_key'],
# Honour the skip filters for each playlist entry.
661 reason = self._match_entry(entry)
662 if reason is not None:
663 self.to_screen('[download] ' + reason)
666 entry_result = self.process_ie_result(entry,
669 playlist_results.append(entry_result)
670 ie_result['entries'] = playlist_results
672 elif result_type == 'compat_list':
674 self.add_extra_info(r,
676 'extractor': ie_result['extractor'],
677 'webpage_url': ie_result['webpage_url'],
678 'webpage_url_basename': url_basename(ie_result['webpage_url']),
679 'extractor_key': ie_result['extractor_key'],
682 ie_result['entries'] = [
683 self.process_ie_result(_fixup(r), download, extra_info)
684 for r in ie_result['entries']
# Unknown '_type' values are a programming error.
688 raise Exception('Invalid result type: %s' % result_type)
# Pick one format out of *available_formats* according to *format_spec*
# ('best', 'worst', 'bestaudio', ..., an extension, or a format_id).
# The list is assumed ordered worst-first, so [-1] is the best format.
# NOTE(review): several lines are missing from this gappy listing
# (e.g. the list-comprehension openers at 696/702/708/714, the
# `if audio_formats:`/`if video_formats:` guards, the `else:` branches,
# and the trailing `if matches: return matches[-1]` / `return None`).
690 def select_format(self, format_spec, available_formats):
691 if format_spec == 'best' or format_spec is None:
692 return available_formats[-1]
693 elif format_spec == 'worst':
694 return available_formats[0]
695 elif format_spec == 'bestaudio':
697 f for f in available_formats
698 if f.get('vcodec') == 'none']
700 return audio_formats[-1]
701 elif format_spec == 'worstaudio':
703 f for f in available_formats
704 if f.get('vcodec') == 'none']
706 return audio_formats[0]
707 elif format_spec == 'bestvideo':
709 f for f in available_formats
710 if f.get('acodec') == 'none']
712 return video_formats[-1]
713 elif format_spec == 'worstvideo':
715 f for f in available_formats
716 if f.get('acodec') == 'none']
718 return video_formats[0]
# A bare extension selects the best format with that container.
720 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
721 if format_spec in extensions:
722 filter_f = lambda f: f['ext'] == format_spec
724 filter_f = lambda f: f['format_id'] == format_spec
725 matches = list(filter(filter_f, available_formats))
# Validate and normalise a resolved video result (required fields,
# thumbnails, upload_date, display_id), build the candidate format list,
# apply the user's format selection, and hand each chosen format to
# process_info().
# NOTE(review): numbering gaps (732, 737, 742, 744, 747, 750, 753, 756,
# 761, 764, 766-767, 772, 774-775, 777, 782, 790, 794, 796, 799-800,
# 802, 811-812, 815, 820, 832, 834, 836-837, 839, 843, 846-848, 857-858)
# show missing lines throughout this listing.
730 def process_video_result(self, info_dict, download=True):
731 assert info_dict.get('_type', 'video') == 'video'
733 if 'id' not in info_dict:
734 raise ExtractorError('Missing "id" field in extractor result')
735 if 'title' not in info_dict:
736 raise ExtractorError('Missing "title" field in extractor result')
738 if 'playlist' not in info_dict:
739 # It isn't part of a playlist
740 info_dict['playlist'] = None
741 info_dict['playlist_index'] = None
# Sort thumbnails worst-first so [-1] is the best candidate.
743 thumbnails = info_dict.get('thumbnails')
745 thumbnails.sort(key=lambda t: (
746 t.get('width'), t.get('height'), t.get('url')))
748 if 'width' in t and 'height' in t:
749 t['resolution'] = '%dx%d' % (t['width'], t['height'])
751 if thumbnails and 'thumbnail' not in info_dict:
752 info_dict['thumbnail'] = thumbnails[-1]['url']
754 if 'display_id' not in info_dict and 'id' in info_dict:
755 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD) from a raw timestamp when absent.
757 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
758 upload_date = datetime.datetime.utcfromtimestamp(
759 info_dict['timestamp'])
760 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
762 # These extractors handle format selection themselves
763 if info_dict['extractor'] in ['Youku']:
765 self.process_info(info_dict)
768 # We now pick which formats have to be downloaded
769 if info_dict.get('formats') is None:
770 # There's only one format available
771 formats = [info_dict]
773 formats = info_dict['formats']
776 raise ExtractorError('No video formats found!')
778 # We check that all the formats have the format and format_id fields
779 for i, format in enumerate(formats):
780 if 'url' not in format:
781 raise ExtractorError('Missing "url" key in result (index %d)' % i)
783 if format.get('format_id') is None:
784 format['format_id'] = compat_str(i)
785 if format.get('format') is None:
786 format['format'] = '{id} - {res}{note}'.format(
787 id=format['format_id'],
788 res=self.format_resolution(format),
789 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
791 # Automatically determine file extension if missing
792 if 'ext' not in format:
793 format['ext'] = determine_ext(format['url']).lower()
# --format-limit truncates the list after the named format.
795 format_limit = self.params.get('format_limit', None)
797 formats = list(takewhile_inclusive(
798 lambda f: f['format_id'] != format_limit, formats
801 # TODO Central sorting goes here
803 if formats[0] is not info_dict:
804 # only set the 'formats' fields if the original info_dict list them
805 # otherwise we end up with a circular reference, the first (and unique)
806 # element in the 'formats' field in info_dict is info_dict itself,
807 # which can't be exported to json
808 info_dict['formats'] = formats
809 if self.params.get('listformats', None):
810 self.list_formats(info_dict)
813 req_format = self.params.get('format')
814 if req_format is None:
816 formats_to_download = []
817 # The -1 is for supporting YoutubeIE
818 if req_format in ('-1', 'all'):
819 formats_to_download = formats
821 for rfstr in req_format.split(','):
822 # We can accept formats requested in the format: 34/5/best, we pick
823 # the first that is available, starting from left
824 req_formats = rfstr.split('/')
825 for rf in req_formats:
826 if re.match(r'.+?\+.+?', rf) is not None:
827 # Two formats have been requested like '137+139'
828 format_1, format_2 = rf.split('+')
829 formats_info = (self.select_format(format_1, formats),
830 self.select_format(format_2, formats))
831 if all(formats_info):
833 'requested_formats': formats_info,
835 'ext': formats_info[0]['ext'],
838 selected_format = None
840 selected_format = self.select_format(rf, formats)
841 if selected_format is not None:
842 formats_to_download.append(selected_format)
844 if not formats_to_download:
845 raise ExtractorError('requested format not available',
849 if len(formats_to_download) > 1:
850 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
851 for format in formats_to_download:
852 new_info = dict(info_dict)
853 new_info.update(format)
854 self.process_info(new_info)
855 # We update the info dict with the best quality format (backwards compatibility)
856 info_dict.update(formats_to_download[-1])
859 def process_info(self, info_dict):
860 """Process a single resolved IE result."""
862 assert info_dict.get('_type', 'video') == 'video'
864 max_downloads = self.params.get('max_downloads')
865 if max_downloads is not None:
866 if self._num_downloads >= int(max_downloads):
867 raise MaxDownloadsReached()
869 info_dict['fulltitle'] = info_dict['title']
870 if len(info_dict['title']) > 200:
871 info_dict['title'] = info_dict['title'][:197] + '...'
873 # Keep for backwards compatibility
874 info_dict['stitle'] = info_dict['title']
876 if 'format' not in info_dict:
877 info_dict['format'] = info_dict['ext']
879 reason = self._match_entry(info_dict)
880 if reason is not None:
881 self.to_screen('[download] ' + reason)
884 self._num_downloads += 1
886 filename = self.prepare_filename(info_dict)
889 if self.params.get('forcetitle', False):
890 self.to_stdout(info_dict['fulltitle'])
891 if self.params.get('forceid', False):
892 self.to_stdout(info_dict['id'])
893 if self.params.get('forceurl', False):
894 # For RTMP URLs, also include the playpath
895 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
896 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
897 self.to_stdout(info_dict['thumbnail'])
898 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
899 self.to_stdout(info_dict['description'])
900 if self.params.get('forcefilename', False) and filename is not None:
901 self.to_stdout(filename)
902 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
903 self.to_stdout(formatSeconds(info_dict['duration']))
904 if self.params.get('forceformat', False):
905 self.to_stdout(info_dict['format'])
906 if self.params.get('forcejson', False):
907 info_dict['_filename'] = filename
908 self.to_stdout(json.dumps(info_dict))
909 if self.params.get('dump_single_json', False):
910 info_dict['_filename'] = filename
912 # Do nothing else if in simulate mode
913 if self.params.get('simulate', False):
920 dn = os.path.dirname(encodeFilename(filename))
921 if dn and not os.path.exists(dn):
923 except (OSError, IOError) as err:
924 self.report_error('unable to create directory ' + compat_str(err))
927 if self.params.get('writedescription', False):
928 descfn = filename + '.description'
929 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
930 self.to_screen('[info] Video description is already present')
933 self.to_screen('[info] Writing video description to: ' + descfn)
934 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
935 descfile.write(info_dict['description'])
936 except (KeyError, TypeError):
937 self.report_warning('There\'s no description to write.')
938 except (OSError, IOError):
939 self.report_error('Cannot write description file ' + descfn)
942 if self.params.get('writeannotations', False):
943 annofn = filename + '.annotations.xml'
944 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
945 self.to_screen('[info] Video annotations are already present')
948 self.to_screen('[info] Writing video annotations to: ' + annofn)
949 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
950 annofile.write(info_dict['annotations'])
951 except (KeyError, TypeError):
952 self.report_warning('There are no annotations to write.')
953 except (OSError, IOError):
954 self.report_error('Cannot write annotations file: ' + annofn)
957 subtitles_are_requested = any([self.params.get('writesubtitles', False),
958 self.params.get('writeautomaticsub')])
960 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
961 # subtitles download errors are already managed as troubles in relevant IE
962 # that way it will silently go on when used with unsupporting IE
963 subtitles = info_dict['subtitles']
964 sub_format = self.params.get('subtitlesformat', 'srt')
965 for sub_lang in subtitles.keys():
966 sub = subtitles[sub_lang]
970 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
971 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
972 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
974 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
975 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
977 except (OSError, IOError):
978 self.report_error('Cannot write subtitles file ' + sub_filename)
981 if self.params.get('writeinfojson', False):
982 infofn = os.path.splitext(filename)[0] + '.info.json'
983 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
984 self.to_screen('[info] Video description metadata is already present')
986 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
988 write_json_file(info_dict, encodeFilename(infofn))
989 except (OSError, IOError):
990 self.report_error('Cannot write metadata to JSON file ' + infofn)
993 if self.params.get('writethumbnail', False):
994 if info_dict.get('thumbnail') is not None:
995 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
996 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
997 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
998 self.to_screen('[%s] %s: Thumbnail is already present' %
999 (info_dict['extractor'], info_dict['id']))
1001 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1002 (info_dict['extractor'], info_dict['id']))
1004 uf = self.urlopen(info_dict['thumbnail'])
1005 with open(thumb_filename, 'wb') as thumbf:
1006 shutil.copyfileobj(uf, thumbf)
1007 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1008 (info_dict['extractor'], info_dict['id'], thumb_filename))
1009 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1010 self.report_warning('Unable to download thumbnail "%s": %s' %
1011 (info_dict['thumbnail'], compat_str(err)))
1013 if not self.params.get('skip_download', False):
1014 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1019 fd = get_suitable_downloader(info)(self, self.params)
1020 for ph in self._progress_hooks:
1021 fd.add_progress_hook(ph)
1022 if self.params.get('verbose'):
1023 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1024 return fd.download(name, info)
1025 if info_dict.get('requested_formats') is not None:
1028 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1029 if not merger._get_executable():
1031 self.report_warning('You have requested multiple '
1032 'formats but ffmpeg or avconv are not installed.'
1033 ' The formats won\'t be merged')
1035 postprocessors = [merger]
1036 for f in info_dict['requested_formats']:
1037 new_info = dict(info_dict)
1039 fname = self.prepare_filename(new_info)
1040 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1041 downloaded.append(fname)
1042 partial_success = dl(fname, new_info)
1043 success = success and partial_success
1044 info_dict['__postprocessors'] = postprocessors
1045 info_dict['__files_to_merge'] = downloaded
1047 # Just a single file
1048 success = dl(filename, info_dict)
1049 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1050 self.report_error('unable to download video data: %s' % str(err))
1052 except (OSError, IOError) as err:
1053 raise UnavailableVideoError(err)
1054 except (ContentTooShortError, ) as err:
1055 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1060 self.post_process(filename, info_dict)
1061 except (PostProcessingError) as err:
1062 self.report_error('postprocessing: %s' % str(err))
1065 self.record_download_archive(info_dict)
1067 def download(self, url_list):
1068 """Download a given list of URLs."""
# Refuse to download several videos into one fixed output file; the
# elided condition presumably checks outtmpl for template placeholders.
1069 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1070 if (len(url_list) > 1 and
1072 and self.params.get('max_downloads') != 1):
1073 raise SameFileError(outtmpl)
1075 for url in url_list:
# extract_info performs the actual download too, which is why the
# download-related except clauses below guard this call.
1077 # It also downloads the videos
1078 res = self.extract_info(url)
1079 except UnavailableVideoError:
1080 self.report_error('unable to download video')
1081 except MaxDownloadsReached:
# Reaching --max-downloads is informational, not an error.
1082 self.to_screen('[info] Maximum number of downloaded files reached.')
1085 if self.params.get('dump_single_json', False):
1086 self.to_stdout(json.dumps(res))
# Aggregated process exit code, accumulated by report_error() calls.
1088 return self._download_retcode
1090 def download_with_info_file(self, info_filename):
# Re-run a download from a previously written .info.json file.
# (Elided lines presumably parse the JSON into `info` and open a try block.)
1091 with io.open(info_filename, 'r', encoding='utf-8') as f:
1094 self.process_ie_result(info, download=True)
1095 except DownloadError:
# Stored info may be stale (e.g. expired media URLs); fall back to a
# fresh extraction from the original webpage URL when one is recorded.
1096 webpage_url = info.get('webpage_url')
1097 if webpage_url is not None:
1098 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1099 return self.download([webpage_url])
1102 return self._download_retcode
1104 def post_process(self, filename, ie_info):
1105 """Run all the postprocessors on the given file."""
# Work on a copy so the caller's info dict is not mutated.
1106 info = dict(ie_info)
1107 info['filepath'] = filename
# Per-download postprocessors (e.g. the format merger) run before the
# globally registered ones in self._pps.
1110 if ie_info.get('__postprocessors') is not None:
1111 pps_chain.extend(ie_info['__postprocessors'])
1112 pps_chain.extend(self._pps)
1113 for pp in pps_chain:
# Each postprocessor returns a keep-the-original wish:
# True/False is an explicit vote, None means "no opinion".
1115 keep_video_wish, new_info = pp.run(info)
1116 if keep_video_wish is not None:
1118 keep_video = keep_video_wish
1119 elif keep_video is None:
1120 # No clear decision yet, let IE decide
1121 keep_video = keep_video_wish
1122 except PostProcessingError as e:
1123 self.report_error(e.msg)
# Delete the original only when a postprocessor voted to drop it and
# the user did not request --keep-video.
1124 if keep_video is False and not self.params.get('keepvideo', False):
1126 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1127 os.remove(encodeFilename(filename))
1128 except (IOError, OSError):
# Best effort: a failed delete is only a warning.
1129 self.report_warning('Unable to remove downloaded video file')
1131 def _make_archive_id(self, info_dict):
1132 # Future-proof against any change in case
1133 # and backwards compatibility with prior versions
1134 extractor = info_dict.get('extractor_key')
1135 if extractor is None:
1136 if 'id' in info_dict:
1137 extractor = info_dict.get('ie_key') # key in a playlist
1138 if extractor is None:
1139 return None # Incomplete video information
1140 return extractor.lower() + ' ' + info_dict['id']
1142 def in_download_archive(self, info_dict):
# Return whether this video is already recorded in the --download-archive
# file; each archive line is an id produced by _make_archive_id.
1143 fn = self.params.get('download_archive')
1147 vid_id = self._make_archive_id(info_dict)
1149 return False  # Incomplete video information
# locked_file guards against concurrent youtube-dl processes.
1152 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1153 for line in archive_file:
1154 if line.strip() == vid_id:
1156 except IOError as ioe:
# A missing archive file simply means "not recorded yet"; any other
# I/O error is re-raised (on an elided line).
1157 if ioe.errno != errno.ENOENT:
1161 def record_download_archive(self, info_dict):
# Append this video's archive id to the --download-archive file so
# future runs can skip it (see in_download_archive).
1162 fn = self.params.get('download_archive')
1165 vid_id = self._make_archive_id(info_dict)
# locked_file serializes appends from concurrent processes.
1167 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1168 archive_file.write(vid_id + '\n')
1171 def format_resolution(format, default='unknown'):
# Produce a human-readable resolution string for a format dict:
# 'WxH' when both dimensions are known, '<H>p' with only a height,
# '?x<W>' with only a width.  A decorator line appears to be elided
# above -- presumably @staticmethod, since no `self` is taken.
1172 if format.get('vcodec') == 'none':
# An explicit 'resolution' value from the extractor wins.
1174 if format.get('resolution') is not None:
1175 return format['resolution']
1176 if format.get('height') is not None:
1177 if format.get('width') is not None:
1178 res = '%sx%s' % (format['width'], format['height'])
1180 res = '%sp' % format['height']
1181 elif format.get('width') is not None:
1182 res = '?x%d' % format['width']
1187 def _format_note(self, fdict):
# Build the free-form "note" column shown by --list-formats:
# format note, bitrates, container, codecs, sample rate and size.
1189 if fdict.get('ext') in ['f4f', 'f4m']:
# f4f/f4m (Adobe HDS) fragments are flagged as unsupported.
1190 res += '(unsupported) '
1191 if fdict.get('format_note') is not None:
1192 res += fdict['format_note'] + ' '
1193 if fdict.get('tbr') is not None:
# tbr = total (audio+video) bitrate, in kbit/s.
1194 res += '%4dk ' % fdict['tbr']
1195 if fdict.get('container') is not None:
1198 res += '%s container' % fdict['container']
1199 if (fdict.get('vcodec') is not None and
1200 fdict.get('vcodec') != 'none'):
1203 res += fdict['vcodec']
1204 if fdict.get('vbr') is not None:
1206 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1208 if fdict.get('vbr') is not None:
1209 res += '%4dk' % fdict['vbr']
1210 if fdict.get('acodec') is not None:
1213 if fdict['acodec'] == 'none':
1216 res += '%-5s' % fdict['acodec']
1217 elif fdict.get('abr') is not None:
1221 if fdict.get('abr') is not None:
1222 res += '@%3dk' % fdict['abr']
# asr = audio sampling rate, in Hz.
1223 if fdict.get('asr') is not None:
1224 res += ' (%5dHz)' % fdict['asr']
1225 if fdict.get('filesize') is not None:
1228 res += format_bytes(fdict['filesize'])
1229 elif fdict.get('filesize_approx') is not None:
# '~' marks an approximate (estimated) file size.
1232 res += '~' + format_bytes(fdict['filesize_approx'])
1235 def list_formats(self, info_dict):
# Print a table of every available format for the given video.
1236 def line(format, idlen=20):
# Render one table row: format id, extension, resolution, note.
1237 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1238 format['format_id'],
1240 self.format_resolution(format),
1241 self._format_note(format),
# Without a 'formats' list, treat info_dict itself as the only format.
1244 formats = info_dict.get('formats', [info_dict])
# Id-column width: the widest format_id, but at least the header text.
1245 idlen = max(len('format code'),
1246 max(len(f['format_id']) for f in formats))
1247 formats_s = [line(f, idlen) for f in formats]
1248 if len(formats) > 1:
# Formats are ordered worst-to-best, so annotate the two extremes.
1249 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1250 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1252 header_line = line({
1253 'format_id': 'format code', 'ext': 'extension',
1254 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1255 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1256 (info_dict['id'], header_line, '\n'.join(formats_s)))
1258 def urlopen(self, req):
1259 """ Start an HTTP download """
1261 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1262 # always respected by websites, some tend to give out URLs with non percent-encoded
1263 # non-ASCII characters (see telemb.py, ard.py [#3412])
1264 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1265 # To work around aforementioned issue we will replace request's original URL with
1266 # percent-encoded one
# `req` may be either a plain URL string or a urllib Request object.
1267 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1268 url = req if req_is_string else req.get_full_url()
1269 url_escaped = escape_url(url)
1271 # Substitute URL if any change after escaping
1272 if url != url_escaped:
# Rebuild the Request around the escaped URL, preserving its data,
# headers and origin information.  (The string case is handled on
# elided lines above this branch.)
1276 req = compat_urllib_request.Request(
1277 url_escaped, data=req.data, headers=req.headers,
1278 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1280 return self._opener.open(req, timeout=self._socket_timeout)
1282 def print_debug_header(self):
# Emit assorted debug information (versions, encodings, proxy map)
# when --verbose is active; otherwise do nothing.
1283 if not self.params.get('verbose'):
1286 if type('') is not compat_str:
1287 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1288 self.report_warning(
1289 'Your Python is broken! Update to a newer and supported version')
1292 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1293 locale.getpreferredencoding(),
1294 sys.getfilesystemencoding(),
1295 sys.stdout.encoding,
1296 self.get_encoding()))
1297 write_string(encoding_str, encoding=None)
1299 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best effort: report the git commit when running from a checkout
# (a try/except around this appears to be on elided lines).
1301 sp = subprocess.Popen(
1302 ['git', 'rev-parse', '--short', 'HEAD'],
1303 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1304 cwd=os.path.dirname(os.path.abspath(__file__)))
1305 out, err = sp.communicate()
1306 out = out.decode().strip()
1307 if re.match('[0-9a-f]+', out):
1308 self._write_string('[debug] Git HEAD: ' + out + '\n')
1314 self._write_string('[debug] Python version %s - %s\n' % (
1315 platform.python_version(), platform_name()))
# External helper (ffmpeg/avconv etc.) versions, comma separated.
1317 exe_versions = FFmpegPostProcessor.get_versions()
1318 exe_str = ', '.join(
1320 for exe, v in sorted(exe_versions.items())
1325 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect the proxies actually installed on the opener's handlers.
1328 for handler in self._opener.handlers:
1329 if hasattr(handler, 'proxies'):
1330 proxy_map.update(handler.proxies)
1331 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1333 def _setup_opener(self):
# Build the urllib opener used by self.urlopen(): cookie handling,
# proxy configuration, HTTPS certificate checking, traffic debugging.
1334 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 10 minutes.
1335 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1337 opts_cookiefile = self.params.get('cookiefile')
1338 opts_proxy = self.params.get('proxy')
# Without --cookies, use an in-memory jar; otherwise a Mozilla-format
# cookie file, loaded only when it already exists and is readable.
1340 if opts_cookiefile is None:
1341 self.cookiejar = compat_cookiejar.CookieJar()
1343 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1345 if os.access(opts_cookiefile, os.R_OK):
1346 self.cookiejar.load()
1348 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1350 if opts_proxy is not None:
# An explicit empty --proxy disables proxying entirely (presumably
# via an empty proxies dict on an elided line).
1351 if opts_proxy == '':
1354 proxies = {'http': opts_proxy, 'https': opts_proxy}
# No --proxy given: fall back to the environment's proxy settings.
1356 proxies = compat_urllib_request.getproxies()
1357 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1358 if 'http' in proxies and 'https' not in proxies:
1359 proxies['https'] = proxies['http']
1360 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1362 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1363 https_handler = make_HTTPS_handler(
1364 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1365 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1366 opener = compat_urllib_request.build_opener(
1367 https_handler, proxy_handler, cookie_processor, ydlh)
1368 # Delete the default user-agent header, which would otherwise apply in
1369 # cases where our custom HTTP handler doesn't come into play
1370 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1371 opener.addheaders = []
1372 self._opener = opener
1374 def encode(self, s):
# Encode text to bytes using the configured output encoding;
# byte strings pass through unchanged.
1375 if isinstance(s, bytes):
1376 return s  # Already encoded
1379 return s.encode(self.get_encoding())
1380 except UnicodeEncodeError as err:
# Enrich the exception with a user-facing hint before it is
# re-raised (the re-raise itself is on an elided line).
1381 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1384 def get_encoding(self):
1385 encoding = self.params.get('encoding')
1386 if encoding is None:
1387 encoding = preferredencoding()