youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import os
  14 import platform
  15 import re
  16 import shutil
  17 import subprocess
  18 import socket
  19 import sys
  20 import time
  21 import traceback
  22
  23 if os.name == 'nt':
  24     import ctypes
  25
  26 from .compat import (
  27     compat_cookiejar,
  28     compat_expanduser,
  29     compat_http_client,
  30     compat_kwargs,
  31     compat_str,
  32     compat_urllib_error,
  33     compat_urllib_request,
  34 )
  35 from .utils import (
  36     escape_url,
  37     ContentTooShortError,
  38     date_from_str,
  39     DateRange,
  40     DEFAULT_OUTTMPL,
  41     determine_ext,
  42     DownloadError,
  43     encodeFilename,
  44     ExtractorError,
  45     format_bytes,
  46     formatSeconds,
  47     get_term_width,
  48     locked_file,
  49     make_HTTPS_handler,
  50     MaxDownloadsReached,
  51     PagedList,
  52     PostProcessingError,
  53     platform_name,
  54     preferredencoding,
  55     SameFileError,
  56     sanitize_filename,
  57     subtitles_filename,
  58     takewhile_inclusive,
  59     UnavailableVideoError,
  60     url_basename,
  61     write_json_file,
  62     write_string,
  63     YoutubeDLHandler,
  64     prepend_extension,
  65     args_to_str,
  66     age_restricted,
  67 )
  68 from .cache import Cache
  69 from .extractor import get_info_extractor, gen_extractors
  70 from .downloader import get_suitable_downloader
  71 from .downloader.rtmp import rtmpdump_version
  72 from .postprocessor import (
  73     FFmpegMergerPP,
  74     FFmpegPostProcessor,
  75     get_postprocessor,
  76 )
  77 from .version import __version__
  78
  79
  80 class YoutubeDL(object):
  81     """YoutubeDL class.
  82
  83     YoutubeDL objects are the ones responsible of downloading the
  84     actual video file and writing it to disk if the user has requested
  85     it, among some other tasks. In most cases there should be one per
  86     program. As, given a video URL, the downloader doesn't know how to
  87     extract all the needed information, task that InfoExtractors do, it
  88     has to pass the URL to one of them.
  89
  90     For this, YoutubeDL objects have a method that allows
  91     InfoExtractors to be registered in a given order. When it is passed
  92     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  93     finds that reports being able to handle it. The InfoExtractor extracts
  94     all the information about the video or videos the URL refers to, and
  95     YoutubeDL process the extracted information, possibly using a File
  96     Downloader to download the video.
  97
  98     YoutubeDL objects accept a lot of parameters. In order not to saturate
  99     the object constructor with arguments, it receives a dictionary of
 100     options instead. These options are available through the params
 101     attribute for the InfoExtractors to use. The YoutubeDL also
 102     registers itself as the downloader in charge for the InfoExtractors
 103     that are added to it, so this is a "mutual registration".
 104
 105     Available options:
 106
 107     username:          Username for authentication purposes.
 108     password:          Password for authentication purposes.
 109     videopassword:     Password for acces a video.
 110     usenetrc:          Use netrc for authentication instead.
 111     verbose:           Print additional info to stdout.
 112     quiet:             Do not print messages to stdout.
 113     no_warnings:       Do not print out anything for warnings.
 114     forceurl:          Force printing final URL.
 115     forcetitle:        Force printing title.
 116     forceid:           Force printing ID.
 117     forcethumbnail:    Force printing thumbnail URL.
 118     forcedescription:  Force printing description.
 119     forcefilename:     Force printing final filename.
 120     forceduration:     Force printing duration.
 121     forcejson:         Force printing info_dict as JSON.
 122     dump_single_json:  Force printing the info_dict of the whole playlist
 123                        (or video) as a single JSON line.
 124     simulate:          Do not download the video files.
 125     format:            Video format code. See options.py for more information.
 126     format_limit:      Highest quality format to try.
 127     outtmpl:           Template for output names.
 128     restrictfilenames: Do not allow "&" and spaces in file names
 129     ignoreerrors:      Do not stop on download errors.
 130     nooverwrites:      Prevent overwriting files.
 131     playliststart:     Playlist item to start at.
 132     playlistend:       Playlist item to end at.
 133     playlistreverse:   Download playlist items in reverse order.
 134     matchtitle:        Download only matching titles.
 135     rejecttitle:       Reject downloads for matching titles.
 136     logger:            Log messages to a logging.Logger instance.
 137     logtostderr:       Log messages to stderr instead of stdout.
 138     writedescription:  Write the video description to a .description file
 139     writeinfojson:     Write the video description to a .info.json file
 140     writeannotations:  Write the video annotations to a .annotations.xml file
 141     writethumbnail:    Write the thumbnail image to a file
 142     writesubtitles:    Write the video subtitles to a file
 143     writeautomaticsub: Write the automatic subtitles to a file
 144     allsubtitles:      Downloads all the subtitles of the video
 145                        (requires writesubtitles or writeautomaticsub)
 146     listsubtitles:     Lists all available subtitles for the video
 147     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 148     subtitleslangs:    List of languages of the subtitles to download
 149     keepvideo:         Keep the video file after post-processing
 150     daterange:         A DateRange object, download only if the upload_date is in the range.
 151     skip_download:     Skip the actual download of the video file
 152     cachedir:          Location of the cache files in the filesystem.
 153                        False to disable filesystem cache.
 154     noplaylist:        Download single video instead of a playlist if in doubt.
 155     age_limit:         An integer representing the user's age in years.
 156                        Unsuitable videos for the given age are skipped.
 157     min_views:         An integer representing the minimum view count the video
 158                        must have in order to not be skipped.
 159                        Videos without view count information are always
 160                        downloaded. None for no limit.
 161     max_views:         An integer representing the maximum view count.
 162                        Videos that are more popular than that are not
 163                        downloaded.
 164                        Videos without view count information are always
 165                        downloaded. None for no limit.
 166     download_archive:  File name of a file where all downloads are recorded.
 167                        Videos already present in the file are not downloaded
 168                        again.
 169     cookiefile:        File name where cookies should be read from and dumped to.
 170     nocheckcertificate:Do not verify SSL certificates
 171     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 172                        At the moment, this is only supported by YouTube.
 173     proxy:             URL of the proxy server to use
 174     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 175     bidi_workaround:   Work around buggy terminals without bidirectional text
 176                        support, using fridibi
 177     debug_printtraffic:Print out sent and received HTTP traffic
 178     include_ads:       Download ads as well
 179     default_search:    Prepend this string if an input url is not valid.
 180                        'auto' for elaborate guessing
 181     encoding:          Use this encoding instead of the system-specified.
 182     extract_flat:      Do not resolve URLs, return the immediate result.
 183                        Pass in 'in_playlist' to only show this behavior for
 184                        playlist items.
 185     postprocessors:    A list of dictionaries, each with an entry
 186                        * key:  The name of the postprocessor. See
 187                                youtube_dl/postprocessor/__init__.py for a list.
 188                        as well as any further keyword arguments for the
 189                        postprocessor.
 190     progress_hooks:    A list of functions that get called on download
 191                        progress, with a dictionary with the entries
 192                        * filename: The final filename
 193                        * status: One of "downloading" and "finished"
 194
 195                        The dict may also have some of the following entries:
 196
 197                        * downloaded_bytes: Bytes on disk
 198                        * total_bytes: Size of the whole file, None if unknown
 199                        * tmpfilename: The filename we're currently writing to
 200                        * eta: The estimated time in seconds, None if unknown
 201                        * speed: The download speed in bytes/second, None if
 202                                 unknown
 203
 204                        Progress hooks are guaranteed to be called at least once
 205                        (with status "finished") if the download is successful.
 206
 207
 208     The following parameters are not used by YoutubeDL itself, they are used by
 209     the FileDownloader:
 210     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 211     noresizebuffer, retries, continuedl, noprogress, consoletitle
 212
 213     The following options are used by the post processors:
 214     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 215                        otherwise prefer avconv.
 216     exec_cmd:          Arbitrary command to run after downloading
 217     """
 218
 219     params = None
 220     _ies = []
 221     _pps = []
 222     _download_retcode = None
 223     _num_downloads = None
 224     _screen_file = None
 225
 226     def __init__(self, params=None, auto_init=True):
 227         """Create a FileDownloader object with the given options."""
 228         if params is None:
 229             params = {}
 230         self._ies = []
 231         self._ies_instances = {}
 232         self._pps = []
 233         self._progress_hooks = []
 234         self._download_retcode = 0
 235         self._num_downloads = 0
 236         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 237         self._err_file = sys.stderr
 238         self.params = params
 239         self.cache = Cache(self)
 240
 241         if params.get('bidi_workaround', False):
 242             try:
 243                 import pty
 244                 master, slave = pty.openpty()
 245                 width = get_term_width()
 246                 if width is None:
 247                     width_args = []
 248                 else:
 249                     width_args = ['-w', str(width)]
 250                 sp_kwargs = dict(
 251                     stdin=subprocess.PIPE,
 252                     stdout=slave,
 253                     stderr=self._err_file)
 254                 try:
 255                     self._output_process = subprocess.Popen(
 256                         ['bidiv'] + width_args, **sp_kwargs
 257                     )
 258                 except OSError:
 259                     self._output_process = subprocess.Popen(
 260                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 261                 self._output_channel = os.fdopen(master, 'rb')
 262             except OSError as ose:
 263                 if ose.errno == 2:
 264                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 265                 else:
 266                     raise
 267
 268         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 269                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 270                 and not params.get('restrictfilenames', False)):
 271             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 272             self.report_warning(
 273                 'Assuming --restrict-filenames since file system encoding '
 274                 'cannot encode all characters. '
 275                 'Set the LC_ALL environment variable to fix this.')
 276             self.params['restrictfilenames'] = True
 277
 278         if '%(stitle)s' in self.params.get('outtmpl', ''):
 279             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 280
 281         self._setup_opener()
 282
 283         if auto_init:
 284             self.print_debug_header()
 285             self.add_default_info_extractors()
 286
 287         for pp_def_raw in self.params.get('postprocessors', []):
 288             pp_class = get_postprocessor(pp_def_raw['key'])
 289             pp_def = dict(pp_def_raw)
 290             del pp_def['key']
 291             pp = pp_class(self, **compat_kwargs(pp_def))
 292             self.add_post_processor(pp)
 293
 294         for ph in self.params.get('progress_hooks', []):
 295             self.add_progress_hook(ph)
 296
 297     def warn_if_short_id(self, argv):
 298         # short YouTube ID starting with dash?
 299         idxs = [
 300             i for i, a in enumerate(argv)
 301             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 302         if idxs:
 303             correct_argv = (
 304                 ['youtube-dl'] +
 305                 [a for i, a in enumerate(argv) if i not in idxs] +
 306                 ['--'] + [argv[i] for i in idxs]
 307             )
 308             self.report_warning(
 309                 'Long argument string detected. '
 310                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 311                 args_to_str(correct_argv))
 312
 313     def add_info_extractor(self, ie):
 314         """Add an InfoExtractor object to the end of the list."""
 315         self._ies.append(ie)
 316         self._ies_instances[ie.ie_key()] = ie
 317         ie.set_downloader(self)
 318
 319     def get_info_extractor(self, ie_key):
 320         """
 321         Get an instance of an IE with name ie_key, it will try to get one from
 322         the _ies list, if there's no instance it will create a new one and add
 323         it to the extractor list.
 324         """
 325         ie = self._ies_instances.get(ie_key)
 326         if ie is None:
 327             ie = get_info_extractor(ie_key)()
 328             self.add_info_extractor(ie)
 329         return ie
 330
 331     def add_default_info_extractors(self):
 332         """
 333         Add the InfoExtractors returned by gen_extractors to the end of the list
 334         """
 335         for ie in gen_extractors():
 336             self.add_info_extractor(ie)
 337
 338     def add_post_processor(self, pp):
 339         """Add a PostProcessor object to the end of the chain."""
 340         self._pps.append(pp)
 341         pp.set_downloader(self)
 342
 343     def add_progress_hook(self, ph):
 344         """Add the progress hook (currently only for the file downloader)"""
 345         self._progress_hooks.append(ph)
 346
 347     def _bidi_workaround(self, message):
 348         if not hasattr(self, '_output_channel'):
 349             return message
 350
 351         assert hasattr(self, '_output_process')
 352         assert isinstance(message, compat_str)
 353         line_count = message.count('\n') + 1
 354         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 355         self._output_process.stdin.flush()
 356         res = ''.join(self._output_channel.readline().decode('utf-8')
 357                       for _ in range(line_count))
 358         return res[:-len('\n')]
 359
 360     def to_screen(self, message, skip_eol=False):
 361         """Print message to stdout if not in quiet mode."""
 362         return self.to_stdout(message, skip_eol, check_quiet=True)
 363
 364     def _write_string(self, s, out=None):
 365         write_string(s, out=out, encoding=self.params.get('encoding'))
 366
 367     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 368         """Print message to stdout if not in quiet mode."""
 369         if self.params.get('logger'):
 370             self.params['logger'].debug(message)
 371         elif not check_quiet or not self.params.get('quiet', False):
 372             message = self._bidi_workaround(message)
 373             terminator = ['\n', ''][skip_eol]
 374             output = message + terminator
 375
 376             self._write_string(output, self._screen_file)
 377
 378     def to_stderr(self, message):
 379         """Print message to stderr."""
 380         assert isinstance(message, compat_str)
 381         if self.params.get('logger'):
 382             self.params['logger'].error(message)
 383         else:
 384             message = self._bidi_workaround(message)
 385             output = message + '\n'
 386             self._write_string(output, self._err_file)
 387
 388     def to_console_title(self, message):
 389         if not self.params.get('consoletitle', False):
 390             return
 391         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 392             # c_wchar_p() might not be necessary if `message` is
 393             # already of type unicode()
 394             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 395         elif 'TERM' in os.environ:
 396             self._write_string('\033]0;%s\007' % message, self._screen_file)
 397
 398     def save_console_title(self):
 399         if not self.params.get('consoletitle', False):
 400             return
 401         if 'TERM' in os.environ:
 402             # Save the title on stack
 403             self._write_string('\033[22;0t', self._screen_file)
 404
 405     def restore_console_title(self):
 406         if not self.params.get('consoletitle', False):
 407             return
 408         if 'TERM' in os.environ:
 409             # Restore the title from stack
 410             self._write_string('\033[23;0t', self._screen_file)
 411
 412     def __enter__(self):
 413         self.save_console_title()
 414         return self
 415
 416     def __exit__(self, *args):
 417         self.restore_console_title()
 418
 419         if self.params.get('cookiefile') is not None:
 420             self.cookiejar.save()
 421
 422     def trouble(self, message=None, tb=None):
 423         """Determine action to take when a download problem appears.
 424
 425         Depending on if the downloader has been configured to ignore
 426         download errors or not, this method may throw an exception or
 427         not when errors are found, after printing the message.
 428
 429         tb, if given, is additional traceback information.
 430         """
 431         if message is not None:
 432             self.to_stderr(message)
 433         if self.params.get('verbose'):
 434             if tb is None:
 435                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 436                     tb = ''
 437                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 438                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 439                     tb += compat_str(traceback.format_exc())
 440                 else:
 441                     tb_data = traceback.format_list(traceback.extract_stack())
 442                     tb = ''.join(tb_data)
 443             self.to_stderr(tb)
 444         if not self.params.get('ignoreerrors', False):
 445             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 446                 exc_info = sys.exc_info()[1].exc_info
 447             else:
 448                 exc_info = sys.exc_info()
 449             raise DownloadError(message, exc_info)
 450         self._download_retcode = 1
 451
 452     def report_warning(self, message):
 453         '''
 454         Print the message to stderr, it will be prefixed with 'WARNING:'
 455         If stderr is a tty file the 'WARNING:' will be colored
 456         '''
 457         if self.params.get('logger') is not None:
 458             self.params['logger'].warning(message)
 459         else:
 460             if self.params.get('no_warnings'):
 461                 return
 462             if self._err_file.isatty() and os.name != 'nt':
 463                 _msg_header = '\033[0;33mWARNING:\033[0m'
 464             else:
 465                 _msg_header = 'WARNING:'
 466             warning_message = '%s %s' % (_msg_header, message)
 467             self.to_stderr(warning_message)
 468
 469     def report_error(self, message, tb=None):
 470         '''
 471         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 472         in red if stderr is a tty file.
 473         '''
 474         if self._err_file.isatty() and os.name != 'nt':
 475             _msg_header = '\033[0;31mERROR:\033[0m'
 476         else:
 477             _msg_header = 'ERROR:'
 478         error_message = '%s %s' % (_msg_header, message)
 479         self.trouble(error_message, tb)
 480
 481     def report_file_already_downloaded(self, file_name):
 482         """Report file has already been fully downloaded."""
 483         try:
 484             self.to_screen('[download] %s has already been downloaded' % file_name)
 485         except UnicodeEncodeError:
 486             self.to_screen('[download] The file has already been downloaded')
 487
 488     def prepare_filename(self, info_dict):
 489         """Generate the output filename."""
 490         try:
 491             template_dict = dict(info_dict)
 492
 493             template_dict['epoch'] = int(time.time())
 494             autonumber_size = self.params.get('autonumber_size')
 495             if autonumber_size is None:
 496                 autonumber_size = 5
 497             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 498             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 499             if template_dict.get('playlist_index') is not None:
 500                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 501             if template_dict.get('resolution') is None:
 502                 if template_dict.get('width') and template_dict.get('height'):
 503                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 504                 elif template_dict.get('height'):
 505                     template_dict['resolution'] = '%sp' % template_dict['height']
 506                 elif template_dict.get('width'):
 507                     template_dict['resolution'] = '?x%d' % template_dict['width']
 508
 509             sanitize = lambda k, v: sanitize_filename(
 510                 compat_str(v),
 511                 restricted=self.params.get('restrictfilenames'),
 512                 is_id=(k == 'id'))
 513             template_dict = dict((k, sanitize(k, v))
 514                                  for k, v in template_dict.items()
 515                                  if v is not None)
 516             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 517
 518             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 519             tmpl = compat_expanduser(outtmpl)
 520             filename = tmpl % template_dict
 521             return filename
 522         except ValueError as err:
 523             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 524             return None
 525
 526     def _match_entry(self, info_dict):
 527         """ Returns None iff the file should be downloaded """
 528
 529         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 530         if 'title' in info_dict:
 531             # This can happen when we're just evaluating the playlist
 532             title = info_dict['title']
 533             matchtitle = self.params.get('matchtitle', False)
 534             if matchtitle:
 535                 if not re.search(matchtitle, title, re.IGNORECASE):
 536                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 537             rejecttitle = self.params.get('rejecttitle', False)
 538             if rejecttitle:
 539                 if re.search(rejecttitle, title, re.IGNORECASE):
 540                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 541         date = info_dict.get('upload_date', None)
 542         if date is not None:
 543             dateRange = self.params.get('daterange', DateRange())
 544             if date not in dateRange:
 545                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 546         view_count = info_dict.get('view_count', None)
 547         if view_count is not None:
 548             min_views = self.params.get('min_views')
 549             if min_views is not None and view_count < min_views:
 550                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 551             max_views = self.params.get('max_views')
 552             if max_views is not None and view_count > max_views:
 553                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 554         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 555             return 'Skipping "%s" because it is age restricted' % title
 556         if self.in_download_archive(info_dict):
 557             return '%s has already been recorded in archive' % video_title
 558         return None
 559
 560     @staticmethod
 561     def add_extra_info(info_dict, extra_info):
 562         '''Set the keys from extra_info in info dict if they are missing'''
 563         for key, value in extra_info.items():
 564             info_dict.setdefault(key, value)
 565
 566     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 567                      process=True):
 568         '''
 569         Returns a list with a dictionary for each video we find.
 570         If 'download', also downloads the videos.
 571         extra_info is a dict containing the extra values to add to each result
 572          '''
 573
 574         if ie_key:
 575             ies = [self.get_info_extractor(ie_key)]
 576         else:
 577             ies = self._ies
 578
 579         for ie in ies:
 580             if not ie.suitable(url):
 581                 continue
 582
 583             if not ie.working():
 584                 self.report_warning('The program functionality for this site has been marked as broken, '
 585                                     'and will probably not work.')
 586
 587             try:
 588                 ie_result = ie.extract(url)
 589                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 590                     break
 591                 if isinstance(ie_result, list):
 592                     # Backwards compatibility: old IE result format
 593                     ie_result = {
 594                         '_type': 'compat_list',
 595                         'entries': ie_result,
 596                     }
 597                 self.add_default_extra_info(ie_result, ie, url)
 598                 if process:
 599                     return self.process_ie_result(ie_result, download, extra_info)
 600                 else:
 601                     return ie_result
 602             except ExtractorError as de:  # An error we somewhat expected
 603                 self.report_error(compat_str(de), de.format_traceback())
 604                 break
 605             except MaxDownloadsReached:
 606                 raise
 607             except Exception as e:
 608                 if self.params.get('ignoreerrors', False):
 609                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 610                     break
 611                 else:
 612                     raise
 613         else:
 614             self.report_error('no suitable InfoExtractor for URL %s' % url)
 615
 616     def add_default_extra_info(self, ie_result, ie, url):
 617         self.add_extra_info(ie_result, {
 618             'extractor': ie.IE_NAME,
 619             'webpage_url': url,
 620             'webpage_url_basename': url_basename(url),
 621             'extractor_key': ie.ie_key(),
 622         })
 623
 624     def process_ie_result(self, ie_result, download=True, extra_info={}):
 625         """
 626         Take the result of the ie(may be modified) and resolve all unresolved
 627         references (URLs, playlist items).
 628
 629         It will also download the videos if 'download'.
 630         Returns the resolved ie_result.
 631         """
 632
 633         result_type = ie_result.get('_type', 'video')
 634
 635         if result_type in ('url', 'url_transparent'):
 636             extract_flat = self.params.get('extract_flat', False)
 637             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 638                     extract_flat is True):
 639                 if self.params.get('forcejson', False):
 640                     self.to_stdout(json.dumps(ie_result))
 641                 return ie_result
 642
 643         if result_type == 'video':
 644             self.add_extra_info(ie_result, extra_info)
 645             return self.process_video_result(ie_result, download=download)
 646         elif result_type == 'url':
 647             # We have to add extra_info to the results because it may be
 648             # contained in a playlist
 649             return self.extract_info(ie_result['url'],
 650                                      download,
 651                                      ie_key=ie_result.get('ie_key'),
 652                                      extra_info=extra_info)
 653         elif result_type == 'url_transparent':
 654             # Use the information from the embedding page
 655             info = self.extract_info(
 656                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 657                 extra_info=extra_info, download=False, process=False)
 658
 659             force_properties = dict(
 660                 (k, v) for k, v in ie_result.items() if v is not None)
 661             for f in ('_type', 'url'):
 662                 if f in force_properties:
 663                     del force_properties[f]
 664             new_result = info.copy()
 665             new_result.update(force_properties)
 666
 667             assert new_result.get('_type') != 'url_transparent'
 668
 669             return self.process_ie_result(
 670                 new_result, download=download, extra_info=extra_info)
 671         elif result_type == 'playlist' or result_type == 'multi_video':
 672             # We process each entry in the playlist
 673             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 674             self.to_screen('[download] Downloading playlist: %s' % playlist)
 675
 676             playlist_results = []
 677
 678             playliststart = self.params.get('playliststart', 1) - 1
 679             playlistend = self.params.get('playlistend', None)
 680             # For backwards compatibility, interpret -1 as whole list
 681             if playlistend == -1:
 682                 playlistend = None
 683
 684             ie_entries = ie_result['entries']
 685             if isinstance(ie_entries, list):
 686                 n_all_entries = len(ie_entries)
 687                 entries = ie_entries[playliststart:playlistend]
 688                 n_entries = len(entries)
 689                 self.to_screen(
 690                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 691                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 692             elif isinstance(ie_entries, PagedList):
 693                 entries = ie_entries.getslice(
 694                     playliststart, playlistend)
 695                 n_entries = len(entries)
 696                 self.to_screen(
 697                     "[%s] playlist %s: Downloading %d videos" %
 698                     (ie_result['extractor'], playlist, n_entries))
 699             else:  # iterable
 700                 entries = list(itertools.islice(
 701                     ie_entries, playliststart, playlistend))
 702                 n_entries = len(entries)
 703                 self.to_screen(
 704                     "[%s] playlist %s: Downloading %d videos" %
 705                     (ie_result['extractor'], playlist, n_entries))
 706
 707             if self.params.get('playlistreverse', False):
 708                 entries = entries[::-1]
 709
 710             for i, entry in enumerate(entries, 1):
 711                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 712                 extra = {
 713                     'n_entries': n_entries,
 714                     'playlist': playlist,
 715                     'playlist_id': ie_result.get('id'),
 716                     'playlist_title': ie_result.get('title'),
 717                     'playlist_index': i + playliststart,
 718                     'extractor': ie_result['extractor'],
 719                     'webpage_url': ie_result['webpage_url'],
 720                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 721                     'extractor_key': ie_result['extractor_key'],
 722                 }
 723
 724                 reason = self._match_entry(entry)
 725                 if reason is not None:
 726                     self.to_screen('[download] ' + reason)
 727                     continue
 728
 729                 entry_result = self.process_ie_result(entry,
 730                                                       download=download,
 731                                                       extra_info=extra)
 732                 playlist_results.append(entry_result)
 733             ie_result['entries'] = playlist_results
 734             return ie_result
 735         elif result_type == 'compat_list':
 736             self.report_warning(
 737                 'Extractor %s returned a compat_list result. '
 738                 'It needs to be updated.' % ie_result.get('extractor'))
 739
 740             def _fixup(r):
 741                 self.add_extra_info(
 742                     r,
 743                     {
 744                         'extractor': ie_result['extractor'],
 745                         'webpage_url': ie_result['webpage_url'],
 746                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 747                         'extractor_key': ie_result['extractor_key'],
 748                     }
 749                 )
 750                 return r
 751             ie_result['entries'] = [
 752                 self.process_ie_result(_fixup(r), download, extra_info)
 753                 for r in ie_result['entries']
 754             ]
 755             return ie_result
 756         else:
 757             raise Exception('Invalid result type: %s' % result_type)
 758
 759     def select_format(self, format_spec, available_formats):
 760         if format_spec == 'best' or format_spec is None:
 761             return available_formats[-1]
 762         elif format_spec == 'worst':
 763             return available_formats[0]
 764         elif format_spec == 'bestaudio':
 765             audio_formats = [
 766                 f for f in available_formats
 767                 if f.get('vcodec') == 'none']
 768             if audio_formats:
 769                 return audio_formats[-1]
 770         elif format_spec == 'worstaudio':
 771             audio_formats = [
 772                 f for f in available_formats
 773                 if f.get('vcodec') == 'none']
 774             if audio_formats:
 775                 return audio_formats[0]
 776         elif format_spec == 'bestvideo':
 777             video_formats = [
 778                 f for f in available_formats
 779                 if f.get('acodec') == 'none']
 780             if video_formats:
 781                 return video_formats[-1]
 782         elif format_spec == 'worstvideo':
 783             video_formats = [
 784                 f for f in available_formats
 785                 if f.get('acodec') == 'none']
 786             if video_formats:
 787                 return video_formats[0]
 788         else:
 789             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 790             if format_spec in extensions:
 791                 filter_f = lambda f: f['ext'] == format_spec
 792             else:
 793                 filter_f = lambda f: f['format_id'] == format_spec
 794             matches = list(filter(filter_f, available_formats))
 795             if matches:
 796                 return matches[-1]
 797         return None
 798
 799     def process_video_result(self, info_dict, download=True):
 800         assert info_dict.get('_type', 'video') == 'video'
 801
 802         if 'id' not in info_dict:
 803             raise ExtractorError('Missing "id" field in extractor result')
 804         if 'title' not in info_dict:
 805             raise ExtractorError('Missing "title" field in extractor result')
 806
 807         if 'playlist' not in info_dict:
 808             # It isn't part of a playlist
 809             info_dict['playlist'] = None
 810             info_dict['playlist_index'] = None
 811
 812         thumbnails = info_dict.get('thumbnails')
 813         if thumbnails:
 814             thumbnails.sort(key=lambda t: (
 815                 t.get('width'), t.get('height'), t.get('url')))
 816             for t in thumbnails:
 817                 if 'width' in t and 'height' in t:
 818                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 819
 820         if thumbnails and 'thumbnail' not in info_dict:
 821             info_dict['thumbnail'] = thumbnails[-1]['url']
 822
 823         if 'display_id' not in info_dict and 'id' in info_dict:
 824             info_dict['display_id'] = info_dict['id']
 825
 826         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 827             # Working around negative timestamps in Windows
 828             # (see http://bugs.python.org/issue1646728)
 829             if info_dict['timestamp'] < 0 and os.name == 'nt':
 830                 info_dict['timestamp'] = 0
 831             upload_date = datetime.datetime.utcfromtimestamp(
 832                 info_dict['timestamp'])
 833             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 834
 835         # This extractors handle format selection themselves
 836         if info_dict['extractor'] in ['Youku']:
 837             if download:
 838                 self.process_info(info_dict)
 839             return info_dict
 840
 841         # We now pick which formats have to be downloaded
 842         if info_dict.get('formats') is None:
 843             # There's only one format available
 844             formats = [info_dict]
 845         else:
 846             formats = info_dict['formats']
 847
 848         if not formats:
 849             raise ExtractorError('No video formats found!')
 850
 851         # We check that all the formats have the format and format_id fields
 852         for i, format in enumerate(formats):
 853             if 'url' not in format:
 854                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 855
 856             if format.get('format_id') is None:
 857                 format['format_id'] = compat_str(i)
 858             if format.get('format') is None:
 859                 format['format'] = '{id} - {res}{note}'.format(
 860                     id=format['format_id'],
 861                     res=self.format_resolution(format),
 862                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 863                 )
 864             # Automatically determine file extension if missing
 865             if 'ext' not in format:
 866                 format['ext'] = determine_ext(format['url']).lower()
 867
 868         format_limit = self.params.get('format_limit', None)
 869         if format_limit:
 870             formats = list(takewhile_inclusive(
 871                 lambda f: f['format_id'] != format_limit, formats
 872             ))
 873
 874         # TODO Central sorting goes here
 875
 876         if formats[0] is not info_dict:
 877             # only set the 'formats' fields if the original info_dict list them
 878             # otherwise we end up with a circular reference, the first (and unique)
 879             # element in the 'formats' field in info_dict is info_dict itself,
 880             # wich can't be exported to json
 881             info_dict['formats'] = formats
 882         if self.params.get('listformats', None):
 883             self.list_formats(info_dict)
 884             return
 885
 886         req_format = self.params.get('format')
 887         if req_format is None:
 888             req_format = 'best'
 889         formats_to_download = []
 890         # The -1 is for supporting YoutubeIE
 891         if req_format in ('-1', 'all'):
 892             formats_to_download = formats
 893         else:
 894             for rfstr in req_format.split(','):
 895                 # We can accept formats requested in the format: 34/5/best, we pick
 896                 # the first that is available, starting from left
 897                 req_formats = rfstr.split('/')
 898                 for rf in req_formats:
 899                     if re.match(r'.+?\+.+?', rf) is not None:
 900                         # Two formats have been requested like '137+139'
 901                         format_1, format_2 = rf.split('+')
 902                         formats_info = (self.select_format(format_1, formats),
 903                                         self.select_format(format_2, formats))
 904                         if all(formats_info):
 905                             # The first format must contain the video and the
 906                             # second the audio
 907                             if formats_info[0].get('vcodec') == 'none':
 908                                 self.report_error('The first format must '
 909                                                   'contain the video, try using '
 910                                                   '"-f %s+%s"' % (format_2, format_1))
 911                                 return
 912                             selected_format = {
 913                                 'requested_formats': formats_info,
 914                                 'format': rf,
 915                                 'ext': formats_info[0]['ext'],
 916                                 'width': formats_info[0].get('width'),
 917                                 'height': formats_info[0].get('height'),
 918                                 'resolution': formats_info[0].get('resolution'),
 919                                 'fps': formats_info[0].get('fps'),
 920                                 'vcodec': formats_info[0].get('vcodec'),
 921                                 'vbr': formats_info[0].get('vbr'),
 922                                 'acodec': formats_info[1].get('acodec'),
 923                                 'abr': formats_info[1].get('abr'),
 924                             }
 925                         else:
 926                             selected_format = None
 927                     else:
 928                         selected_format = self.select_format(rf, formats)
 929                     if selected_format is not None:
 930                         formats_to_download.append(selected_format)
 931                         break
 932         if not formats_to_download:
 933             raise ExtractorError('requested format not available',
 934                                  expected=True)
 935
 936         if download:
 937             if len(formats_to_download) > 1:
 938                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 939             for format in formats_to_download:
 940                 new_info = dict(info_dict)
 941                 new_info.update(format)
 942                 self.process_info(new_info)
 943         # We update the info dict with the best quality format (backwards compatibility)
 944         info_dict.update(formats_to_download[-1])
 945         return info_dict
 946
 947     def process_info(self, info_dict):
 948         """Process a single resolved IE result."""
 949
 950         assert info_dict.get('_type', 'video') == 'video'
 951
 952         max_downloads = self.params.get('max_downloads')
 953         if max_downloads is not None:
 954             if self._num_downloads >= int(max_downloads):
 955                 raise MaxDownloadsReached()
 956
 957         info_dict['fulltitle'] = info_dict['title']
 958         if len(info_dict['title']) > 200:
 959             info_dict['title'] = info_dict['title'][:197] + '...'
 960
 961         # Keep for backwards compatibility
 962         info_dict['stitle'] = info_dict['title']
 963
 964         if 'format' not in info_dict:
 965             info_dict['format'] = info_dict['ext']
 966
 967         reason = self._match_entry(info_dict)
 968         if reason is not None:
 969             self.to_screen('[download] ' + reason)
 970             return
 971
 972         self._num_downloads += 1
 973
 974         filename = self.prepare_filename(info_dict)
 975
 976         # Forced printings
 977         if self.params.get('forcetitle', False):
 978             self.to_stdout(info_dict['fulltitle'])
 979         if self.params.get('forceid', False):
 980             self.to_stdout(info_dict['id'])
 981         if self.params.get('forceurl', False):
 982             if info_dict.get('requested_formats') is not None:
 983                 for f in info_dict['requested_formats']:
 984                     self.to_stdout(f['url'] + f.get('play_path', ''))
 985             else:
 986                 # For RTMP URLs, also include the playpath
 987                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 988         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 989             self.to_stdout(info_dict['thumbnail'])
 990         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 991             self.to_stdout(info_dict['description'])
 992         if self.params.get('forcefilename', False) and filename is not None:
 993             self.to_stdout(filename)
 994         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 995             self.to_stdout(formatSeconds(info_dict['duration']))
 996         if self.params.get('forceformat', False):
 997             self.to_stdout(info_dict['format'])
 998         if self.params.get('forcejson', False):
 999             info_dict['_filename'] = filename
1000             self.to_stdout(json.dumps(info_dict))
1001         if self.params.get('dump_single_json', False):
1002             info_dict['_filename'] = filename
1003
1004         # Do nothing else if in simulate mode
1005         if self.params.get('simulate', False):
1006             return
1007
1008         if filename is None:
1009             return
1010
1011         try:
1012             dn = os.path.dirname(encodeFilename(filename))
1013             if dn and not os.path.exists(dn):
1014                 os.makedirs(dn)
1015         except (OSError, IOError) as err:
1016             self.report_error('unable to create directory ' + compat_str(err))
1017             return
1018
1019         if self.params.get('writedescription', False):
1020             descfn = filename + '.description'
1021             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1022                 self.to_screen('[info] Video description is already present')
1023             elif info_dict.get('description') is None:
1024                 self.report_warning('There\'s no description to write.')
1025             else:
1026                 try:
1027                     self.to_screen('[info] Writing video description to: ' + descfn)
1028                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1029                         descfile.write(info_dict['description'])
1030                 except (OSError, IOError):
1031                     self.report_error('Cannot write description file ' + descfn)
1032                     return
1033
1034         if self.params.get('writeannotations', False):
1035             annofn = filename + '.annotations.xml'
1036             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1037                 self.to_screen('[info] Video annotations are already present')
1038             else:
1039                 try:
1040                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1041                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1042                         annofile.write(info_dict['annotations'])
1043                 except (KeyError, TypeError):
1044                     self.report_warning('There are no annotations to write.')
1045                 except (OSError, IOError):
1046                     self.report_error('Cannot write annotations file: ' + annofn)
1047                     return
1048
1049         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1050                                        self.params.get('writeautomaticsub')])
1051
1052         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1053             # subtitles download errors are already managed as troubles in relevant IE
1054             # that way it will silently go on when used with unsupporting IE
1055             subtitles = info_dict['subtitles']
1056             sub_format = self.params.get('subtitlesformat', 'srt')
1057             for sub_lang in subtitles.keys():
1058                 sub = subtitles[sub_lang]
1059                 if sub is None:
1060                     continue
1061                 try:
1062                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1063                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1064                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1065                     else:
1066                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1067                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1068                             subfile.write(sub)
1069                 except (OSError, IOError):
1070                     self.report_error('Cannot write subtitles file ' + sub_filename)
1071                     return
1072
1073         if self.params.get('writeinfojson', False):
1074             infofn = os.path.splitext(filename)[0] + '.info.json'
1075             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1076                 self.to_screen('[info] Video description metadata is already present')
1077             else:
1078                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1079                 try:
1080                     write_json_file(info_dict, infofn)
1081                 except (OSError, IOError):
1082                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1083                     return
1084
1085         if self.params.get('writethumbnail', False):
1086             if info_dict.get('thumbnail') is not None:
1087                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1088                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1089                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1090                     self.to_screen('[%s] %s: Thumbnail is already present' %
1091                                    (info_dict['extractor'], info_dict['id']))
1092                 else:
1093                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1094                                    (info_dict['extractor'], info_dict['id']))
1095                     try:
1096                         uf = self.urlopen(info_dict['thumbnail'])
1097                         with open(thumb_filename, 'wb') as thumbf:
1098                             shutil.copyfileobj(uf, thumbf)
1099                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1100                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1101                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1102                         self.report_warning('Unable to download thumbnail "%s": %s' %
1103                                             (info_dict['thumbnail'], compat_str(err)))
1104
1105         if not self.params.get('skip_download', False):
1106             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1107                 success = True
1108             else:
1109                 try:
1110                     def dl(name, info):
1111                         fd = get_suitable_downloader(info)(self, self.params)
1112                         for ph in self._progress_hooks:
1113                             fd.add_progress_hook(ph)
1114                         if self.params.get('verbose'):
1115                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1116                         return fd.download(name, info)
1117                     if info_dict.get('requested_formats') is not None:
1118                         downloaded = []
1119                         success = True
1120                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1121                         if not merger._executable:
1122                             postprocessors = []
1123                             self.report_warning('You have requested multiple '
1124                                                 'formats but ffmpeg or avconv are not installed.'
1125                                                 ' The formats won\'t be merged')
1126                         else:
1127                             postprocessors = [merger]
1128                         for f in info_dict['requested_formats']:
1129                             new_info = dict(info_dict)
1130                             new_info.update(f)
1131                             fname = self.prepare_filename(new_info)
1132                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1133                             downloaded.append(fname)
1134                             partial_success = dl(fname, new_info)
1135                             success = success and partial_success
1136                         info_dict['__postprocessors'] = postprocessors
1137                         info_dict['__files_to_merge'] = downloaded
1138                     else:
1139                         # Just a single file
1140                         success = dl(filename, info_dict)
1141                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1142                     self.report_error('unable to download video data: %s' % str(err))
1143                     return
1144                 except (OSError, IOError) as err:
1145                     raise UnavailableVideoError(err)
1146                 except (ContentTooShortError, ) as err:
1147                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1148                     return
1149
1150             if success:
1151                 try:
1152                     self.post_process(filename, info_dict)
1153                 except (PostProcessingError) as err:
1154                     self.report_error('postprocessing: %s' % str(err))
1155                     return
1156                 self.record_download_archive(info_dict)
1157
1158     def download(self, url_list):
1159         """Download a given list of URLs."""
1160         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1161         if (len(url_list) > 1 and
1162                 '%' not in outtmpl
1163                 and self.params.get('max_downloads') != 1):
1164             raise SameFileError(outtmpl)
1165
1166         for url in url_list:
1167             try:
1168                 # It also downloads the videos
1169                 res = self.extract_info(url)
1170             except UnavailableVideoError:
1171                 self.report_error('unable to download video')
1172             except MaxDownloadsReached:
1173                 self.to_screen('[info] Maximum number of downloaded files reached.')
1174                 raise
1175             else:
1176                 if self.params.get('dump_single_json', False):
1177                     self.to_stdout(json.dumps(res))
1178
1179         return self._download_retcode
1180
1181     def download_with_info_file(self, info_filename):
1182         with io.open(info_filename, 'r', encoding='utf-8') as f:
1183             info = json.load(f)
1184         try:
1185             self.process_ie_result(info, download=True)
1186         except DownloadError:
1187             webpage_url = info.get('webpage_url')
1188             if webpage_url is not None:
1189                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1190                 return self.download([webpage_url])
1191             else:
1192                 raise
1193         return self._download_retcode
1194
1195     def post_process(self, filename, ie_info):
1196         """Run all the postprocessors on the given file."""
1197         info = dict(ie_info)
1198         info['filepath'] = filename
1199         keep_video = None
1200         pps_chain = []
1201         if ie_info.get('__postprocessors') is not None:
1202             pps_chain.extend(ie_info['__postprocessors'])
1203         pps_chain.extend(self._pps)
1204         for pp in pps_chain:
1205             try:
1206                 keep_video_wish, new_info = pp.run(info)
1207                 if keep_video_wish is not None:
1208                     if keep_video_wish:
1209                         keep_video = keep_video_wish
1210                     elif keep_video is None:
1211                         # No clear decision yet, let IE decide
1212                         keep_video = keep_video_wish
1213             except PostProcessingError as e:
1214                 self.report_error(e.msg)
1215         if keep_video is False and not self.params.get('keepvideo', False):
1216             try:
1217                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1218                 os.remove(encodeFilename(filename))
1219             except (IOError, OSError):
1220                 self.report_warning('Unable to remove downloaded video file')
1221
1222     def _make_archive_id(self, info_dict):
1223         # Future-proof against any change in case
1224         # and backwards compatibility with prior versions
1225         extractor = info_dict.get('extractor_key')
1226         if extractor is None:
1227             if 'id' in info_dict:
1228                 extractor = info_dict.get('ie_key')  # key in a playlist
1229         if extractor is None:
1230             return None  # Incomplete video information
1231         return extractor.lower() + ' ' + info_dict['id']
1232
1233     def in_download_archive(self, info_dict):
1234         fn = self.params.get('download_archive')
1235         if fn is None:
1236             return False
1237
1238         vid_id = self._make_archive_id(info_dict)
1239         if vid_id is None:
1240             return False  # Incomplete video information
1241
1242         try:
1243             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1244                 for line in archive_file:
1245                     if line.strip() == vid_id:
1246                         return True
1247         except IOError as ioe:
1248             if ioe.errno != errno.ENOENT:
1249                 raise
1250         return False
1251
1252     def record_download_archive(self, info_dict):
1253         fn = self.params.get('download_archive')
1254         if fn is None:
1255             return
1256         vid_id = self._make_archive_id(info_dict)
1257         assert vid_id
1258         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1259             archive_file.write(vid_id + '\n')
1260
1261     @staticmethod
1262     def format_resolution(format, default='unknown'):
1263         if format.get('vcodec') == 'none':
1264             return 'audio only'
1265         if format.get('resolution') is not None:
1266             return format['resolution']
1267         if format.get('height') is not None:
1268             if format.get('width') is not None:
1269                 res = '%sx%s' % (format['width'], format['height'])
1270             else:
1271                 res = '%sp' % format['height']
1272         elif format.get('width') is not None:
1273             res = '?x%d' % format['width']
1274         else:
1275             res = default
1276         return res
1277
1278     def _format_note(self, fdict):
1279         res = ''
1280         if fdict.get('ext') in ['f4f', 'f4m']:
1281             res += '(unsupported) '
1282         if fdict.get('format_note') is not None:
1283             res += fdict['format_note'] + ' '
1284         if fdict.get('tbr') is not None:
1285             res += '%4dk ' % fdict['tbr']
1286         if fdict.get('container') is not None:
1287             if res:
1288                 res += ', '
1289             res += '%s container' % fdict['container']
1290         if (fdict.get('vcodec') is not None and
1291                 fdict.get('vcodec') != 'none'):
1292             if res:
1293                 res += ', '
1294             res += fdict['vcodec']
1295             if fdict.get('vbr') is not None:
1296                 res += '@'
1297         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1298             res += 'video@'
1299         if fdict.get('vbr') is not None:
1300             res += '%4dk' % fdict['vbr']
1301         if fdict.get('fps') is not None:
1302             res += ', %sfps' % fdict['fps']
1303         if fdict.get('acodec') is not None:
1304             if res:
1305                 res += ', '
1306             if fdict['acodec'] == 'none':
1307                 res += 'video only'
1308             else:
1309                 res += '%-5s' % fdict['acodec']
1310         elif fdict.get('abr') is not None:
1311             if res:
1312                 res += ', '
1313             res += 'audio'
1314         if fdict.get('abr') is not None:
1315             res += '@%3dk' % fdict['abr']
1316         if fdict.get('asr') is not None:
1317             res += ' (%5dHz)' % fdict['asr']
1318         if fdict.get('filesize') is not None:
1319             if res:
1320                 res += ', '
1321             res += format_bytes(fdict['filesize'])
1322         elif fdict.get('filesize_approx') is not None:
1323             if res:
1324                 res += ', '
1325             res += '~' + format_bytes(fdict['filesize_approx'])
1326         return res
1327
1328     def list_formats(self, info_dict):
1329         def line(format, idlen=20):
1330             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1331                 format['format_id'],
1332                 format['ext'],
1333                 self.format_resolution(format),
1334                 self._format_note(format),
1335             ))
1336
1337         formats = info_dict.get('formats', [info_dict])
1338         idlen = max(len('format code'),
1339                     max(len(f['format_id']) for f in formats))
1340         formats_s = [
1341             line(f, idlen) for f in formats
1342             if f.get('preference') is None or f['preference'] >= -1000]
1343         if len(formats) > 1:
1344             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1345             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1346
1347         header_line = line({
1348             'format_id': 'format code', 'ext': 'extension',
1349             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1350         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1351                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1352
1353     def urlopen(self, req):
1354         """ Start an HTTP download """
1355
1356         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1357         # always respected by websites, some tend to give out URLs with non percent-encoded
1358         # non-ASCII characters (see telemb.py, ard.py [#3412])
1359         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1360         # To work around aforementioned issue we will replace request's original URL with
1361         # percent-encoded one
1362         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1363         url = req if req_is_string else req.get_full_url()
1364         url_escaped = escape_url(url)
1365
1366         # Substitute URL if any change after escaping
1367         if url != url_escaped:
1368             if req_is_string:
1369                 req = url_escaped
1370             else:
1371                 req = compat_urllib_request.Request(
1372                     url_escaped, data=req.data, headers=req.headers,
1373                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1374
1375         return self._opener.open(req, timeout=self._socket_timeout)
1376
1377     def print_debug_header(self):
1378         if not self.params.get('verbose'):
1379             return
1380
1381         if type('') is not compat_str:
1382             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1383             self.report_warning(
1384                 'Your Python is broken! Update to a newer and supported version')
1385
1386         stdout_encoding = getattr(
1387             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1388         encoding_str = (
1389             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1390                 locale.getpreferredencoding(),
1391                 sys.getfilesystemencoding(),
1392                 stdout_encoding,
1393                 self.get_encoding()))
1394         write_string(encoding_str, encoding=None)
1395
1396         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1397         try:
1398             sp = subprocess.Popen(
1399                 ['git', 'rev-parse', '--short', 'HEAD'],
1400                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1401                 cwd=os.path.dirname(os.path.abspath(__file__)))
1402             out, err = sp.communicate()
1403             out = out.decode().strip()
1404             if re.match('[0-9a-f]+', out):
1405                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1406         except:
1407             try:
1408                 sys.exc_clear()
1409             except:
1410                 pass
1411         self._write_string('[debug] Python version %s - %s\n' % (
1412             platform.python_version(), platform_name()))
1413
1414         exe_versions = FFmpegPostProcessor.get_versions()
1415         exe_versions['rtmpdump'] = rtmpdump_version()
1416         exe_str = ', '.join(
1417             '%s %s' % (exe, v)
1418             for exe, v in sorted(exe_versions.items())
1419             if v
1420         )
1421         if not exe_str:
1422             exe_str = 'none'
1423         self._write_string('[debug] exe versions: %s\n' % exe_str)
1424
1425         proxy_map = {}
1426         for handler in self._opener.handlers:
1427             if hasattr(handler, 'proxies'):
1428                 proxy_map.update(handler.proxies)
1429         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1430
1431     def _setup_opener(self):
1432         timeout_val = self.params.get('socket_timeout')
1433         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1434
1435         opts_cookiefile = self.params.get('cookiefile')
1436         opts_proxy = self.params.get('proxy')
1437
1438         if opts_cookiefile is None:
1439             self.cookiejar = compat_cookiejar.CookieJar()
1440         else:
1441             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1442                 opts_cookiefile)
1443             if os.access(opts_cookiefile, os.R_OK):
1444                 self.cookiejar.load()
1445
1446         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1447             self.cookiejar)
1448         if opts_proxy is not None:
1449             if opts_proxy == '':
1450                 proxies = {}
1451             else:
1452                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1453         else:
1454             proxies = compat_urllib_request.getproxies()
1455             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1456             if 'http' in proxies and 'https' not in proxies:
1457                 proxies['https'] = proxies['http']
1458         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1459
1460         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1461         https_handler = make_HTTPS_handler(
1462             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1463         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1464         opener = compat_urllib_request.build_opener(
1465             https_handler, proxy_handler, cookie_processor, ydlh)
1466         # Delete the default user-agent header, which would otherwise apply in
1467         # cases where our custom HTTP handler doesn't come into play
1468         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1469         opener.addheaders = []
1470         self._opener = opener
1471
1472     def encode(self, s):
1473         if isinstance(s, bytes):
1474             return s  # Already encoded
1475
1476         try:
1477             return s.encode(self.get_encoding())
1478         except UnicodeEncodeError as err:
1479             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1480             raise
1481
1482     def get_encoding(self):
1483         encoding = self.params.get('encoding')
1484         if encoding is None:
1485             encoding = preferredencoding()
1486         return encoding