youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import io
  12 import itertools
  13 import json
  14 import locale
  15 import operator
  16 import os
  17 import platform
  18 import re
  19 import shutil
  20 import subprocess
  21 import socket
  22 import sys
  23 import time
  24 import traceback
  25
  26 if os.name == 'nt':
  27     import ctypes
  28
  29 from .compat import (
  30     compat_basestring,
  31     compat_cookiejar,
  32     compat_expanduser,
  33     compat_get_terminal_size,
  34     compat_http_client,
  35     compat_kwargs,
  36     compat_str,
  37     compat_urllib_error,
  38     compat_urllib_request,
  39 )
  40 from .utils import (
  41     escape_url,
  42     ContentTooShortError,
  43     date_from_str,
  44     DateRange,
  45     DEFAULT_OUTTMPL,
  46     determine_ext,
  47     DownloadError,
  48     encodeFilename,
  49     ExtractorError,
  50     format_bytes,
  51     formatSeconds,
  52     HEADRequest,
  53     locked_file,
  54     make_HTTPS_handler,
  55     MaxDownloadsReached,
  56     PagedList,
  57     parse_filesize,
  58     PerRequestProxyHandler,
  59     PostProcessingError,
  60     platform_name,
  61     preferredencoding,
  62     render_table,
  63     SameFileError,
  64     sanitize_filename,
  65     sanitize_path,
  66     std_headers,
  67     subtitles_filename,
  68     UnavailableVideoError,
  69     url_basename,
  70     version_tuple,
  71     write_json_file,
  72     write_string,
  73     YoutubeDLHandler,
  74     prepend_extension,
  75     replace_extension,
  76     args_to_str,
  77     age_restricted,
  78 )
  79 from .cache import Cache
  80 from .extractor import get_info_extractor, gen_extractors
  81 from .downloader import get_suitable_downloader
  82 from .downloader.rtmp import rtmpdump_version
  83 from .postprocessor import (
  84     FFmpegFixupM4aPP,
  85     FFmpegFixupStretchedPP,
  86     FFmpegMergerPP,
  87     FFmpegPostProcessor,
  88     get_postprocessor,
  89 )
  90 from .version import __version__
  91
  92
  93 class YoutubeDL(object):
  94     """YoutubeDL class.
  95
    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Given a video URL, the downloader doesn't know how to
    extract all the needed information (that is the task of the
    InfoExtractors), so it has to pass the URL to one of them.
 102
    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.
 110
 111     YoutubeDL objects accept a lot of parameters. In order not to saturate
 112     the object constructor with arguments, it receives a dictionary of
 113     options instead. These options are available through the params
 114     attribute for the InfoExtractors to use. The YoutubeDL also
 115     registers itself as the downloader in charge for the InfoExtractors
 116     that are added to it, so this is a "mutual registration".
 117
 118     Available options:
 119
 120     username:          Username for authentication purposes.
 121     password:          Password for authentication purposes.
 122     videopassword:     Password for accessing a video.
 123     usenetrc:          Use netrc for authentication instead.
 124     verbose:           Print additional info to stdout.
 125     quiet:             Do not print messages to stdout.
 126     no_warnings:       Do not print out anything for warnings.
 127     forceurl:          Force printing final URL.
 128     forcetitle:        Force printing title.
 129     forceid:           Force printing ID.
 130     forcethumbnail:    Force printing thumbnail URL.
 131     forcedescription:  Force printing description.
 132     forcefilename:     Force printing final filename.
 133     forceduration:     Force printing duration.
 134     forcejson:         Force printing info_dict as JSON.
 135     dump_single_json:  Force printing the info_dict of the whole playlist
 136                        (or video) as a single JSON line.
 137     simulate:          Do not download the video files.
 138     format:            Video format code. See options.py for more information.
 139     outtmpl:           Template for output names.
 140     restrictfilenames: Do not allow "&" and spaces in file names
 141     ignoreerrors:      Do not stop on download errors.
 142     force_generic_extractor: Force downloader to use the generic extractor
 143     nooverwrites:      Prevent overwriting files.
 144     playliststart:     Playlist item to start at.
 145     playlistend:       Playlist item to end at.
 146     playlist_items:    Specific indices of playlist to download.
 147     playlistreverse:   Download playlist items in reverse order.
 148     matchtitle:        Download only matching titles.
 149     rejecttitle:       Reject downloads for matching titles.
 150     logger:            Log messages to a logging.Logger instance.
 151     logtostderr:       Log messages to stderr instead of stdout.
 152     writedescription:  Write the video description to a .description file
 153     writeinfojson:     Write the video description to a .info.json file
 154     writeannotations:  Write the video annotations to a .annotations.xml file
 155     writethumbnail:    Write the thumbnail image to a file
 156     write_all_thumbnails:  Write all thumbnail formats to files
 157     writesubtitles:    Write the video subtitles to a file
 158     writeautomaticsub: Write the automatic subtitles to a file
 159     allsubtitles:      Downloads all the subtitles of the video
 160                        (requires writesubtitles or writeautomaticsub)
 161     listsubtitles:     Lists all available subtitles for the video
 162     subtitlesformat:   The format code for subtitles
 163     subtitleslangs:    List of languages of the subtitles to download
 164     keepvideo:         Keep the video file after post-processing
 165     daterange:         A DateRange object, download only if the upload_date is in the range.
 166     skip_download:     Skip the actual download of the video file
 167     cachedir:          Location of the cache files in the filesystem.
 168                        False to disable filesystem cache.
 169     noplaylist:        Download single video instead of a playlist if in doubt.
 170     age_limit:         An integer representing the user's age in years.
 171                        Unsuitable videos for the given age are skipped.
 172     min_views:         An integer representing the minimum view count the video
 173                        must have in order to not be skipped.
 174                        Videos without view count information are always
 175                        downloaded. None for no limit.
 176     max_views:         An integer representing the maximum view count.
 177                        Videos that are more popular than that are not
 178                        downloaded.
 179                        Videos without view count information are always
 180                        downloaded. None for no limit.
 181     download_archive:  File name of a file where all downloads are recorded.
 182                        Videos already present in the file are not downloaded
 183                        again.
 184     cookiefile:        File name where cookies should be read from and dumped to.
 185     nocheckcertificate:Do not verify SSL certificates
 186     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 187                        At the moment, this is only supported by YouTube.
 188     proxy:             URL of the proxy server to use
 189     cn_verification_proxy:  URL of the proxy to use for IP address verification
 190                        on Chinese sites. (Experimental)
 191     socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 194     debug_printtraffic:Print out sent and received HTTP traffic
 195     include_ads:       Download ads as well
 196     default_search:    Prepend this string if an input url is not valid.
 197                        'auto' for elaborate guessing
 198     encoding:          Use this encoding instead of the system-specified.
 199     extract_flat:      Do not resolve URLs, return the immediate result.
 200                        Pass in 'in_playlist' to only show this behavior for
 201                        playlist items.
 202     postprocessors:    A list of dictionaries, each with an entry
 203                        * key:  The name of the postprocessor. See
 204                                youtube_dl/postprocessor/__init__.py for a list.
 205                        as well as any further keyword arguments for the
 206                        postprocessor.
 207     progress_hooks:    A list of functions that get called on download
 208                        progress, with a dictionary with the entries
 209                        * status: One of "downloading", "error", or "finished".
 210                                  Check this first and ignore unknown values.
 211
 212                        If status is one of "downloading", or "finished", the
 213                        following properties may also be present:
 214                        * filename: The final filename (always present)
 215                        * tmpfilename: The filename we're currently writing to
 216                        * downloaded_bytes: Bytes on disk
 217                        * total_bytes: Size of the whole file, None if unknown
 218                        * total_bytes_estimate: Guess of the eventual file size,
 219                                                None if unavailable.
 220                        * elapsed: The number of seconds since download started.
 221                        * eta: The estimated time in seconds, None if unknown
 222                        * speed: The download speed in bytes/second, None if
 223                                 unknown
 224                        * fragment_index: The counter of the currently
 225                                          downloaded video fragment.
 226                        * fragment_count: The number of fragments (= individual
 227                                          files that will be merged)
 228
 229                        Progress hooks are guaranteed to be called at least once
 230                        (with status "finished") if the download is successful.
 231     merge_output_format: Extension to use when merging formats.
 232     fixup:             Automatically correct known faults of the file.
 233                        One of:
 234                        - "never": do nothing
 235                        - "warn": only emit a warning
 236                        - "detect_or_warn": check whether we can do anything
 237                                            about it, warn otherwise (default)
 238     source_address:    (Experimental) Client-side IP address to bind to.
 239     call_home:         Boolean, true iff we are allowed to contact the
 240                        youtube-dl servers for debugging.
 241     sleep_interval:    Number of seconds to sleep before each download.
 242     listformats:       Print an overview of available video formats and exit.
 243     list_thumbnails:   Print a table of all thumbnails and exit.
 244     match_filter:      A function that gets called with the info_dict of
 245                        every video.
 246                        If it returns a message, the video is ignored.
 247                        If it returns None, the video is downloaded.
 248                        match_filter_func in utils.py is one example for this.
 249     no_color:          Do not emit color codes in output.
 250
 251     The following options determine which downloader is picked:
 252     external_downloader: Executable of the external downloader to call.
 253                        None or unset for standard (built-in) downloader.
 254     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 255
 256     The following parameters are not used by YoutubeDL itself, they are used by
 257     the downloader (see youtube_dl/downloader/common.py):
 258     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 259     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 260     xattr_set_filesize, external_downloader_args.
 261
 262     The following options are used by the post processors:
 263     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 264                        otherwise prefer avconv.
 265     postprocessor_args: A list of additional command-line arguments for the
 266                         postprocessor.
 267     """
 268
 269     params = None
 270     _ies = []
 271     _pps = []
 272     _download_retcode = None
 273     _num_downloads = None
 274     _screen_file = None
 275
 276     def __init__(self, params=None, auto_init=True):
 277         """Create a FileDownloader object with the given options."""
 278         if params is None:
 279             params = {}
 280         self._ies = []
 281         self._ies_instances = {}
 282         self._pps = []
 283         self._progress_hooks = []
 284         self._download_retcode = 0
 285         self._num_downloads = 0
 286         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 287         self._err_file = sys.stderr
 288         self.params = params
 289         self.cache = Cache(self)
 290
 291         if params.get('bidi_workaround', False):
 292             try:
 293                 import pty
 294                 master, slave = pty.openpty()
 295                 width = compat_get_terminal_size().columns
 296                 if width is None:
 297                     width_args = []
 298                 else:
 299                     width_args = ['-w', str(width)]
 300                 sp_kwargs = dict(
 301                     stdin=subprocess.PIPE,
 302                     stdout=slave,
 303                     stderr=self._err_file)
 304                 try:
 305                     self._output_process = subprocess.Popen(
 306                         ['bidiv'] + width_args, **sp_kwargs
 307                     )
 308                 except OSError:
 309                     self._output_process = subprocess.Popen(
 310                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 311                 self._output_channel = os.fdopen(master, 'rb')
 312             except OSError as ose:
 313                 if ose.errno == 2:
 314                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 315                 else:
 316                     raise
 317
 318         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 319                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 320                 not params.get('restrictfilenames', False)):
 321             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 322             self.report_warning(
 323                 'Assuming --restrict-filenames since file system encoding '
 324                 'cannot encode all characters. '
 325                 'Set the LC_ALL environment variable to fix this.')
 326             self.params['restrictfilenames'] = True
 327
 328         if isinstance(params.get('outtmpl'), bytes):
 329             self.report_warning(
 330                 'Parameter outtmpl is bytes, but should be a unicode string. '
 331                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 332
 333         self._setup_opener()
 334
 335         if auto_init:
 336             self.print_debug_header()
 337             self.add_default_info_extractors()
 338
 339         for pp_def_raw in self.params.get('postprocessors', []):
 340             pp_class = get_postprocessor(pp_def_raw['key'])
 341             pp_def = dict(pp_def_raw)
 342             del pp_def['key']
 343             pp = pp_class(self, **compat_kwargs(pp_def))
 344             self.add_post_processor(pp)
 345
 346         for ph in self.params.get('progress_hooks', []):
 347             self.add_progress_hook(ph)
 348
 349     def warn_if_short_id(self, argv):
 350         # short YouTube ID starting with dash?
 351         idxs = [
 352             i for i, a in enumerate(argv)
 353             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 354         if idxs:
 355             correct_argv = (
 356                 ['youtube-dl'] +
 357                 [a for i, a in enumerate(argv) if i not in idxs] +
 358                 ['--'] + [argv[i] for i in idxs]
 359             )
 360             self.report_warning(
 361                 'Long argument string detected. '
 362                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 363                 args_to_str(correct_argv))
 364
 365     def add_info_extractor(self, ie):
 366         """Add an InfoExtractor object to the end of the list."""
 367         self._ies.append(ie)
 368         self._ies_instances[ie.ie_key()] = ie
 369         ie.set_downloader(self)
 370
 371     def get_info_extractor(self, ie_key):
 372         """
 373         Get an instance of an IE with name ie_key, it will try to get one from
 374         the _ies list, if there's no instance it will create a new one and add
 375         it to the extractor list.
 376         """
 377         ie = self._ies_instances.get(ie_key)
 378         if ie is None:
 379             ie = get_info_extractor(ie_key)()
 380             self.add_info_extractor(ie)
 381         return ie
 382
 383     def add_default_info_extractors(self):
 384         """
 385         Add the InfoExtractors returned by gen_extractors to the end of the list
 386         """
 387         for ie in gen_extractors():
 388             self.add_info_extractor(ie)
 389
 390     def add_post_processor(self, pp):
 391         """Add a PostProcessor object to the end of the chain."""
 392         self._pps.append(pp)
 393         pp.set_downloader(self)
 394
 395     def add_progress_hook(self, ph):
 396         """Add the progress hook (currently only for the file downloader)"""
 397         self._progress_hooks.append(ph)
 398
 399     def _bidi_workaround(self, message):
 400         if not hasattr(self, '_output_channel'):
 401             return message
 402
 403         assert hasattr(self, '_output_process')
 404         assert isinstance(message, compat_str)
 405         line_count = message.count('\n') + 1
 406         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 407         self._output_process.stdin.flush()
 408         res = ''.join(self._output_channel.readline().decode('utf-8')
 409                       for _ in range(line_count))
 410         return res[:-len('\n')]
 411
 412     def to_screen(self, message, skip_eol=False):
 413         """Print message to stdout if not in quiet mode."""
 414         return self.to_stdout(message, skip_eol, check_quiet=True)
 415
 416     def _write_string(self, s, out=None):
 417         write_string(s, out=out, encoding=self.params.get('encoding'))
 418
 419     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 420         """Print message to stdout if not in quiet mode."""
 421         if self.params.get('logger'):
 422             self.params['logger'].debug(message)
 423         elif not check_quiet or not self.params.get('quiet', False):
 424             message = self._bidi_workaround(message)
 425             terminator = ['\n', ''][skip_eol]
 426             output = message + terminator
 427
 428             self._write_string(output, self._screen_file)
 429
 430     def to_stderr(self, message):
 431         """Print message to stderr."""
 432         assert isinstance(message, compat_str)
 433         if self.params.get('logger'):
 434             self.params['logger'].error(message)
 435         else:
 436             message = self._bidi_workaround(message)
 437             output = message + '\n'
 438             self._write_string(output, self._err_file)
 439
 440     def to_console_title(self, message):
 441         if not self.params.get('consoletitle', False):
 442             return
 443         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 444             # c_wchar_p() might not be necessary if `message` is
 445             # already of type unicode()
 446             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 447         elif 'TERM' in os.environ:
 448             self._write_string('\033]0;%s\007' % message, self._screen_file)
 449
 450     def save_console_title(self):
 451         if not self.params.get('consoletitle', False):
 452             return
 453         if 'TERM' in os.environ:
 454             # Save the title on stack
 455             self._write_string('\033[22;0t', self._screen_file)
 456
 457     def restore_console_title(self):
 458         if not self.params.get('consoletitle', False):
 459             return
 460         if 'TERM' in os.environ:
 461             # Restore the title from stack
 462             self._write_string('\033[23;0t', self._screen_file)
 463
 464     def __enter__(self):
 465         self.save_console_title()
 466         return self
 467
 468     def __exit__(self, *args):
 469         self.restore_console_title()
 470
 471         if self.params.get('cookiefile') is not None:
 472             self.cookiejar.save()
 473
 474     def trouble(self, message=None, tb=None):
 475         """Determine action to take when a download problem appears.
 476
 477         Depending on if the downloader has been configured to ignore
 478         download errors or not, this method may throw an exception or
 479         not when errors are found, after printing the message.
 480
 481         tb, if given, is additional traceback information.
 482         """
 483         if message is not None:
 484             self.to_stderr(message)
 485         if self.params.get('verbose'):
 486             if tb is None:
 487                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 488                     tb = ''
 489                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 490                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 491                     tb += compat_str(traceback.format_exc())
 492                 else:
 493                     tb_data = traceback.format_list(traceback.extract_stack())
 494                     tb = ''.join(tb_data)
 495             self.to_stderr(tb)
 496         if not self.params.get('ignoreerrors', False):
 497             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 498                 exc_info = sys.exc_info()[1].exc_info
 499             else:
 500                 exc_info = sys.exc_info()
 501             raise DownloadError(message, exc_info)
 502         self._download_retcode = 1
 503
 504     def report_warning(self, message):
 505         '''
 506         Print the message to stderr, it will be prefixed with 'WARNING:'
 507         If stderr is a tty file the 'WARNING:' will be colored
 508         '''
 509         if self.params.get('logger') is not None:
 510             self.params['logger'].warning(message)
 511         else:
 512             if self.params.get('no_warnings'):
 513                 return
 514             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 515                 _msg_header = '\033[0;33mWARNING:\033[0m'
 516             else:
 517                 _msg_header = 'WARNING:'
 518             warning_message = '%s %s' % (_msg_header, message)
 519             self.to_stderr(warning_message)
 520
 521     def report_error(self, message, tb=None):
 522         '''
 523         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 524         in red if stderr is a tty file.
 525         '''
 526         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 527             _msg_header = '\033[0;31mERROR:\033[0m'
 528         else:
 529             _msg_header = 'ERROR:'
 530         error_message = '%s %s' % (_msg_header, message)
 531         self.trouble(error_message, tb)
 532
 533     def report_file_already_downloaded(self, file_name):
 534         """Report file has already been fully downloaded."""
 535         try:
 536             self.to_screen('[download] %s has already been downloaded' % file_name)
 537         except UnicodeEncodeError:
 538             self.to_screen('[download] The file has already been downloaded')
 539
 540     def prepare_filename(self, info_dict):
 541         """Generate the output filename."""
 542         try:
 543             template_dict = dict(info_dict)
 544
 545             template_dict['epoch'] = int(time.time())
 546             autonumber_size = self.params.get('autonumber_size')
 547             if autonumber_size is None:
 548                 autonumber_size = 5
 549             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 550             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 551             if template_dict.get('playlist_index') is not None:
 552                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 553             if template_dict.get('resolution') is None:
 554                 if template_dict.get('width') and template_dict.get('height'):
 555                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 556                 elif template_dict.get('height'):
 557                     template_dict['resolution'] = '%sp' % template_dict['height']
 558                 elif template_dict.get('width'):
 559                     template_dict['resolution'] = '?x%d' % template_dict['width']
 560
 561             sanitize = lambda k, v: sanitize_filename(
 562                 compat_str(v),
 563                 restricted=self.params.get('restrictfilenames'),
 564                 is_id=(k == 'id'))
 565             template_dict = dict((k, sanitize(k, v))
 566                                  for k, v in template_dict.items()
 567                                  if v is not None)
 568             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 569
 570             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
 571             tmpl = compat_expanduser(outtmpl)
 572             filename = tmpl % template_dict
 573             # Temporary fix for #4787
 574             # 'Treat' all problem characters by passing filename through preferredencoding
 575             # to workaround encoding issues with subprocess on python2 @ Windows
 576             if sys.version_info < (3, 0) and sys.platform == 'win32':
 577                 filename = encodeFilename(filename, True).decode(preferredencoding())
 578             return filename
 579         except ValueError as err:
 580             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 581             return None
 582
 583     def _match_entry(self, info_dict, incomplete):
 584         """ Returns None iff the file should be downloaded """
 585
 586         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 587         if 'title' in info_dict:
 588             # This can happen when we're just evaluating the playlist
 589             title = info_dict['title']
 590             matchtitle = self.params.get('matchtitle', False)
 591             if matchtitle:
 592                 if not re.search(matchtitle, title, re.IGNORECASE):
 593                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 594             rejecttitle = self.params.get('rejecttitle', False)
 595             if rejecttitle:
 596                 if re.search(rejecttitle, title, re.IGNORECASE):
 597                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 598         date = info_dict.get('upload_date', None)
 599         if date is not None:
 600             dateRange = self.params.get('daterange', DateRange())
 601             if date not in dateRange:
 602                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 603         view_count = info_dict.get('view_count', None)
 604         if view_count is not None:
 605             min_views = self.params.get('min_views')
 606             if min_views is not None and view_count < min_views:
 607                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 608             max_views = self.params.get('max_views')
 609             if max_views is not None and view_count > max_views:
 610                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 611         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 612             return 'Skipping "%s" because it is age restricted' % video_title
 613         if self.in_download_archive(info_dict):
 614             return '%s has already been recorded in archive' % video_title
 615
 616         if not incomplete:
 617             match_filter = self.params.get('match_filter')
 618             if match_filter is not None:
 619                 ret = match_filter(info_dict)
 620                 if ret is not None:
 621                     return ret
 622
 623         return None
 624
 625     @staticmethod
 626     def add_extra_info(info_dict, extra_info):
 627         '''Set the keys from extra_info in info dict if they are missing'''
 628         for key, value in extra_info.items():
 629             info_dict.setdefault(key, value)
 630
 631     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 632                      process=True, force_generic_extractor=False):
 633         '''
 634         Returns a list with a dictionary for each video we find.
 635         If 'download', also downloads the videos.
 636         extra_info is a dict containing the extra values to add to each result
 637         '''
 638
 639         if not ie_key and force_generic_extractor:
 640             ie_key = 'Generic'
 641
 642         if ie_key:
 643             ies = [self.get_info_extractor(ie_key)]
 644         else:
 645             ies = self._ies
 646
 647         for ie in ies:
 648             if not ie.suitable(url):
 649                 continue
 650
 651             if not ie.working():
 652                 self.report_warning('The program functionality for this site has been marked as broken, '
 653                                     'and will probably not work.')
 654
 655             try:
 656                 ie_result = ie.extract(url)
 657                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 658                     break
 659                 if isinstance(ie_result, list):
 660                     # Backwards compatibility: old IE result format
 661                     ie_result = {
 662                         '_type': 'compat_list',
 663                         'entries': ie_result,
 664                     }
 665                 self.add_default_extra_info(ie_result, ie, url)
 666                 if process:
 667                     return self.process_ie_result(ie_result, download, extra_info)
 668                 else:
 669                     return ie_result
 670             except ExtractorError as de:  # An error we somewhat expected
 671                 self.report_error(compat_str(de), de.format_traceback())
 672                 break
 673             except MaxDownloadsReached:
 674                 raise
 675             except Exception as e:
 676                 if self.params.get('ignoreerrors', False):
 677                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 678                     break
 679                 else:
 680                     raise
 681         else:
 682             self.report_error('no suitable InfoExtractor for URL %s' % url)
 683
 684     def add_default_extra_info(self, ie_result, ie, url):
 685         self.add_extra_info(ie_result, {
 686             'extractor': ie.IE_NAME,
 687             'webpage_url': url,
 688             'webpage_url_basename': url_basename(url),
 689             'extractor_key': ie.ie_key(),
 690         })
 691
 692     def process_ie_result(self, ie_result, download=True, extra_info={}):
 693         """
 694         Take the result of the ie(may be modified) and resolve all unresolved
 695         references (URLs, playlist items).
 696
 697         It will also download the videos if 'download'.
 698         Returns the resolved ie_result.
 699         """
 700
 701         result_type = ie_result.get('_type', 'video')
 702
 703         if result_type in ('url', 'url_transparent'):
 704             extract_flat = self.params.get('extract_flat', False)
 705             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 706                     extract_flat is True):
 707                 if self.params.get('forcejson', False):
 708                     self.to_stdout(json.dumps(ie_result))
 709                 return ie_result
 710
 711         if result_type == 'video':
 712             self.add_extra_info(ie_result, extra_info)
 713             return self.process_video_result(ie_result, download=download)
 714         elif result_type == 'url':
 715             # We have to add extra_info to the results because it may be
 716             # contained in a playlist
 717             return self.extract_info(ie_result['url'],
 718                                      download,
 719                                      ie_key=ie_result.get('ie_key'),
 720                                      extra_info=extra_info)
 721         elif result_type == 'url_transparent':
 722             # Use the information from the embedding page
 723             info = self.extract_info(
 724                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 725                 extra_info=extra_info, download=False, process=False)
 726
 727             force_properties = dict(
 728                 (k, v) for k, v in ie_result.items() if v is not None)
 729             for f in ('_type', 'url'):
 730                 if f in force_properties:
 731                     del force_properties[f]
 732             new_result = info.copy()
 733             new_result.update(force_properties)
 734
 735             assert new_result.get('_type') != 'url_transparent'
 736
 737             return self.process_ie_result(
 738                 new_result, download=download, extra_info=extra_info)
 739         elif result_type == 'playlist' or result_type == 'multi_video':
 740             # We process each entry in the playlist
 741             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 742             self.to_screen('[download] Downloading playlist: %s' % playlist)
 743
 744             playlist_results = []
 745
 746             playliststart = self.params.get('playliststart', 1) - 1
 747             playlistend = self.params.get('playlistend', None)
 748             # For backwards compatibility, interpret -1 as whole list
 749             if playlistend == -1:
 750                 playlistend = None
 751
 752             playlistitems_str = self.params.get('playlist_items', None)
 753             playlistitems = None
 754             if playlistitems_str is not None:
 755                 def iter_playlistitems(format):
 756                     for string_segment in format.split(','):
 757                         if '-' in string_segment:
 758                             start, end = string_segment.split('-')
 759                             for item in range(int(start), int(end) + 1):
 760                                 yield int(item)
 761                         else:
 762                             yield int(string_segment)
 763                 playlistitems = iter_playlistitems(playlistitems_str)
 764
 765             ie_entries = ie_result['entries']
 766             if isinstance(ie_entries, list):
 767                 n_all_entries = len(ie_entries)
 768                 if playlistitems:
 769                     entries = [
 770                         ie_entries[i - 1] for i in playlistitems
 771                         if -n_all_entries <= i - 1 < n_all_entries]
 772                 else:
 773                     entries = ie_entries[playliststart:playlistend]
 774                 n_entries = len(entries)
 775                 self.to_screen(
 776                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 777                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 778             elif isinstance(ie_entries, PagedList):
 779                 if playlistitems:
 780                     entries = []
 781                     for item in playlistitems:
 782                         entries.extend(ie_entries.getslice(
 783                             item - 1, item
 784                         ))
 785                 else:
 786                     entries = ie_entries.getslice(
 787                         playliststart, playlistend)
 788                 n_entries = len(entries)
 789                 self.to_screen(
 790                     "[%s] playlist %s: Downloading %d videos" %
 791                     (ie_result['extractor'], playlist, n_entries))
 792             else:  # iterable
 793                 if playlistitems:
 794                     entry_list = list(ie_entries)
 795                     entries = [entry_list[i - 1] for i in playlistitems]
 796                 else:
 797                     entries = list(itertools.islice(
 798                         ie_entries, playliststart, playlistend))
 799                 n_entries = len(entries)
 800                 self.to_screen(
 801                     "[%s] playlist %s: Downloading %d videos" %
 802                     (ie_result['extractor'], playlist, n_entries))
 803
 804             if self.params.get('playlistreverse', False):
 805                 entries = entries[::-1]
 806
 807             for i, entry in enumerate(entries, 1):
 808                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 809                 extra = {
 810                     'n_entries': n_entries,
 811                     'playlist': playlist,
 812                     'playlist_id': ie_result.get('id'),
 813                     'playlist_title': ie_result.get('title'),
 814                     'playlist_index': i + playliststart,
 815                     'extractor': ie_result['extractor'],
 816                     'webpage_url': ie_result['webpage_url'],
 817                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 818                     'extractor_key': ie_result['extractor_key'],
 819                 }
 820
 821                 reason = self._match_entry(entry, incomplete=True)
 822                 if reason is not None:
 823                     self.to_screen('[download] ' + reason)
 824                     continue
 825
 826                 entry_result = self.process_ie_result(entry,
 827                                                       download=download,
 828                                                       extra_info=extra)
 829                 playlist_results.append(entry_result)
 830             ie_result['entries'] = playlist_results
 831             return ie_result
 832         elif result_type == 'compat_list':
 833             self.report_warning(
 834                 'Extractor %s returned a compat_list result. '
 835                 'It needs to be updated.' % ie_result.get('extractor'))
 836
 837             def _fixup(r):
 838                 self.add_extra_info(
 839                     r,
 840                     {
 841                         'extractor': ie_result['extractor'],
 842                         'webpage_url': ie_result['webpage_url'],
 843                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 844                         'extractor_key': ie_result['extractor_key'],
 845                     }
 846                 )
 847                 return r
 848             ie_result['entries'] = [
 849                 self.process_ie_result(_fixup(r), download, extra_info)
 850                 for r in ie_result['entries']
 851             ]
 852             return ie_result
 853         else:
 854             raise Exception('Invalid result type: %s' % result_type)
 855
 856     def _apply_format_filter(self, format_spec, available_formats):
 857         " Returns a tuple of the remaining format_spec and filtered formats "
 858
 859         OPERATORS = {
 860             '<': operator.lt,
 861             '<=': operator.le,
 862             '>': operator.gt,
 863             '>=': operator.ge,
 864             '=': operator.eq,
 865             '!=': operator.ne,
 866         }
 867         operator_rex = re.compile(r'''(?x)\s*\[
 868             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 869             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 870             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 871             \]$
 872             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 873         m = operator_rex.search(format_spec)
 874         if m:
 875             try:
 876                 comparison_value = int(m.group('value'))
 877             except ValueError:
 878                 comparison_value = parse_filesize(m.group('value'))
 879                 if comparison_value is None:
 880                     comparison_value = parse_filesize(m.group('value') + 'B')
 881                 if comparison_value is None:
 882                     raise ValueError(
 883                         'Invalid value %r in format specification %r' % (
 884                             m.group('value'), format_spec))
 885             op = OPERATORS[m.group('op')]
 886
 887         if not m:
 888             STR_OPERATORS = {
 889                 '=': operator.eq,
 890                 '!=': operator.ne,
 891             }
 892             str_operator_rex = re.compile(r'''(?x)\s*\[
 893                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 894                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 895                 \s*(?P<value>[a-zA-Z0-9_-]+)
 896                 \s*\]$
 897                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 898             m = str_operator_rex.search(format_spec)
 899             if m:
 900                 comparison_value = m.group('value')
 901                 op = STR_OPERATORS[m.group('op')]
 902
 903         if not m:
 904             raise ValueError('Invalid format specification %r' % format_spec)
 905
 906         def _filter(f):
 907             actual_value = f.get(m.group('key'))
 908             if actual_value is None:
 909                 return m.group('none_inclusive')
 910             return op(actual_value, comparison_value)
 911         new_formats = [f for f in available_formats if _filter(f)]
 912
 913         new_format_spec = format_spec[:-len(m.group(0))]
 914         if not new_format_spec:
 915             new_format_spec = 'best'
 916
 917         return (new_format_spec, new_formats)
 918
 919     def select_format(self, format_spec, available_formats):
 920         while format_spec.endswith(']'):
 921             format_spec, available_formats = self._apply_format_filter(
 922                 format_spec, available_formats)
 923         if not available_formats:
 924             return None
 925
 926         if format_spec in ['best', 'worst', None]:
 927             format_idx = 0 if format_spec == 'worst' else -1
 928             audiovideo_formats = [
 929                 f for f in available_formats
 930                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
 931             if audiovideo_formats:
 932                 return audiovideo_formats[format_idx]
 933             # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
 934             elif (all(f.get('acodec') != 'none' for f in available_formats) or
 935                   all(f.get('vcodec') != 'none' for f in available_formats)):
 936                 return available_formats[format_idx]
 937         elif format_spec == 'bestaudio':
 938             audio_formats = [
 939                 f for f in available_formats
 940                 if f.get('vcodec') == 'none']
 941             if audio_formats:
 942                 return audio_formats[-1]
 943         elif format_spec == 'worstaudio':
 944             audio_formats = [
 945                 f for f in available_formats
 946                 if f.get('vcodec') == 'none']
 947             if audio_formats:
 948                 return audio_formats[0]
 949         elif format_spec == 'bestvideo':
 950             video_formats = [
 951                 f for f in available_formats
 952                 if f.get('acodec') == 'none']
 953             if video_formats:
 954                 return video_formats[-1]
 955         elif format_spec == 'worstvideo':
 956             video_formats = [
 957                 f for f in available_formats
 958                 if f.get('acodec') == 'none']
 959             if video_formats:
 960                 return video_formats[0]
 961         else:
 962             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 963             if format_spec in extensions:
 964                 filter_f = lambda f: f['ext'] == format_spec
 965             else:
 966                 filter_f = lambda f: f['format_id'] == format_spec
 967             matches = list(filter(filter_f, available_formats))
 968             if matches:
 969                 return matches[-1]
 970         return None
 971
 972     def _calc_headers(self, info_dict):
 973         res = std_headers.copy()
 974
 975         add_headers = info_dict.get('http_headers')
 976         if add_headers:
 977             res.update(add_headers)
 978
 979         cookies = self._calc_cookies(info_dict)
 980         if cookies:
 981             res['Cookie'] = cookies
 982
 983         return res
 984
 985     def _calc_cookies(self, info_dict):
 986         pr = compat_urllib_request.Request(info_dict['url'])
 987         self.cookiejar.add_cookie_header(pr)
 988         return pr.get_header('Cookie')
 989
 990     def process_video_result(self, info_dict, download=True):
 991         assert info_dict.get('_type', 'video') == 'video'
 992
 993         if 'id' not in info_dict:
 994             raise ExtractorError('Missing "id" field in extractor result')
 995         if 'title' not in info_dict:
 996             raise ExtractorError('Missing "title" field in extractor result')
 997
 998         if 'playlist' not in info_dict:
 999             # It isn't part of a playlist
1000             info_dict['playlist'] = None
1001             info_dict['playlist_index'] = None
1002
1003         thumbnails = info_dict.get('thumbnails')
1004         if thumbnails is None:
1005             thumbnail = info_dict.get('thumbnail')
1006             if thumbnail:
1007                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1008         if thumbnails:
1009             thumbnails.sort(key=lambda t: (
1010                 t.get('preference'), t.get('width'), t.get('height'),
1011                 t.get('id'), t.get('url')))
1012             for i, t in enumerate(thumbnails):
1013                 if t.get('width') and t.get('height'):
1014                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1015                 if t.get('id') is None:
1016                     t['id'] = '%d' % i
1017
1018         if thumbnails and 'thumbnail' not in info_dict:
1019             info_dict['thumbnail'] = thumbnails[-1]['url']
1020
1021         if 'display_id' not in info_dict and 'id' in info_dict:
1022             info_dict['display_id'] = info_dict['id']
1023
1024         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1025             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1026             # see http://bugs.python.org/issue1646728)
1027             try:
1028                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1029                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1030             except (ValueError, OverflowError, OSError):
1031                 pass
1032
1033         if self.params.get('listsubtitles', False):
1034             if 'automatic_captions' in info_dict:
1035                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1036             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1037             return
1038         info_dict['requested_subtitles'] = self.process_subtitles(
1039             info_dict['id'], info_dict.get('subtitles'),
1040             info_dict.get('automatic_captions'))
1041
1042         # We now pick which formats have to be downloaded
1043         if info_dict.get('formats') is None:
1044             # There's only one format available
1045             formats = [info_dict]
1046         else:
1047             formats = info_dict['formats']
1048
1049         if not formats:
1050             raise ExtractorError('No video formats found!')
1051
1052         formats_dict = {}
1053
1054         # We check that all the formats have the format and format_id fields
1055         for i, format in enumerate(formats):
1056             if 'url' not in format:
1057                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1058
1059             if format.get('format_id') is None:
1060                 format['format_id'] = compat_str(i)
1061             format_id = format['format_id']
1062             if format_id not in formats_dict:
1063                 formats_dict[format_id] = []
1064             formats_dict[format_id].append(format)
1065
1066         # Make sure all formats have unique format_id
1067         for format_id, ambiguous_formats in formats_dict.items():
1068             if len(ambiguous_formats) > 1:
1069                 for i, format in enumerate(ambiguous_formats):
1070                     format['format_id'] = '%s-%d' % (format_id, i)
1071
1072         for i, format in enumerate(formats):
1073             if format.get('format') is None:
1074                 format['format'] = '{id} - {res}{note}'.format(
1075                     id=format['format_id'],
1076                     res=self.format_resolution(format),
1077                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1078                 )
1079             # Automatically determine file extension if missing
1080             if 'ext' not in format:
1081                 format['ext'] = determine_ext(format['url']).lower()
1082             # Add HTTP headers, so that external programs can use them from the
1083             # json output
1084             full_format_info = info_dict.copy()
1085             full_format_info.update(format)
1086             format['http_headers'] = self._calc_headers(full_format_info)
1087
1088         # TODO Central sorting goes here
1089
1090         if formats[0] is not info_dict:
1091             # only set the 'formats' fields if the original info_dict list them
1092             # otherwise we end up with a circular reference, the first (and unique)
1093             # element in the 'formats' field in info_dict is info_dict itself,
1094             # wich can't be exported to json
1095             info_dict['formats'] = formats
1096         if self.params.get('listformats'):
1097             self.list_formats(info_dict)
1098             return
1099         if self.params.get('list_thumbnails'):
1100             self.list_thumbnails(info_dict)
1101             return
1102
1103         req_format = self.params.get('format')
1104         if req_format is None:
1105             req_format_list = []
1106             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1107                     info_dict['extractor'] in ['youtube', 'ted'] and
1108                     not info_dict.get('is_live')):
1109                 merger = FFmpegMergerPP(self)
1110                 if merger.available and merger.can_merge():
1111                     req_format_list.append('bestvideo+bestaudio')
1112             req_format_list.append('best')
1113             req_format = '/'.join(req_format_list)
1114         formats_to_download = []
1115         if req_format == 'all':
1116             formats_to_download = formats
1117         else:
1118             for rfstr in req_format.split(','):
1119                 # We can accept formats requested in the format: 34/5/best, we pick
1120                 # the first that is available, starting from left
1121                 req_formats = rfstr.split('/')
1122                 for rf in req_formats:
1123                     if re.match(r'.+?\+.+?', rf) is not None:
1124                         # Two formats have been requested like '137+139'
1125                         format_1, format_2 = rf.split('+')
1126                         formats_info = (self.select_format(format_1, formats),
1127                                         self.select_format(format_2, formats))
1128                         if all(formats_info):
1129                             # The first format must contain the video and the
1130                             # second the audio
1131                             if formats_info[0].get('vcodec') == 'none':
1132                                 self.report_error('The first format must '
1133                                                   'contain the video, try using '
1134                                                   '"-f %s+%s"' % (format_2, format_1))
1135                                 return
1136                             output_ext = (
1137                                 formats_info[0]['ext']
1138                                 if self.params.get('merge_output_format') is None
1139                                 else self.params['merge_output_format'])
1140                             selected_format = {
1141                                 'requested_formats': formats_info,
1142                                 'format': '%s+%s' % (formats_info[0].get('format'),
1143                                                      formats_info[1].get('format')),
1144                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1145                                                         formats_info[1].get('format_id')),
1146                                 'width': formats_info[0].get('width'),
1147                                 'height': formats_info[0].get('height'),
1148                                 'resolution': formats_info[0].get('resolution'),
1149                                 'fps': formats_info[0].get('fps'),
1150                                 'vcodec': formats_info[0].get('vcodec'),
1151                                 'vbr': formats_info[0].get('vbr'),
1152                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1153                                 'acodec': formats_info[1].get('acodec'),
1154                                 'abr': formats_info[1].get('abr'),
1155                                 'ext': output_ext,
1156                             }
1157                         else:
1158                             selected_format = None
1159                     else:
1160                         selected_format = self.select_format(rf, formats)
1161                     if selected_format is not None:
1162                         formats_to_download.append(selected_format)
1163                         break
1164         if not formats_to_download:
1165             raise ExtractorError('requested format not available',
1166                                  expected=True)
1167
1168         if download:
1169             if len(formats_to_download) > 1:
1170                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1171             for format in formats_to_download:
1172                 new_info = dict(info_dict)
1173                 new_info.update(format)
1174                 self.process_info(new_info)
1175         # We update the info dict with the best quality format (backwards compatibility)
1176         info_dict.update(formats_to_download[-1])
1177         return info_dict
1178
1179     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1180         """Select the requested subtitles and their format"""
1181         available_subs = {}
1182         if normal_subtitles and self.params.get('writesubtitles'):
1183             available_subs.update(normal_subtitles)
1184         if automatic_captions and self.params.get('writeautomaticsub'):
1185             for lang, cap_info in automatic_captions.items():
1186                 if lang not in available_subs:
1187                     available_subs[lang] = cap_info
1188
1189         if (not self.params.get('writesubtitles') and not
1190                 self.params.get('writeautomaticsub') or not
1191                 available_subs):
1192             return None
1193
1194         if self.params.get('allsubtitles', False):
1195             requested_langs = available_subs.keys()
1196         else:
1197             if self.params.get('subtitleslangs', False):
1198                 requested_langs = self.params.get('subtitleslangs')
1199             elif 'en' in available_subs:
1200                 requested_langs = ['en']
1201             else:
1202                 requested_langs = [list(available_subs.keys())[0]]
1203
1204         formats_query = self.params.get('subtitlesformat', 'best')
1205         formats_preference = formats_query.split('/') if formats_query else []
1206         subs = {}
1207         for lang in requested_langs:
1208             formats = available_subs.get(lang)
1209             if formats is None:
1210                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1211                 continue
1212             for ext in formats_preference:
1213                 if ext == 'best':
1214                     f = formats[-1]
1215                     break
1216                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1217                 if matches:
1218                     f = matches[-1]
1219                     break
1220             else:
1221                 f = formats[-1]
1222                 self.report_warning(
1223                     'No subtitle format found matching "%s" for language %s, '
1224                     'using %s' % (formats_query, lang, f['ext']))
1225             subs[lang] = f
1226         return subs
1227
1228     def process_info(self, info_dict):
1229         """Process a single resolved IE result."""
1230
1231         assert info_dict.get('_type', 'video') == 'video'
1232
1233         max_downloads = self.params.get('max_downloads')
1234         if max_downloads is not None:
1235             if self._num_downloads >= int(max_downloads):
1236                 raise MaxDownloadsReached()
1237
1238         info_dict['fulltitle'] = info_dict['title']
1239         if len(info_dict['title']) > 200:
1240             info_dict['title'] = info_dict['title'][:197] + '...'
1241
1242         if 'format' not in info_dict:
1243             info_dict['format'] = info_dict['ext']
1244
1245         reason = self._match_entry(info_dict, incomplete=False)
1246         if reason is not None:
1247             self.to_screen('[download] ' + reason)
1248             return
1249
1250         self._num_downloads += 1
1251
1252         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1253
1254         # Forced printings
1255         if self.params.get('forcetitle', False):
1256             self.to_stdout(info_dict['fulltitle'])
1257         if self.params.get('forceid', False):
1258             self.to_stdout(info_dict['id'])
1259         if self.params.get('forceurl', False):
1260             if info_dict.get('requested_formats') is not None:
1261                 for f in info_dict['requested_formats']:
1262                     self.to_stdout(f['url'] + f.get('play_path', ''))
1263             else:
1264                 # For RTMP URLs, also include the playpath
1265                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1266         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1267             self.to_stdout(info_dict['thumbnail'])
1268         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1269             self.to_stdout(info_dict['description'])
1270         if self.params.get('forcefilename', False) and filename is not None:
1271             self.to_stdout(filename)
1272         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1273             self.to_stdout(formatSeconds(info_dict['duration']))
1274         if self.params.get('forceformat', False):
1275             self.to_stdout(info_dict['format'])
1276         if self.params.get('forcejson', False):
1277             self.to_stdout(json.dumps(info_dict))
1278
1279         # Do nothing else if in simulate mode
1280         if self.params.get('simulate', False):
1281             return
1282
1283         if filename is None:
1284             return
1285
1286         try:
1287             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1288             if dn and not os.path.exists(dn):
1289                 os.makedirs(dn)
1290         except (OSError, IOError) as err:
1291             self.report_error('unable to create directory ' + compat_str(err))
1292             return
1293
1294         if self.params.get('writedescription', False):
1295             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1296             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1297                 self.to_screen('[info] Video description is already present')
1298             elif info_dict.get('description') is None:
1299                 self.report_warning('There\'s no description to write.')
1300             else:
1301                 try:
1302                     self.to_screen('[info] Writing video description to: ' + descfn)
1303                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1304                         descfile.write(info_dict['description'])
1305                 except (OSError, IOError):
1306                     self.report_error('Cannot write description file ' + descfn)
1307                     return
1308
1309         if self.params.get('writeannotations', False):
1310             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1311             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1312                 self.to_screen('[info] Video annotations are already present')
1313             else:
1314                 try:
1315                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1316                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1317                         annofile.write(info_dict['annotations'])
1318                 except (KeyError, TypeError):
1319                     self.report_warning('There are no annotations to write.')
1320                 except (OSError, IOError):
1321                     self.report_error('Cannot write annotations file: ' + annofn)
1322                     return
1323
1324         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1325                                        self.params.get('writeautomaticsub')])
1326
1327         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1328             # subtitles download errors are already managed as troubles in relevant IE
1329             # that way it will silently go on when used with unsupporting IE
1330             subtitles = info_dict['requested_subtitles']
1331             ie = self.get_info_extractor(info_dict['extractor_key'])
1332             for sub_lang, sub_info in subtitles.items():
1333                 sub_format = sub_info['ext']
1334                 if sub_info.get('data') is not None:
1335                     sub_data = sub_info['data']
1336                 else:
1337                     try:
1338                         sub_data = ie._download_webpage(
1339                             sub_info['url'], info_dict['id'], note=False)
1340                     except ExtractorError as err:
1341                         self.report_warning('Unable to download subtitle for "%s": %s' %
1342                                             (sub_lang, compat_str(err.cause)))
1343                         continue
1344                 try:
1345                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1346                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1347                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1348                     else:
1349                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1350                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1351                             subfile.write(sub_data)
1352                 except (OSError, IOError):
1353                     self.report_error('Cannot write subtitles file ' + sub_filename)
1354                     return
1355
1356         if self.params.get('writeinfojson', False):
1357             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1358             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1359                 self.to_screen('[info] Video description metadata is already present')
1360             else:
1361                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1362                 try:
1363                     write_json_file(self.filter_requested_info(info_dict), infofn)
1364                 except (OSError, IOError):
1365                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1366                     return
1367
1368         self._write_thumbnails(info_dict, filename)
1369
1370         if not self.params.get('skip_download', False):
1371             try:
1372                 def dl(name, info):
1373                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1374                     for ph in self._progress_hooks:
1375                         fd.add_progress_hook(ph)
1376                     if self.params.get('verbose'):
1377                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1378                     return fd.download(name, info)
1379
1380                 if info_dict.get('requested_formats') is not None:
1381                     downloaded = []
1382                     success = True
1383                     merger = FFmpegMergerPP(self)
1384                     if not merger.available:
1385                         postprocessors = []
1386                         self.report_warning('You have requested multiple '
1387                                             'formats but ffmpeg or avconv are not installed.'
1388                                             ' The formats won\'t be merged.')
1389                     else:
1390                         postprocessors = [merger]
1391
1392                     def compatible_formats(formats):
1393                         video, audio = formats
1394                         # Check extension
1395                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1396                         if video_ext and audio_ext:
1397                             COMPATIBLE_EXTS = (
1398                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1399                                 ('webm')
1400                             )
1401                             for exts in COMPATIBLE_EXTS:
1402                                 if video_ext in exts and audio_ext in exts:
1403                                     return True
1404                         # TODO: Check acodec/vcodec
1405                         return False
1406
1407                     filename_real_ext = os.path.splitext(filename)[1][1:]
1408                     filename_wo_ext = (
1409                         os.path.splitext(filename)[0]
1410                         if filename_real_ext == info_dict['ext']
1411                         else filename)
1412                     requested_formats = info_dict['requested_formats']
1413                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1414                         info_dict['ext'] = 'mkv'
1415                         self.report_warning(
1416                             'Requested formats are incompatible for merge and will be merged into mkv.')
1417                     # Ensure filename always has a correct extension for successful merge
1418                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1419                     if os.path.exists(encodeFilename(filename)):
1420                         self.to_screen(
1421                             '[download] %s has already been downloaded and '
1422                             'merged' % filename)
1423                     else:
1424                         for f in requested_formats:
1425                             new_info = dict(info_dict)
1426                             new_info.update(f)
1427                             fname = self.prepare_filename(new_info)
1428                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1429                             downloaded.append(fname)
1430                             partial_success = dl(fname, new_info)
1431                             success = success and partial_success
1432                         info_dict['__postprocessors'] = postprocessors
1433                         info_dict['__files_to_merge'] = downloaded
1434                 else:
1435                     # Just a single file
1436                     success = dl(filename, info_dict)
1437             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1438                 self.report_error('unable to download video data: %s' % str(err))
1439                 return
1440             except (OSError, IOError) as err:
1441                 raise UnavailableVideoError(err)
1442             except (ContentTooShortError, ) as err:
1443                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1444                 return
1445
1446             if success:
1447                 # Fixup content
1448                 fixup_policy = self.params.get('fixup')
1449                 if fixup_policy is None:
1450                     fixup_policy = 'detect_or_warn'
1451
1452                 stretched_ratio = info_dict.get('stretched_ratio')
1453                 if stretched_ratio is not None and stretched_ratio != 1:
1454                     if fixup_policy == 'warn':
1455                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1456                             info_dict['id'], stretched_ratio))
1457                     elif fixup_policy == 'detect_or_warn':
1458                         stretched_pp = FFmpegFixupStretchedPP(self)
1459                         if stretched_pp.available:
1460                             info_dict.setdefault('__postprocessors', [])
1461                             info_dict['__postprocessors'].append(stretched_pp)
1462                         else:
1463                             self.report_warning(
1464                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1465                                     info_dict['id'], stretched_ratio))
1466                     else:
1467                         assert fixup_policy in ('ignore', 'never')
1468
1469                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1470                     if fixup_policy == 'warn':
1471                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1472                             info_dict['id']))
1473                     elif fixup_policy == 'detect_or_warn':
1474                         fixup_pp = FFmpegFixupM4aPP(self)
1475                         if fixup_pp.available:
1476                             info_dict.setdefault('__postprocessors', [])
1477                             info_dict['__postprocessors'].append(fixup_pp)
1478                         else:
1479                             self.report_warning(
1480                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1481                                     info_dict['id']))
1482                     else:
1483                         assert fixup_policy in ('ignore', 'never')
1484
1485                 try:
1486                     self.post_process(filename, info_dict)
1487                 except (PostProcessingError) as err:
1488                     self.report_error('postprocessing: %s' % str(err))
1489                     return
1490                 self.record_download_archive(info_dict)
1491
1492     def download(self, url_list):
1493         """Download a given list of URLs."""
1494         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1495         if (len(url_list) > 1 and
1496                 '%' not in outtmpl and
1497                 self.params.get('max_downloads') != 1):
1498             raise SameFileError(outtmpl)
1499
1500         for url in url_list:
1501             try:
1502                 # It also downloads the videos
1503                 res = self.extract_info(
1504                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1505             except UnavailableVideoError:
1506                 self.report_error('unable to download video')
1507             except MaxDownloadsReached:
1508                 self.to_screen('[info] Maximum number of downloaded files reached.')
1509                 raise
1510             else:
1511                 if self.params.get('dump_single_json', False):
1512                     self.to_stdout(json.dumps(res))
1513
1514         return self._download_retcode
1515
1516     def download_with_info_file(self, info_filename):
1517         with contextlib.closing(fileinput.FileInput(
1518                 [info_filename], mode='r',
1519                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1520             # FileInput doesn't have a read method, we can't call json.load
1521             info = self.filter_requested_info(json.loads('\n'.join(f)))
1522         try:
1523             self.process_ie_result(info, download=True)
1524         except DownloadError:
1525             webpage_url = info.get('webpage_url')
1526             if webpage_url is not None:
1527                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1528                 return self.download([webpage_url])
1529             else:
1530                 raise
1531         return self._download_retcode
1532
1533     @staticmethod
1534     def filter_requested_info(info_dict):
1535         return dict(
1536             (k, v) for k, v in info_dict.items()
1537             if k not in ['requested_formats', 'requested_subtitles'])
1538
1539     def post_process(self, filename, ie_info):
1540         """Run all the postprocessors on the given file."""
1541         info = dict(ie_info)
1542         info['filepath'] = filename
1543         pps_chain = []
1544         if ie_info.get('__postprocessors') is not None:
1545             pps_chain.extend(ie_info['__postprocessors'])
1546         pps_chain.extend(self._pps)
1547         for pp in pps_chain:
1548             files_to_delete = []
1549             try:
1550                 files_to_delete, info = pp.run(info)
1551             except PostProcessingError as e:
1552                 self.report_error(e.msg)
1553             if files_to_delete and not self.params.get('keepvideo', False):
1554                 for old_filename in files_to_delete:
1555                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1556                     try:
1557                         os.remove(encodeFilename(old_filename))
1558                     except (IOError, OSError):
1559                         self.report_warning('Unable to remove downloaded original file')
1560
1561     def _make_archive_id(self, info_dict):
1562         # Future-proof against any change in case
1563         # and backwards compatibility with prior versions
1564         extractor = info_dict.get('extractor_key')
1565         if extractor is None:
1566             if 'id' in info_dict:
1567                 extractor = info_dict.get('ie_key')  # key in a playlist
1568         if extractor is None:
1569             return None  # Incomplete video information
1570         return extractor.lower() + ' ' + info_dict['id']
1571
1572     def in_download_archive(self, info_dict):
1573         fn = self.params.get('download_archive')
1574         if fn is None:
1575             return False
1576
1577         vid_id = self._make_archive_id(info_dict)
1578         if vid_id is None:
1579             return False  # Incomplete video information
1580
1581         try:
1582             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1583                 for line in archive_file:
1584                     if line.strip() == vid_id:
1585                         return True
1586         except IOError as ioe:
1587             if ioe.errno != errno.ENOENT:
1588                 raise
1589         return False
1590
1591     def record_download_archive(self, info_dict):
1592         fn = self.params.get('download_archive')
1593         if fn is None:
1594             return
1595         vid_id = self._make_archive_id(info_dict)
1596         assert vid_id
1597         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1598             archive_file.write(vid_id + '\n')
1599
1600     @staticmethod
1601     def format_resolution(format, default='unknown'):
1602         if format.get('vcodec') == 'none':
1603             return 'audio only'
1604         if format.get('resolution') is not None:
1605             return format['resolution']
1606         if format.get('height') is not None:
1607             if format.get('width') is not None:
1608                 res = '%sx%s' % (format['width'], format['height'])
1609             else:
1610                 res = '%sp' % format['height']
1611         elif format.get('width') is not None:
1612             res = '?x%d' % format['width']
1613         else:
1614             res = default
1615         return res
1616
1617     def _format_note(self, fdict):
1618         res = ''
1619         if fdict.get('ext') in ['f4f', 'f4m']:
1620             res += '(unsupported) '
1621         if fdict.get('format_note') is not None:
1622             res += fdict['format_note'] + ' '
1623         if fdict.get('tbr') is not None:
1624             res += '%4dk ' % fdict['tbr']
1625         if fdict.get('container') is not None:
1626             if res:
1627                 res += ', '
1628             res += '%s container' % fdict['container']
1629         if (fdict.get('vcodec') is not None and
1630                 fdict.get('vcodec') != 'none'):
1631             if res:
1632                 res += ', '
1633             res += fdict['vcodec']
1634             if fdict.get('vbr') is not None:
1635                 res += '@'
1636         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1637             res += 'video@'
1638         if fdict.get('vbr') is not None:
1639             res += '%4dk' % fdict['vbr']
1640         if fdict.get('fps') is not None:
1641             res += ', %sfps' % fdict['fps']
1642         if fdict.get('acodec') is not None:
1643             if res:
1644                 res += ', '
1645             if fdict['acodec'] == 'none':
1646                 res += 'video only'
1647             else:
1648                 res += '%-5s' % fdict['acodec']
1649         elif fdict.get('abr') is not None:
1650             if res:
1651                 res += ', '
1652             res += 'audio'
1653         if fdict.get('abr') is not None:
1654             res += '@%3dk' % fdict['abr']
1655         if fdict.get('asr') is not None:
1656             res += ' (%5dHz)' % fdict['asr']
1657         if fdict.get('filesize') is not None:
1658             if res:
1659                 res += ', '
1660             res += format_bytes(fdict['filesize'])
1661         elif fdict.get('filesize_approx') is not None:
1662             if res:
1663                 res += ', '
1664             res += '~' + format_bytes(fdict['filesize_approx'])
1665         return res
1666
1667     def list_formats(self, info_dict):
1668         formats = info_dict.get('formats', [info_dict])
1669         table = [
1670             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1671             for f in formats
1672             if f.get('preference') is None or f['preference'] >= -1000]
1673         if len(formats) > 1:
1674             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1675
1676         header_line = ['format code', 'extension', 'resolution', 'note']
1677         self.to_screen(
1678             '[info] Available formats for %s:\n%s' %
1679             (info_dict['id'], render_table(header_line, table)))
1680
1681     def list_thumbnails(self, info_dict):
1682         thumbnails = info_dict.get('thumbnails')
1683         if not thumbnails:
1684             tn_url = info_dict.get('thumbnail')
1685             if tn_url:
1686                 thumbnails = [{'id': '0', 'url': tn_url}]
1687             else:
1688                 self.to_screen(
1689                     '[info] No thumbnails present for %s' % info_dict['id'])
1690                 return
1691
1692         self.to_screen(
1693             '[info] Thumbnails for %s:' % info_dict['id'])
1694         self.to_screen(render_table(
1695             ['ID', 'width', 'height', 'URL'],
1696             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1697
1698     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1699         if not subtitles:
1700             self.to_screen('%s has no %s' % (video_id, name))
1701             return
1702         self.to_screen(
1703             'Available %s for %s:' % (name, video_id))
1704         self.to_screen(render_table(
1705             ['Language', 'formats'],
1706             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1707                 for lang, formats in subtitles.items()]))
1708
1709     def urlopen(self, req):
1710         """ Start an HTTP download """
1711
1712         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1713         # always respected by websites, some tend to give out URLs with non percent-encoded
1714         # non-ASCII characters (see telemb.py, ard.py [#3412])
1715         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1716         # To work around aforementioned issue we will replace request's original URL with
1717         # percent-encoded one
1718         req_is_string = isinstance(req, compat_basestring)
1719         url = req if req_is_string else req.get_full_url()
1720         url_escaped = escape_url(url)
1721
1722         # Substitute URL if any change after escaping
1723         if url != url_escaped:
1724             if req_is_string:
1725                 req = url_escaped
1726             else:
1727                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1728                 req = req_type(
1729                     url_escaped, data=req.data, headers=req.headers,
1730                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1731
1732         return self._opener.open(req, timeout=self._socket_timeout)
1733
    def print_debug_header(self):
        """Write the '[debug]' diagnostic header; a no-op unless 'verbose' is set.

        In order, it reports: the encodings in use, the youtube-dl version,
        the git HEAD of the directory holding this file (when available), the
        Python version and platform, the versions of external executables,
        and the proxy map of the opener's handlers.  With the 'call_home'
        option it additionally fetches and prints the public IP address and
        warns when a newer youtube-dl version has been published.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may be replaced by an object without an 'encoding'
        # attribute; report its type name in that case.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        # This line goes through the module-level write_string with
        # encoding=None rather than through self._write_string.
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best effort: ask git for the checked-out commit of the directory
            # containing this file.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            # re.match anchors at the start only: the output is printed when
            # it begins with at least one lowercase hex digit.
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # Any failure here (git missing, not a checkout, ...) is ignored.
            # sys.exc_clear() exists only on Python 2, hence the inner guard.
            try:
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        # Executables with a falsy version value are left out of the listing.
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the 'proxies' mapping of every opener handler that has one.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Both requests go through self.urlopen; errors raised by it are
            # not handled here and propagate to the caller.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1798
1799     def _setup_opener(self):
1800         timeout_val = self.params.get('socket_timeout')
1801         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1802
1803         opts_cookiefile = self.params.get('cookiefile')
1804         opts_proxy = self.params.get('proxy')
1805
1806         if opts_cookiefile is None:
1807             self.cookiejar = compat_cookiejar.CookieJar()
1808         else:
1809             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1810                 opts_cookiefile)
1811             if os.access(opts_cookiefile, os.R_OK):
1812                 self.cookiejar.load()
1813
1814         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1815             self.cookiejar)
1816         if opts_proxy is not None:
1817             if opts_proxy == '':
1818                 proxies = {}
1819             else:
1820                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1821         else:
1822             proxies = compat_urllib_request.getproxies()
1823             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1824             if 'http' in proxies and 'https' not in proxies:
1825                 proxies['https'] = proxies['http']
1826         proxy_handler = PerRequestProxyHandler(proxies)
1827
1828         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1829         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1830         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1831         opener = compat_urllib_request.build_opener(
1832             proxy_handler, https_handler, cookie_processor, ydlh)
1833
1834         # Delete the default user-agent header, which would otherwise apply in
1835         # cases where our custom HTTP handler doesn't come into play
1836         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1837         opener.addheaders = []
1838         self._opener = opener
1839
1840     def encode(self, s):
1841         if isinstance(s, bytes):
1842             return s  # Already encoded
1843
1844         try:
1845             return s.encode(self.get_encoding())
1846         except UnicodeEncodeError as err:
1847             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1848             raise
1849
1850     def get_encoding(self):
1851         encoding = self.params.get('encoding')
1852         if encoding is None:
1853             encoding = preferredencoding()
1854         return encoding
1855
1856     def _write_thumbnails(self, info_dict, filename):
1857         if self.params.get('writethumbnail', False):
1858             thumbnails = info_dict.get('thumbnails')
1859             if thumbnails:
1860                 thumbnails = [thumbnails[-1]]
1861         elif self.params.get('write_all_thumbnails', False):
1862             thumbnails = info_dict.get('thumbnails')
1863         else:
1864             return
1865
1866         if not thumbnails:
1867             # No thumbnails present, so return immediately
1868             return
1869
1870         for t in thumbnails:
1871             thumb_ext = determine_ext(t['url'], 'jpg')
1872             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1873             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1874             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1875
1876             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1877                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1878                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1879             else:
1880                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1881                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1882                 try:
1883                     uf = self.urlopen(t['url'])
1884                     with open(thumb_filename, 'wb') as thumbf:
1885                         shutil.copyfileobj(uf, thumbf)
1886                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1887                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1888                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1889                     self.report_warning('Unable to download thumbnail "%s": %s' %
1890                                         (t['url'], compat_str(err)))