youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import errno
   7 import io
   8 import json
   9 import os
  10 import platform
  11 import re
  12 import shutil
  13 import subprocess
  14 import socket
  15 import sys
  16 import time
  17 import traceback
  18
  19 if os.name == 'nt':
  20     import ctypes
  21
  22 from .utils import (
  23     compat_cookiejar,
  24     compat_http_client,
  25     compat_print,
  26     compat_str,
  27     compat_urllib_error,
  28     compat_urllib_request,
  29     ContentTooShortError,
  30     date_from_str,
  31     DateRange,
  32     determine_ext,
  33     DownloadError,
  34     encodeFilename,
  35     ExtractorError,
  36     format_bytes,
  37     locked_file,
  38     make_HTTPS_handler,
  39     MaxDownloadsReached,
  40     PostProcessingError,
  41     platform_name,
  42     preferredencoding,
  43     SameFileError,
  44     sanitize_filename,
  45     subtitles_filename,
  46     takewhile_inclusive,
  47     UnavailableVideoError,
  48     write_json_file,
  49     write_string,
  50     YoutubeDLHandler,
  51 )
  52 from .extractor import get_info_extractor, gen_extractors
  53 from .FileDownloader import FileDownloader
  54 from .version import __version__
  55
  56
  57 class YoutubeDL(object):
  58     """YoutubeDL class.
  59
    YoutubeDL objects are the ones responsible for downloading the
  61     actual video file and writing it to disk if the user has requested
  62     it, among some other tasks. In most cases there should be one per
  63     program. As, given a video URL, the downloader doesn't know how to
  64     extract all the needed information, task that InfoExtractors do, it
  65     has to pass the URL to one of them.
  66
  67     For this, YoutubeDL objects have a method that allows
  68     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.
  74
  75     YoutubeDL objects accept a lot of parameters. In order not to saturate
  76     the object constructor with arguments, it receives a dictionary of
  77     options instead. These options are available through the params
  78     attribute for the InfoExtractors to use. The YoutubeDL also
  79     registers itself as the downloader in charge for the InfoExtractors
  80     that are added to it, so this is a "mutual registration".
  81
  82     Available options:
  83
  84     username:          Username for authentication purposes.
  85     password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
  87     usenetrc:          Use netrc for authentication instead.
  88     verbose:           Print additional info to stdout.
  89     quiet:             Do not print messages to stdout.
  90     forceurl:          Force printing final URL.
  91     forcetitle:        Force printing title.
  92     forceid:           Force printing ID.
  93     forcethumbnail:    Force printing thumbnail URL.
  94     forcedescription:  Force printing description.
  95     forcefilename:     Force printing final filename.
  96     forcejson:         Force printing info_dict as JSON.
  97     simulate:          Do not download the video files.
  98     format:            Video format code.
  99     format_limit:      Highest quality format to try.
 100     outtmpl:           Template for output names.
 101     restrictfilenames: Do not allow "&" and spaces in file names
 102     ignoreerrors:      Do not stop on download errors.
 103     nooverwrites:      Prevent overwriting files.
 104     playliststart:     Playlist item to start at.
 105     playlistend:       Playlist item to end at.
 106     matchtitle:        Download only matching titles.
 107     rejecttitle:       Reject downloads for matching titles.
 108     logger:            Log messages to a logging.Logger instance.
 109     logtostderr:       Log messages to stderr instead of stdout.
 110     writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
 112     writeannotations:  Write the video annotations to a .annotations.xml file
 113     writethumbnail:    Write the thumbnail image to a file
 114     writesubtitles:    Write the video subtitles to a file
 115     writeautomaticsub: Write the automatic subtitles to a file
 116     allsubtitles:      Downloads all the subtitles of the video
 117                        (requires writesubtitles or writeautomaticsub)
 118     listsubtitles:     Lists all available subtitles for the video
 119     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 120     subtitleslangs:    List of languages of the subtitles to download
 121     keepvideo:         Keep the video file after post-processing
 122     daterange:         A DateRange object, download only if the upload_date is in the range.
 123     skip_download:     Skip the actual download of the video file
 124     cachedir:          Location of the cache files in the filesystem.
 125                        None to disable filesystem cache.
 126     noplaylist:        Download single video instead of a playlist if in doubt.
 127     age_limit:         An integer representing the user's age in years.
 128                        Unsuitable videos for the given age are skipped.
 129     downloadarchive:   File name of a file where all downloads are recorded.
 130                        Videos already present in the file are not downloaded
 131                        again.
 132     cookiefile:        File name where cookies should be read from and dumped to.
 133     nocheckcertificate:Do not verify SSL certificates
 134     proxy:             URL of the proxy server to use
 135
 136     The following parameters are not used by YoutubeDL itself, they are used by
 137     the FileDownloader:
 138     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 139     noresizebuffer, retries, continuedl, noprogress, consoletitle
 140     """
 141
    # Class-level defaults. __init__ rebinds every one of these on the
    # instance, so the values below are only placeholders.
    params = None  # options dictionary passed to the constructor
    _ies = []  # registered InfoExtractor objects, in registration order
    _pps = []  # registered PostProcessor objects, in registration order
    _download_retcode = None  # set to 1 by trouble() when an error is ignored
    _num_downloads = None  # counter bumped by increment_downloads()
    _screen_file = None  # sys.stdout or sys.stderr, chosen by 'logtostderr'
 148
 149     def __init__(self, params={}):
 150         """Create a FileDownloader object with the given options."""
 151         self._ies = []
 152         self._ies_instances = {}
 153         self._pps = []
 154         self._progress_hooks = []
 155         self._download_retcode = 0
 156         self._num_downloads = 0
 157         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 158
 159         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 160                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 161                 and not params['restrictfilenames']):
 162             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 163             self.report_warning(
 164                 u'Assuming --restrict-filenames since file system encoding '
 165                 u'cannot encode all charactes. '
 166                 u'Set the LC_ALL environment variable to fix this.')
 167             params['restrictfilenames'] = True
 168
 169         self.params = params
 170         self.fd = FileDownloader(self, self.params)
 171
 172         if '%(stitle)s' in self.params.get('outtmpl', ''):
 173             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 174
 175         self._setup_opener()
 176
 177     def add_info_extractor(self, ie):
 178         """Add an InfoExtractor object to the end of the list."""
 179         self._ies.append(ie)
 180         self._ies_instances[ie.ie_key()] = ie
 181         ie.set_downloader(self)
 182
 183     def get_info_extractor(self, ie_key):
 184         """
 185         Get an instance of an IE with name ie_key, it will try to get one from
 186         the _ies list, if there's no instance it will create a new one and add
 187         it to the extractor list.
 188         """
 189         ie = self._ies_instances.get(ie_key)
 190         if ie is None:
 191             ie = get_info_extractor(ie_key)()
 192             self.add_info_extractor(ie)
 193         return ie
 194
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list.

        Each one is registered through add_info_extractor, in the order
        gen_extractors yields them.
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
 201
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain.

        pp is also told, via set_downloader, that this object is its
        downloader.
        """
        self._pps.append(pp)
        pp.set_downloader(self)
 206
 207     def to_screen(self, message, skip_eol=False):
 208         """Print message to stdout if not in quiet mode."""
 209         if self.params.get('logger'):
 210             self.params['logger'].debug(message)
 211         elif not self.params.get('quiet', False):
 212             terminator = [u'\n', u''][skip_eol]
 213             output = message + terminator
 214             write_string(output, self._screen_file)
 215
 216     def to_stderr(self, message):
 217         """Print message to stderr."""
 218         assert type(message) == type(u'')
 219         if self.params.get('logger'):
 220             self.params['logger'].error(message)
 221         else:
 222             output = message + u'\n'
 223             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 224                 output = output.encode(preferredencoding())
 225             sys.stderr.write(output)
 226
 227     def to_console_title(self, message):
 228         if not self.params.get('consoletitle', False):
 229             return
 230         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 231             # c_wchar_p() might not be necessary if `message` is
 232             # already of type unicode()
 233             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 234         elif 'TERM' in os.environ:
 235             write_string(u'\033]0;%s\007' % message, self._screen_file)
 236
 237     def save_console_title(self):
 238         if not self.params.get('consoletitle', False):
 239             return
 240         if 'TERM' in os.environ:
 241             # Save the title on stack
 242             write_string(u'\033[22;0t', self._screen_file)
 243
 244     def restore_console_title(self):
 245         if not self.params.get('consoletitle', False):
 246             return
 247         if 'TERM' in os.environ:
 248             # Restore the title from stack
 249             write_string(u'\033[23;0t', self._screen_file)
 250
    def __enter__(self):
        """Enter the context: save the console title and return this object."""
        self.save_console_title()
        return self
 254
 255     def __exit__(self, *args):
 256         self.restore_console_title()
 257
 258         if self.params.get('cookiefile') is not None:
 259             self.cookiejar.save()
 260
 261     def fixed_template(self):
 262         """Checks if the output template is fixed."""
 263         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 264
 265     def trouble(self, message=None, tb=None):
 266         """Determine action to take when a download problem appears.
 267
 268         Depending on if the downloader has been configured to ignore
 269         download errors or not, this method may throw an exception or
 270         not when errors are found, after printing the message.
 271
 272         tb, if given, is additional traceback information.
 273         """
 274         if message is not None:
 275             self.to_stderr(message)
 276         if self.params.get('verbose'):
 277             if tb is None:
 278                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 279                     tb = u''
 280                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 281                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 282                     tb += compat_str(traceback.format_exc())
 283                 else:
 284                     tb_data = traceback.format_list(traceback.extract_stack())
 285                     tb = u''.join(tb_data)
 286             self.to_stderr(tb)
 287         if not self.params.get('ignoreerrors', False):
 288             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 289                 exc_info = sys.exc_info()[1].exc_info
 290             else:
 291                 exc_info = sys.exc_info()
 292             raise DownloadError(message, exc_info)
 293         self._download_retcode = 1
 294
 295     def report_warning(self, message):
 296         '''
 297         Print the message to stderr, it will be prefixed with 'WARNING:'
 298         If stderr is a tty file the 'WARNING:' will be colored
 299         '''
 300         if sys.stderr.isatty() and os.name != 'nt':
 301             _msg_header = u'\033[0;33mWARNING:\033[0m'
 302         else:
 303             _msg_header = u'WARNING:'
 304         warning_message = u'%s %s' % (_msg_header, message)
 305         self.to_stderr(warning_message)
 306
 307     def report_error(self, message, tb=None):
 308         '''
 309         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 310         in red if stderr is a tty file.
 311         '''
 312         if sys.stderr.isatty() and os.name != 'nt':
 313             _msg_header = u'\033[0;31mERROR:\033[0m'
 314         else:
 315             _msg_header = u'ERROR:'
 316         error_message = u'%s %s' % (_msg_header, message)
 317         self.trouble(error_message, tb)
 318
 319     def report_writedescription(self, descfn):
 320         """ Report that the description file is being written """
 321         self.to_screen(u'[info] Writing video description to: ' + descfn)
 322
 323     def report_writesubtitles(self, sub_filename):
 324         """ Report that the subtitles file is being written """
 325         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 326
 327     def report_writeinfojson(self, infofn):
 328         """ Report that the metadata file has been written """
 329         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 330
 331     def report_writeannotations(self, annofn):
 332         """ Report that the annotations file has been written. """
 333         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 334
 335     def report_file_already_downloaded(self, file_name):
 336         """Report file has already been fully downloaded."""
 337         try:
 338             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 339         except UnicodeEncodeError:
 340             self.to_screen(u'[download] The file has already been downloaded')
 341
 342     def increment_downloads(self):
 343         """Increment the ordinal that assigns a number to each file."""
 344         self._num_downloads += 1
 345
 346     def prepare_filename(self, info_dict):
 347         """Generate the output filename."""
 348         try:
 349             template_dict = dict(info_dict)
 350
 351             template_dict['epoch'] = int(time.time())
 352             autonumber_size = self.params.get('autonumber_size')
 353             if autonumber_size is None:
 354                 autonumber_size = 5
 355             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 356             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 357             if template_dict.get('playlist_index') is not None:
 358                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 359
 360             sanitize = lambda k, v: sanitize_filename(
 361                 u'NA' if v is None else compat_str(v),
 362                 restricted=self.params.get('restrictfilenames'),
 363                 is_id=(k == u'id'))
 364             template_dict = dict((k, sanitize(k, v))
 365                                  for k, v in template_dict.items())
 366
 367             tmpl = os.path.expanduser(self.params['outtmpl'])
 368             filename = tmpl % template_dict
 369             return filename
 370         except KeyError as err:
 371             self.report_error(u'Erroneous output template')
 372             return None
 373         except ValueError as err:
 374             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 375             return None
 376
 377     def _match_entry(self, info_dict):
 378         """ Returns None iff the file should be downloaded """
 379
 380         if 'title' in info_dict:
 381             # This can happen when we're just evaluating the playlist
 382             title = info_dict['title']
 383             matchtitle = self.params.get('matchtitle', False)
 384             if matchtitle:
 385                 if not re.search(matchtitle, title, re.IGNORECASE):
 386                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 387             rejecttitle = self.params.get('rejecttitle', False)
 388             if rejecttitle:
 389                 if re.search(rejecttitle, title, re.IGNORECASE):
 390                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 391         date = info_dict.get('upload_date', None)
 392         if date is not None:
 393             dateRange = self.params.get('daterange', DateRange())
 394             if date not in dateRange:
 395                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 396         age_limit = self.params.get('age_limit')
 397         if age_limit is not None:
 398             if age_limit < info_dict.get('age_limit', 0):
 399                 return u'Skipping "' + title + '" because it is age restricted'
 400         if self.in_download_archive(info_dict):
 401             return (u'%s has already been recorded in archive'
 402                     % info_dict.get('title', info_dict.get('id', u'video')))
 403         return None
 404
 405     @staticmethod
 406     def add_extra_info(info_dict, extra_info):
 407         '''Set the keys from extra_info in info dict if they are missing'''
 408         for key, value in extra_info.items():
 409             info_dict.setdefault(key, value)
 410
 411     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
 412         '''
 413         Returns a list with a dictionary for each video we find.
 414         If 'download', also downloads the videos.
 415         extra_info is a dict containing the extra values to add to each result
 416          '''
 417
 418         if ie_key:
 419             ies = [self.get_info_extractor(ie_key)]
 420         else:
 421             ies = self._ies
 422
 423         for ie in ies:
 424             if not ie.suitable(url):
 425                 continue
 426
 427             if not ie.working():
 428                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 429                                     u'and will probably not work.')
 430
 431             try:
 432                 ie_result = ie.extract(url)
 433                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 434                     break
 435                 if isinstance(ie_result, list):
 436                     # Backwards compatibility: old IE result format
 437                     ie_result = {
 438                         '_type': 'compat_list',
 439                         'entries': ie_result,
 440                     }
 441                 self.add_extra_info(ie_result,
 442                     {
 443                         'extractor': ie.IE_NAME,
 444                         'webpage_url': url,
 445                         'extractor_key': ie.ie_key(),
 446                     })
 447                 return self.process_ie_result(ie_result, download, extra_info)
 448             except ExtractorError as de: # An error we somewhat expected
 449                 self.report_error(compat_str(de), de.format_traceback())
 450                 break
 451             except Exception as e:
 452                 if self.params.get('ignoreerrors', False):
 453                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 454                     break
 455                 else:
 456                     raise
 457         else:
 458             self.report_error(u'no suitable InfoExtractor: %s' % url)
 459
 460     def process_ie_result(self, ie_result, download=True, extra_info={}):
 461         """
 462         Take the result of the ie(may be modified) and resolve all unresolved
 463         references (URLs, playlist items).
 464
 465         It will also download the videos if 'download'.
 466         Returns the resolved ie_result.
 467         """
 468
 469         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 470         if result_type == 'video':
 471             self.add_extra_info(ie_result, extra_info)
 472             return self.process_video_result(ie_result, download=download)
 473         elif result_type == 'url':
 474             # We have to add extra_info to the results because it may be
 475             # contained in a playlist
 476             return self.extract_info(ie_result['url'],
 477                                      download,
 478                                      ie_key=ie_result.get('ie_key'),
 479                                      extra_info=extra_info)
 480         elif result_type == 'playlist':
 481
 482             # We process each entry in the playlist
 483             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 484             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 485
 486             playlist_results = []
 487
 488             n_all_entries = len(ie_result['entries'])
 489             playliststart = self.params.get('playliststart', 1) - 1
 490             playlistend = self.params.get('playlistend', -1)
 491
 492             if playlistend == -1:
 493                 entries = ie_result['entries'][playliststart:]
 494             else:
 495                 entries = ie_result['entries'][playliststart:playlistend]
 496
 497             n_entries = len(entries)
 498
 499             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 500                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 501
 502             for i, entry in enumerate(entries, 1):
 503                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 504                 extra = {
 505                     'playlist': playlist,
 506                     'playlist_index': i + playliststart,
 507                     'extractor': ie_result['extractor'],
 508                     'webpage_url': ie_result['webpage_url'],
 509                     'extractor_key': ie_result['extractor_key'],
 510                 }
 511
 512                 reason = self._match_entry(entry)
 513                 if reason is not None:
 514                     self.to_screen(u'[download] ' + reason)
 515                     continue
 516
 517                 entry_result = self.process_ie_result(entry,
 518                                                       download=download,
 519                                                       extra_info=extra)
 520                 playlist_results.append(entry_result)
 521             ie_result['entries'] = playlist_results
 522             return ie_result
 523         elif result_type == 'compat_list':
 524             def _fixup(r):
 525                 self.add_extra_info(r,
 526                     {
 527                         'extractor': ie_result['extractor'],
 528                         'webpage_url': ie_result['webpage_url'],
 529                         'extractor_key': ie_result['extractor_key'],
 530                     })
 531                 return r
 532             ie_result['entries'] = [
 533                 self.process_ie_result(_fixup(r), download, extra_info)
 534                 for r in ie_result['entries']
 535             ]
 536             return ie_result
 537         else:
 538             raise Exception('Invalid result type: %s' % result_type)
 539
 540     def select_format(self, format_spec, available_formats):
 541         if format_spec == 'best' or format_spec is None:
 542             return available_formats[-1]
 543         elif format_spec == 'worst':
 544             return available_formats[0]
 545         else:
 546             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 547             if format_spec in extensions:
 548                 filter_f = lambda f: f['ext'] == format_spec
 549             else:
 550                 filter_f = lambda f: f['format_id'] == format_spec
 551             matches = list(filter(filter_f, available_formats))
 552             if matches:
 553                 return matches[-1]
 554         return None
 555
 556     def process_video_result(self, info_dict, download=True):
 557         assert info_dict.get('_type', 'video') == 'video'
 558
 559         if 'playlist' not in info_dict:
 560             # It isn't part of a playlist
 561             info_dict['playlist'] = None
 562             info_dict['playlist_index'] = None
 563
 564         # This extractors handle format selection themselves
 565         if info_dict['extractor'] in [u'youtube', u'Youku']:
 566             if download:
 567                 self.process_info(info_dict)
 568             return info_dict
 569
 570         # We now pick which formats have to be downloaded
 571         if info_dict.get('formats') is None:
 572             # There's only one format available
 573             formats = [info_dict]
 574         else:
 575             formats = info_dict['formats']
 576
 577         # We check that all the formats have the format and format_id fields
 578         for (i, format) in enumerate(formats):
 579             if format.get('format_id') is None:
 580                 format['format_id'] = compat_str(i)
 581             if format.get('format') is None:
 582                 format['format'] = u'{id} - {res}{note}'.format(
 583                     id=format['format_id'],
 584                     res=self.format_resolution(format),
 585                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 586                 )
 587             # Automatically determine file extension if missing
 588             if 'ext' not in format:
 589                 format['ext'] = determine_ext(format['url'])
 590
 591         if self.params.get('listformats', None):
 592             self.list_formats(info_dict)
 593             return
 594
 595         format_limit = self.params.get('format_limit', None)
 596         if format_limit:
 597             formats = list(takewhile_inclusive(
 598                 lambda f: f['format_id'] != format_limit, formats
 599             ))
 600         if self.params.get('prefer_free_formats'):
 601             def _free_formats_key(f):
 602                 try:
 603                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
 604                 except ValueError:
 605                     ext_ord = -1
 606                 # We only compare the extension if they have the same height and width
 607                 return (f.get('height'), f.get('width'), ext_ord)
 608             formats = sorted(formats, key=_free_formats_key)
 609
 610         req_format = self.params.get('format', 'best')
 611         if req_format is None:
 612             req_format = 'best'
 613         formats_to_download = []
 614         # The -1 is for supporting YoutubeIE
 615         if req_format in ('-1', 'all'):
 616             formats_to_download = formats
 617         else:
 618             # We can accept formats requestd in the format: 34/5/best, we pick
 619             # the first that is available, starting from left
 620             req_formats = req_format.split('/')
 621             for rf in req_formats:
 622                 selected_format = self.select_format(rf, formats)
 623                 if selected_format is not None:
 624                     formats_to_download = [selected_format]
 625                     break
 626         if not formats_to_download:
 627             raise ExtractorError(u'requested format not available',
 628                                  expected=True)
 629
 630         if download:
 631             if len(formats_to_download) > 1:
 632                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 633             for format in formats_to_download:
 634                 new_info = dict(info_dict)
 635                 new_info.update(format)
 636                 self.process_info(new_info)
 637         # We update the info dict with the best quality format (backwards compatibility)
 638         info_dict.update(formats_to_download[-1])
 639         return info_dict
 640
 641     def process_info(self, info_dict):
 642         """Process a single resolved IE result."""
 643
 644         assert info_dict.get('_type', 'video') == 'video'
 645         #We increment the download the download count here to match the previous behaviour.
 646         self.increment_downloads()
 647
 648         info_dict['fulltitle'] = info_dict['title']
 649         if len(info_dict['title']) > 200:
 650             info_dict['title'] = info_dict['title'][:197] + u'...'
 651
 652         # Keep for backwards compatibility
 653         info_dict['stitle'] = info_dict['title']
 654
 655         if not 'format' in info_dict:
 656             info_dict['format'] = info_dict['ext']
 657
 658         reason = self._match_entry(info_dict)
 659         if reason is not None:
 660             self.to_screen(u'[download] ' + reason)
 661             return
 662
 663         max_downloads = self.params.get('max_downloads')
 664         if max_downloads is not None:
 665             if self._num_downloads > int(max_downloads):
 666                 raise MaxDownloadsReached()
 667
 668         filename = self.prepare_filename(info_dict)
 669
 670         # Forced printings
 671         if self.params.get('forcetitle', False):
 672             compat_print(info_dict['fulltitle'])
 673         if self.params.get('forceid', False):
 674             compat_print(info_dict['id'])
 675         if self.params.get('forceurl', False):
 676             # For RTMP URLs, also include the playpath
 677             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 678         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 679             compat_print(info_dict['thumbnail'])
 680         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 681             compat_print(info_dict['description'])
 682         if self.params.get('forcefilename', False) and filename is not None:
 683             compat_print(filename)
 684         if self.params.get('forceformat', False):
 685             compat_print(info_dict['format'])
 686         if self.params.get('forcejson', False):
 687             compat_print(json.dumps(info_dict))
 688
 689         # Do nothing else if in simulate mode
 690         if self.params.get('simulate', False):
 691             return
 692
 693         if filename is None:
 694             return
 695
 696         try:
 697             dn = os.path.dirname(encodeFilename(filename))
 698             if dn != '' and not os.path.exists(dn):
 699                 os.makedirs(dn)
 700         except (OSError, IOError) as err:
 701             self.report_error(u'unable to create directory ' + compat_str(err))
 702             return
 703
 704         if self.params.get('writedescription', False):
 705             try:
 706                 descfn = filename + u'.description'
 707                 self.report_writedescription(descfn)
 708                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 709                     descfile.write(info_dict['description'])
 710             except (KeyError, TypeError):
 711                 self.report_warning(u'There\'s no description to write.')
 712             except (OSError, IOError):
 713                 self.report_error(u'Cannot write description file ' + descfn)
 714                 return
 715
 716         if self.params.get('writeannotations', False):
 717             try:
 718                 annofn = filename + u'.annotations.xml'
 719                 self.report_writeannotations(annofn)
 720                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 721                     annofile.write(info_dict['annotations'])
 722             except (KeyError, TypeError):
 723                 self.report_warning(u'There are no annotations to write.')
 724             except (OSError, IOError):
 725                 self.report_error(u'Cannot write annotations file: ' + annofn)
 726                 return
 727
 728         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 729                                        self.params.get('writeautomaticsub')])
 730
 731         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 732             # subtitles download errors are already managed as troubles in relevant IE
 733             # that way it will silently go on when used with unsupporting IE
 734             subtitles = info_dict['subtitles']
 735             sub_format = self.params.get('subtitlesformat', 'srt')
 736             for sub_lang in subtitles.keys():
 737                 sub = subtitles[sub_lang]
 738                 if sub is None:
 739                     continue
 740                 try:
 741                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 742                     self.report_writesubtitles(sub_filename)
 743                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 744                             subfile.write(sub)
 745                 except (OSError, IOError):
 746                     self.report_error(u'Cannot write subtitles file ' + descfn)
 747                     return
 748
 749         if self.params.get('writeinfojson', False):
 750             infofn = os.path.splitext(filename)[0] + u'.info.json'
 751             self.report_writeinfojson(infofn)
 752             try:
 753                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 754                 write_json_file(json_info_dict, encodeFilename(infofn))
 755             except (OSError, IOError):
 756                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 757                 return
 758
 759         if self.params.get('writethumbnail', False):
 760             if info_dict.get('thumbnail') is not None:
 761                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 762                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 763                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 764                                (info_dict['extractor'], info_dict['id']))
 765                 try:
 766                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 767                     with open(thumb_filename, 'wb') as thumbf:
 768                         shutil.copyfileobj(uf, thumbf)
 769                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 770                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 771                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 772                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 773                         (info_dict['thumbnail'], compat_str(err)))
 774
 775         if not self.params.get('skip_download', False):
 776             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 777                 success = True
 778             else:
 779                 try:
 780                     success = self.fd._do_download(filename, info_dict)
 781                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 782                     self.report_error(u'unable to download video data: %s' % str(err))
 783                     return
 784                 except (OSError, IOError) as err:
 785                     raise UnavailableVideoError(err)
 786                 except (ContentTooShortError, ) as err:
 787                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 788                     return
 789
 790             if success:
 791                 try:
 792                     self.post_process(filename, info_dict)
 793                 except (PostProcessingError) as err:
 794                     self.report_error(u'postprocessing: %s' % str(err))
 795                     return
 796
 797         self.record_download_archive(info_dict)
 798
 799     def download(self, url_list):
 800         """Download a given list of URLs."""
 801         if len(url_list) > 1 and self.fixed_template():
 802             raise SameFileError(self.params['outtmpl'])
 803
 804         for url in url_list:
 805             try:
 806                 #It also downloads the videos
 807                 self.extract_info(url)
 808             except UnavailableVideoError:
 809                 self.report_error(u'unable to download video')
 810             except MaxDownloadsReached:
 811                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 812                 raise
 813
 814         return self._download_retcode
 815
 816     def post_process(self, filename, ie_info):
 817         """Run all the postprocessors on the given file."""
 818         info = dict(ie_info)
 819         info['filepath'] = filename
 820         keep_video = None
 821         for pp in self._pps:
 822             try:
 823                 keep_video_wish, new_info = pp.run(info)
 824                 if keep_video_wish is not None:
 825                     if keep_video_wish:
 826                         keep_video = keep_video_wish
 827                     elif keep_video is None:
 828                         # No clear decision yet, let IE decide
 829                         keep_video = keep_video_wish
 830             except PostProcessingError as e:
 831                 self.report_error(e.msg)
 832         if keep_video is False and not self.params.get('keepvideo', False):
 833             try:
 834                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 835                 os.remove(encodeFilename(filename))
 836             except (IOError, OSError):
 837                 self.report_warning(u'Unable to remove downloaded video file')
 838
 839     def _make_archive_id(self, info_dict):
 840         # Future-proof against any change in case
 841         # and backwards compatibility with prior versions
 842         extractor = info_dict.get('extractor')
 843         if extractor is None:
 844             if 'id' in info_dict:
 845                 extractor = info_dict.get('ie_key')  # key in a playlist
 846         if extractor is None:
 847             return None  # Incomplete video information
 848         return extractor.lower() + u' ' + info_dict['id']
 849
 850     def in_download_archive(self, info_dict):
 851         fn = self.params.get('download_archive')
 852         if fn is None:
 853             return False
 854
 855         vid_id = self._make_archive_id(info_dict)
 856         if vid_id is None:
 857             return False  # Incomplete video information
 858
 859         try:
 860             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 861                 for line in archive_file:
 862                     if line.strip() == vid_id:
 863                         return True
 864         except IOError as ioe:
 865             if ioe.errno != errno.ENOENT:
 866                 raise
 867         return False
 868
 869     def record_download_archive(self, info_dict):
 870         fn = self.params.get('download_archive')
 871         if fn is None:
 872             return
 873         vid_id = self._make_archive_id(info_dict)
 874         assert vid_id
 875         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 876             archive_file.write(vid_id + u'\n')
 877
 878     @staticmethod
 879     def format_resolution(format, default='unknown'):
 880         if format.get('_resolution') is not None:
 881             return format['_resolution']
 882         if format.get('height') is not None:
 883             if format.get('width') is not None:
 884                 res = u'%sx%s' % (format['width'], format['height'])
 885             else:
 886                 res = u'%sp' % format['height']
 887         else:
 888             res = default
 889         return res
 890
 891     def list_formats(self, info_dict):
 892         def format_note(fdict):
 893             res = u''
 894             if fdict.get('format_note') is not None:
 895                 res += fdict['format_note'] + u' '
 896             if fdict.get('vcodec') is not None:
 897                 res += u'%-5s' % fdict['vcodec']
 898             elif fdict.get('vbr') is not None:
 899                 res += u'video'
 900             if fdict.get('vbr') is not None:
 901                 res += u'@%4dk' % fdict['vbr']
 902             if fdict.get('acodec') is not None:
 903                 if res:
 904                     res += u', '
 905                 res += u'%-5s' % fdict['acodec']
 906             elif fdict.get('abr') is not None:
 907                 if res:
 908                     res += u', '
 909                 res += 'audio'
 910             if fdict.get('abr') is not None:
 911                 res += u'@%3dk' % fdict['abr']
 912             if fdict.get('filesize') is not None:
 913                 if res:
 914                     res += u', '
 915                 res += format_bytes(fdict['filesize'])
 916             return res
 917
 918         def line(format, idlen=20):
 919             return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
 920                 format['format_id'],
 921                 format['ext'],
 922                 self.format_resolution(format),
 923                 format_note(format),
 924             ))
 925
 926         formats = info_dict.get('formats', [info_dict])
 927         idlen = max(len(u'format code'),
 928                     max(len(f['format_id']) for f in formats))
 929         formats_s = [line(f, idlen) for f in formats]
 930         if len(formats) > 1:
 931             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
 932             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
 933
 934         header_line = line({
 935             'format_id': u'format code', 'ext': u'extension',
 936             '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
 937         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
 938                        (info_dict['id'], header_line, u"\n".join(formats_s)))
 939
 940     def urlopen(self, req):
 941         """ Start an HTTP download """
 942         return self._opener.open(req)
 943
 944     def print_debug_header(self):
 945         if not self.params.get('verbose'):
 946             return
 947         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
 948         try:
 949             sp = subprocess.Popen(
 950                 ['git', 'rev-parse', '--short', 'HEAD'],
 951                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
 952                 cwd=os.path.dirname(os.path.abspath(__file__)))
 953             out, err = sp.communicate()
 954             out = out.decode().strip()
 955             if re.match('[0-9a-f]+', out):
 956                 write_string(u'[debug] Git HEAD: ' + out + u'\n')
 957         except:
 958             try:
 959                 sys.exc_clear()
 960             except:
 961                 pass
 962         write_string(u'[debug] Python version %s - %s' %
 963                      (platform.python_version(), platform_name()) + u'\n')
 964
 965         proxy_map = {}
 966         for handler in self._opener.handlers:
 967             if hasattr(handler, 'proxies'):
 968                 proxy_map.update(handler.proxies)
 969         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
 970
 971     def _setup_opener(self, timeout=300):
 972         opts_cookiefile = self.params.get('cookiefile')
 973         opts_proxy = self.params.get('proxy')
 974
 975         if opts_cookiefile is None:
 976             self.cookiejar = compat_cookiejar.CookieJar()
 977         else:
 978             self.cookiejar = compat_cookiejar.MozillaCookieJar(
 979                 opts_cookiefile)
 980             if os.access(opts_cookiefile, os.R_OK):
 981                 self.cookiejar.load()
 982
 983         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
 984             self.cookiejar)
 985         if opts_proxy is not None:
 986             if opts_proxy == '':
 987                 proxies = {}
 988             else:
 989                 proxies = {'http': opts_proxy, 'https': opts_proxy}
 990         else:
 991             proxies = compat_urllib_request.getproxies()
 992             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
 993             if 'http' in proxies and 'https' not in proxies:
 994                 proxies['https'] = proxies['http']
 995         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
 996         https_handler = make_HTTPS_handler(
 997             self.params.get('nocheckcertificate', False))
 998         opener = compat_urllib_request.build_opener(
 999             https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1000         # Delete the default user-agent header, which would otherwise apply in
1001         # cases where our custom HTTP handler doesn't come into play
1002         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1003         opener.addheaders = []
1004         self._opener = opener
1005
1006         # TODO remove this global modification
1007         compat_urllib_request.install_opener(opener)
1008         socket.setdefaulttimeout(timeout)