youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import math
   7 import io
   8 import os
   9 import re
  10 import socket
  11 import subprocess
  12 import sys
  13 import time
  14 import traceback
  15
  16 if os.name == 'nt':
  17     import ctypes
  18
  19 from .utils import *
  20
  21
  22 class FileDownloader(object):
  23     """File Downloader class.
  24
  25     File downloader objects are the ones responsible of downloading the
  26     actual video file and writing it to disk if the user has requested
  27     it, among some other tasks. In most cases there should be one per
  28     program. As, given a video URL, the downloader doesn't know how to
  29     extract all the needed information, task that InfoExtractors do, it
  30     has to pass the URL to one of them.
  31
  32     For this, file downloader objects have a method that allows
  33     InfoExtractors to be registered in a given order. When it is passed
  34     a URL, the file downloader handles it to the first InfoExtractor it
  35     finds that reports being able to handle it. The InfoExtractor extracts
  36     all the information about the video or videos the URL refers to, and
  37     asks the FileDownloader to process the video information, possibly
  38     downloading the video.
  39
  40     File downloaders accept a lot of parameters. In order not to saturate
  41     the object constructor with arguments, it receives a dictionary of
  42     options instead. These options are available through the params
  43     attribute for the InfoExtractors to use. The FileDownloader also
  44     registers itself as the downloader in charge for the InfoExtractors
  45     that are added to it, so this is a "mutual registration".
  46
  47     Available options:
  48
  49     username:          Username for authentication purposes.
  50     password:          Password for authentication purposes.
  51     usenetrc:          Use netrc for authentication instead.
  52     quiet:             Do not print messages to stdout.
  53     forceurl:          Force printing final URL.
  54     forcetitle:        Force printing title.
  55     forcethumbnail:    Force printing thumbnail URL.
  56     forcedescription:  Force printing description.
  57     forcefilename:     Force printing final filename.
  58     simulate:          Do not download the video files.
  59     format:            Video format code.
  60     format_limit:      Highest quality format to try.
  61     outtmpl:           Template for output names.
  62     restrictfilenames: Do not allow "&" and spaces in file names
  63     ignoreerrors:      Do not stop on download errors.
  64     ratelimit:         Download speed limit, in bytes/sec.
  65     nooverwrites:      Prevent overwriting files.
  66     retries:           Number of times to retry for HTTP error 5xx
  67     buffersize:        Size of download buffer in bytes.
  68     noresizebuffer:    Do not automatically resize the download buffer.
  69     continuedl:        Try to continue downloads if possible.
  70     noprogress:        Do not print the progress bar.
  71     playliststart:     Playlist item to start at.
  72     playlistend:       Playlist item to end at.
  73     matchtitle:        Download only matching titles.
  74     rejecttitle:       Reject downloads for matching titles.
  75     logtostderr:       Log messages to stderr instead of stdout.
  76     consoletitle:      Display progress in console window's titlebar.
  77     nopart:            Do not use temporary .part files.
  78     updatetime:        Use the Last-modified header to set output file timestamps.
  79     writedescription:  Write the video description to a .description file
  80     writeinfojson:     Write the video description to a .info.json file
  81     writesubtitles:    Write the video subtitles to a file (default=srt)
  82     onlysubtitles:     Downloads only the subtitles of the video
  83     allsubtitles:      Downloads all the subtitles of the video
  84     subtitleslang:     Language of the subtitles to download
  85     test:              Download only first bytes to test the downloader.
  86     keepvideo:         Keep the video file after post-processing
  87     min_filesize:      Skip files smaller than this size
  88     max_filesize:      Skip files larger than this size
  89     """
  90
  91     params = None
  92     _ies = []
  93     _pps = []
  94     _download_retcode = None
  95     _num_downloads = None
  96     _screen_file = None
  97
  98     def __init__(self, params):
  99         """Create a FileDownloader object with the given options."""
 100         self._ies = []
 101         self._pps = []
 102         self._progress_hooks = []
 103         self._download_retcode = 0
 104         self._num_downloads = 0
 105         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 106         self.params = params
 107
 108         if '%(stitle)s' in self.params['outtmpl']:
 109             self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 110
 111     @staticmethod
 112     def format_bytes(bytes):
 113         if bytes is None:
 114             return 'N/A'
 115         if type(bytes) is str:
 116             bytes = float(bytes)
 117         if bytes == 0.0:
 118             exponent = 0
 119         else:
 120             exponent = int(math.log(bytes, 1024.0))
 121         suffix = 'bkMGTPEZY'[exponent]
 122         converted = float(bytes) / float(1024 ** exponent)
 123         return '%.2f%s' % (converted, suffix)
 124
 125     @staticmethod
 126     def calc_percent(byte_counter, data_len):
 127         if data_len is None:
 128             return '---.-%'
 129         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 130
 131     @staticmethod
 132     def calc_eta(start, now, total, current):
 133         if total is None:
 134             return '--:--'
 135         dif = now - start
 136         if current == 0 or dif < 0.001: # One millisecond
 137             return '--:--'
 138         rate = float(current) / dif
 139         eta = int((float(total) - float(current)) / rate)
 140         (eta_mins, eta_secs) = divmod(eta, 60)
 141         if eta_mins > 99:
 142             return '--:--'
 143         return '%02d:%02d' % (eta_mins, eta_secs)
 144
 145     @staticmethod
 146     def calc_speed(start, now, bytes):
 147         dif = now - start
 148         if bytes == 0 or dif < 0.001: # One millisecond
 149             return '%10s' % '---b/s'
 150         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 151
 152     @staticmethod
 153     def best_block_size(elapsed_time, bytes):
 154         new_min = max(bytes / 2.0, 1.0)
 155         new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 156         if elapsed_time < 0.001:
 157             return int(new_max)
 158         rate = bytes / elapsed_time
 159         if rate > new_max:
 160             return int(new_max)
 161         if rate < new_min:
 162             return int(new_min)
 163         return int(rate)
 164
 165     @staticmethod
 166     def parse_bytes(bytestr):
 167         """Parse a string indicating a byte quantity into an integer."""
 168         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 169         if matchobj is None:
 170             return None
 171         number = float(matchobj.group(1))
 172         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 173         return int(round(number * multiplier))
 174
 175     def add_info_extractor(self, ie):
 176         """Add an InfoExtractor object to the end of the list."""
 177         self._ies.append(ie)
 178         ie.set_downloader(self)
 179
 180     def add_post_processor(self, pp):
 181         """Add a PostProcessor object to the end of the chain."""
 182         self._pps.append(pp)
 183         pp.set_downloader(self)
 184
 185     def to_screen(self, message, skip_eol=False):
 186         """Print message to stdout if not in quiet mode."""
 187         assert type(message) == type(u'')
 188         if not self.params.get('quiet', False):
 189             terminator = [u'\n', u''][skip_eol]
 190             output = message + terminator
 191             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 192                 output = output.encode(preferredencoding(), 'ignore')
 193             self._screen_file.write(output)
 194             self._screen_file.flush()
 195
 196     def to_stderr(self, message):
 197         """Print message to stderr."""
 198         assert type(message) == type(u'')
 199         output = message + u'\n'
 200         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 201             output = output.encode(preferredencoding())
 202         sys.stderr.write(output)
 203
 204     def to_cons_title(self, message):
 205         """Set console/terminal window title to message."""
 206         if not self.params.get('consoletitle', False):
 207             return
 208         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 209             # c_wchar_p() might not be necessary if `message` is
 210             # already of type unicode()
 211             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 212         elif 'TERM' in os.environ:
 213             self.to_screen('\033]0;%s\007' % message, skip_eol=True)
 214
 215     def fixed_template(self):
 216         """Checks if the output template is fixed."""
 217         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 218
 219     def trouble(self, message=None, tb=None):
 220         """Determine action to take when a download problem appears.
 221
 222         Depending on if the downloader has been configured to ignore
 223         download errors or not, this method may throw an exception or
 224         not when errors are found, after printing the message.
 225
 226         tb, if given, is additional traceback information.
 227         """
 228         if message is not None:
 229             self.to_stderr(message)
 230         if self.params.get('verbose'):
 231             if tb is None:
 232                 tb_data = traceback.format_list(traceback.extract_stack())
 233                 tb = u''.join(tb_data)
 234             self.to_stderr(tb)
 235         if not self.params.get('ignoreerrors', False):
 236             raise DownloadError(message)
 237         self._download_retcode = 1
 238
 239     def slow_down(self, start_time, byte_counter):
 240         """Sleep if the download speed is over the rate limit."""
 241         rate_limit = self.params.get('ratelimit', None)
 242         if rate_limit is None or byte_counter == 0:
 243             return
 244         now = time.time()
 245         elapsed = now - start_time
 246         if elapsed <= 0.0:
 247             return
 248         speed = float(byte_counter) / elapsed
 249         if speed > rate_limit:
 250             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 251
 252     def temp_name(self, filename):
 253         """Returns a temporary filename for the given filename."""
 254         if self.params.get('nopart', False) or filename == u'-' or \
 255                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 256             return filename
 257         return filename + u'.part'
 258
 259     def undo_temp_name(self, filename):
 260         if filename.endswith(u'.part'):
 261             return filename[:-len(u'.part')]
 262         return filename
 263
 264     def try_rename(self, old_filename, new_filename):
 265         try:
 266             if old_filename == new_filename:
 267                 return
 268             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 269         except (IOError, OSError) as err:
 270             self.trouble(u'ERROR: unable to rename file')
 271
 272     def try_utime(self, filename, last_modified_hdr):
 273         """Try to set the last-modified time of the given file."""
 274         if last_modified_hdr is None:
 275             return
 276         if not os.path.isfile(encodeFilename(filename)):
 277             return
 278         timestr = last_modified_hdr
 279         if timestr is None:
 280             return
 281         filetime = timeconvert(timestr)
 282         if filetime is None:
 283             return filetime
 284         try:
 285             os.utime(filename, (time.time(), filetime))
 286         except:
 287             pass
 288         return filetime
 289
 290     def report_writedescription(self, descfn):
 291         """ Report that the description file is being written """
 292         self.to_screen(u'[info] Writing video description to: ' + descfn)
 293
 294     def report_writesubtitles(self, sub_filename):
 295         """ Report that the subtitles file is being written """
 296         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 297
 298     def report_writeinfojson(self, infofn):
 299         """ Report that the metadata file has been written """
 300         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 301
 302     def report_destination(self, filename):
 303         """Report destination filename."""
 304         self.to_screen(u'[download] Destination: ' + filename)
 305
 306     def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 307         """Report download progress."""
 308         if self.params.get('noprogress', False):
 309             return
 310         if self.params.get('progress_with_newline', False):
 311             self.to_screen(u'[download] %s of %s at %s ETA %s' %
 312                 (percent_str, data_len_str, speed_str, eta_str))
 313         else:
 314             self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 315                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 316         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 317                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 318
 319     def report_resuming_byte(self, resume_len):
 320         """Report attempt to resume at given byte."""
 321         self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 322
 323     def report_retry(self, count, retries):
 324         """Report retry in case of HTTP error 5xx"""
 325         self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 326
 327     def report_file_already_downloaded(self, file_name):
 328         """Report file has already been fully downloaded."""
 329         try:
 330             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 331         except (UnicodeEncodeError) as err:
 332             self.to_screen(u'[download] The file has already been downloaded')
 333
 334     def report_unable_to_resume(self):
 335         """Report it was impossible to resume download."""
 336         self.to_screen(u'[download] Unable to resume')
 337
 338     def report_finish(self):
 339         """Report download finished."""
 340         if self.params.get('noprogress', False):
 341             self.to_screen(u'[download] Download completed')
 342         else:
 343             self.to_screen(u'')
 344
 345     def increment_downloads(self):
 346         """Increment the ordinal that assigns a number to each file."""
 347         self._num_downloads += 1
 348
 349     def prepare_filename(self, info_dict):
 350         """Generate the output filename."""
 351         try:
 352             template_dict = dict(info_dict)
 353
 354             template_dict['epoch'] = int(time.time())
 355             template_dict['autonumber'] = u'%05d' % self._num_downloads
 356
 357             sanitize = lambda k,v: sanitize_filename(
 358                 u'NA' if v is None else compat_str(v),
 359                 restricted=self.params.get('restrictfilenames'),
 360                 is_id=(k==u'id'))
 361             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
 362
 363             filename = self.params['outtmpl'] % template_dict
 364             return filename
 365         except (ValueError, KeyError) as err:
 366             self.trouble(u'ERROR: invalid system charset or erroneous output template')
 367             return None
 368
 369     def _match_entry(self, info_dict):
 370         """ Returns None iff the file should be downloaded """
 371
 372         title = info_dict['title']
 373         matchtitle = self.params.get('matchtitle', False)
 374         if matchtitle:
 375             if not re.search(matchtitle, title, re.IGNORECASE):
 376                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 377         rejecttitle = self.params.get('rejecttitle', False)
 378         if rejecttitle:
 379             if re.search(rejecttitle, title, re.IGNORECASE):
 380                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 381         return None
 382
 383     def process_info(self, info_dict):
 384         """Process a single dictionary returned by an InfoExtractor."""
 385
 386         # Keep for backwards compatibility
 387         info_dict['stitle'] = info_dict['title']
 388
 389         if not 'format' in info_dict:
 390             info_dict['format'] = info_dict['ext']
 391
 392         reason = self._match_entry(info_dict)
 393         if reason is not None:
 394             self.to_screen(u'[download] ' + reason)
 395             return
 396
 397         max_downloads = self.params.get('max_downloads')
 398         if max_downloads is not None:
 399             if self._num_downloads > int(max_downloads):
 400                 raise MaxDownloadsReached()
 401
 402         filename = self.prepare_filename(info_dict)
 403
 404         # Forced printings
 405         if self.params.get('forcetitle', False):
 406             compat_print(info_dict['title'])
 407         if self.params.get('forceurl', False):
 408             compat_print(info_dict['url'])
 409         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 410             compat_print(info_dict['thumbnail'])
 411         if self.params.get('forcedescription', False) and 'description' in info_dict:
 412             compat_print(info_dict['description'])
 413         if self.params.get('forcefilename', False) and filename is not None:
 414             compat_print(filename)
 415         if self.params.get('forceformat', False):
 416             compat_print(info_dict['format'])
 417
 418         # Do nothing else if in simulate mode
 419         if self.params.get('simulate', False):
 420             return
 421
 422         if filename is None:
 423             return
 424
 425         try:
 426             dn = os.path.dirname(encodeFilename(filename))
 427             if dn != '' and not os.path.exists(dn): # dn is already encoded
 428                 os.makedirs(dn)
 429         except (OSError, IOError) as err:
 430             self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
 431             return
 432
 433         if self.params.get('writedescription', False):
 434             try:
 435                 descfn = filename + u'.description'
 436                 self.report_writedescription(descfn)
 437                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 438                     descfile.write(info_dict['description'])
 439             except (OSError, IOError):
 440                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
 441                 return
 442
 443         if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 444             # subtitles download errors are already managed as troubles in relevant IE
 445             # that way it will silently go on when used with unsupporting IE
 446             subtitle = info_dict['subtitles'][0]
 447             (sub_error, sub_lang, sub) = subtitle
 448             try:
 449                 sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.srt'
 450                 self.report_writesubtitles(sub_filename)
 451                 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 452                     subfile.write(sub)
 453             except (OSError, IOError):
 454                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
 455                 return
 456             if self.params.get('onlysubtitles', False):
 457                 return
 458
 459         if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 460             subtitles = info_dict['subtitles']
 461             for subtitle in subtitles:
 462                 (sub_error, sub_lang, sub) = subtitle
 463                 try:
 464                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.srt'
 465                     self.report_writesubtitles(sub_filename)
 466                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 467                             subfile.write(sub)
 468                 except (OSError, IOError):
 469                     self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
 470                     return
 471             if self.params.get('onlysubtitles', False):
 472                 return
 473
 474         if self.params.get('writeinfojson', False):
 475             infofn = filename + u'.info.json'
 476             self.report_writeinfojson(infofn)
 477             try:
 478                 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
 479                 write_json_file(json_info_dict, encodeFilename(infofn))
 480             except (OSError, IOError):
 481                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
 482                 return
 483
 484         if not self.params.get('skip_download', False):
 485             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 486                 success = True
 487             else:
 488                 try:
 489                     success = self._do_download(filename, info_dict)
 490                 except (OSError, IOError) as err:
 491                     raise UnavailableVideoError()
 492                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 493                     self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 494                     return
 495                 except (ContentTooShortError, ) as err:
 496                     self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 497                     return
 498
 499             if success:
 500                 try:
 501                     self.post_process(filename, info_dict)
 502                 except (PostProcessingError) as err:
 503                     self.trouble(u'ERROR: postprocessing: %s' % str(err))
 504                     return
 505
 506     def download(self, url_list):
 507         """Download a given list of URLs."""
 508         if len(url_list) > 1 and self.fixed_template():
 509             raise SameFileError(self.params['outtmpl'])
 510
 511         for url in url_list:
 512             suitable_found = False
 513             for ie in self._ies:
 514                 # Go to next InfoExtractor if not suitable
 515                 if not ie.suitable(url):
 516                     continue
 517
 518                 # Warn if the _WORKING attribute is False
 519                 if not ie.working():
 520                     self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
 521                                    u'and will probably not work. If you want to go on, use the -i option.')
 522
 523                 # Suitable InfoExtractor found
 524                 suitable_found = True
 525
 526                 # Extract information from URL and process it
 527                 try:
 528                     videos = ie.extract(url)
 529                 except ExtractorError as de: # An error we somewhat expected
 530                     self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
 531                     break
 532                 except Exception as e:
 533                     if self.params.get('ignoreerrors', False):
 534                         self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
 535                         break
 536                     else:
 537                         raise
 538
 539                 if len(videos or []) > 1 and self.fixed_template():
 540                     raise SameFileError(self.params['outtmpl'])
 541
 542                 for video in videos or []:
 543                     video['extractor'] = ie.IE_NAME
 544                     try:
 545                         self.increment_downloads()
 546                         self.process_info(video)
 547                     except UnavailableVideoError:
 548                         self.trouble(u'\nERROR: unable to download video')
 549
 550                 # Suitable InfoExtractor had been found; go to next URL
 551                 break
 552
 553             if not suitable_found:
 554                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 555
 556         return self._download_retcode
 557
 558     def post_process(self, filename, ie_info):
 559         """Run all the postprocessors on the given file."""
 560         info = dict(ie_info)
 561         info['filepath'] = filename
 562         keep_video = None
 563         for pp in self._pps:
 564             try:
 565                 keep_video_wish,new_info = pp.run(info)
 566                 if keep_video_wish is not None:
 567                     if keep_video_wish:
 568                         keep_video = keep_video_wish
 569                     elif keep_video is None:
 570                         # No clear decision yet, let IE decide
 571                         keep_video = keep_video_wish
 572             except PostProcessingError as e:
 573                 self.to_stderr(u'ERROR: ' + e.msg)
 574         if keep_video is False and not self.params.get('keepvideo', False):
 575             try:
 576                 self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
 577                 os.remove(encodeFilename(filename))
 578             except (IOError, OSError):
 579                 self.to_stderr(u'WARNING: Unable to remove downloaded video file')
 580
 581     def _download_with_rtmpdump(self, filename, url, player_url, page_url):
 582         self.report_destination(filename)
 583         tmpfilename = self.temp_name(filename)
 584
 585         # Check for rtmpdump first
 586         try:
 587             subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 588         except (OSError, IOError):
 589             self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 590             return False
 591
 592         # Download using rtmpdump. rtmpdump returns exit code 2 when
 593         # the connection was interrumpted and resuming appears to be
 594         # possible. This is part of rtmpdump's normal usage, AFAIK.
 595         basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
 596         if player_url is not None:
 597             basic_args += ['-W', player_url]
 598         if page_url is not None:
 599             basic_args += ['--pageUrl', page_url]
 600         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 601         if self.params.get('verbose', False):
 602             try:
 603                 import pipes
 604                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 605             except ImportError:
 606                 shell_quote = repr
 607             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 608         retval = subprocess.call(args)
 609         while retval == 2 or retval == 1:
 610             prevsize = os.path.getsize(encodeFilename(tmpfilename))
 611             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 612             time.sleep(5.0) # This seems to be needed
 613             retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 614             cursize = os.path.getsize(encodeFilename(tmpfilename))
 615             if prevsize == cursize and retval == 1:
 616                 break
 617              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 618             if prevsize == cursize and retval == 2 and cursize > 1024:
 619                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 620                 retval = 0
 621                 break
 622         if retval == 0:
 623             fsize = os.path.getsize(encodeFilename(tmpfilename))
 624             self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
 625             self.try_rename(tmpfilename, filename)
 626             self._hook_progress({
 627                 'downloaded_bytes': fsize,
 628                 'total_bytes': fsize,
 629                 'filename': filename,
 630                 'status': 'finished',
 631             })
 632             return True
 633         else:
 634             self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 635             return False
 636
 637     def _do_download(self, filename, info_dict):
 638         url = info_dict['url']
 639
 640         # Check file already present
 641         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 642             self.report_file_already_downloaded(filename)
 643             self._hook_progress({
 644                 'filename': filename,
 645                 'status': 'finished',
 646             })
 647             return True
 648
 649         # Attempt to download using rtmpdump
 650         if url.startswith('rtmp'):
 651             return self._download_with_rtmpdump(filename, url,
 652                                                 info_dict.get('player_url', None),
 653                                                 info_dict.get('page_url', None))
 654
 655         tmpfilename = self.temp_name(filename)
 656         stream = None
 657
 658         # Do not include the Accept-Encoding header
 659         headers = {'Youtubedl-no-compression': 'True'}
 660         if 'user_agent' in info_dict:
 661             headers['Youtubedl-user-agent'] = info_dict['user_agent']
 662         basic_request = compat_urllib_request.Request(url, None, headers)
 663         request = compat_urllib_request.Request(url, None, headers)
 664
 665         if self.params.get('test', False):
 666             request.add_header('Range','bytes=0-10240')
 667
 668         # Establish possible resume length
 669         if os.path.isfile(encodeFilename(tmpfilename)):
 670             resume_len = os.path.getsize(encodeFilename(tmpfilename))
 671         else:
 672             resume_len = 0
 673
 674         open_mode = 'wb'
 675         if resume_len != 0:
 676             if self.params.get('continuedl', False):
 677                 self.report_resuming_byte(resume_len)
 678                 request.add_header('Range','bytes=%d-' % resume_len)
 679                 open_mode = 'ab'
 680             else:
 681                 resume_len = 0
 682
 683         count = 0
 684         retries = self.params.get('retries', 0)
 685         while count <= retries:
 686             # Establish connection
 687             try:
 688                 if count == 0 and 'urlhandle' in info_dict:
 689                     data = info_dict['urlhandle']
 690                 data = compat_urllib_request.urlopen(request)
 691                 break
 692             except (compat_urllib_error.HTTPError, ) as err:
 693                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 694                     # Unexpected HTTP error
 695                     raise
 696                 elif err.code == 416:
 697                     # Unable to resume (requested range not satisfiable)
 698                     try:
 699                         # Open the connection again without the range header
 700                         data = compat_urllib_request.urlopen(basic_request)
 701                         content_length = data.info()['Content-Length']
 702                     except (compat_urllib_error.HTTPError, ) as err:
 703                         if err.code < 500 or err.code >= 600:
 704                             raise
 705                     else:
 706                         # Examine the reported length
 707                         if (content_length is not None and
 708                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
 709                             # The file had already been fully downloaded.
 710                             # Explanation to the above condition: in issue #175 it was revealed that
 711                             # YouTube sometimes adds or removes a few bytes from the end of the file,
 712                             # changing the file size slightly and causing problems for some users. So
 713                             # I decided to implement a suggested change and consider the file
 714                             # completely downloaded if the file size differs less than 100 bytes from
 715                             # the one in the hard drive.
 716                             self.report_file_already_downloaded(filename)
 717                             self.try_rename(tmpfilename, filename)
 718                             self._hook_progress({
 719                                 'filename': filename,
 720                                 'status': 'finished',
 721                             })
 722                             return True
 723                         else:
 724                             # The length does not match, we start the download over
 725                             self.report_unable_to_resume()
 726                             open_mode = 'wb'
 727                             break
 728             # Retry
 729             count += 1
 730             if count <= retries:
 731                 self.report_retry(count, retries)
 732
 733         if count > retries:
 734             self.trouble(u'ERROR: giving up after %s retries' % retries)
 735             return False
 736
 737         data_len = data.info().get('Content-length', None)
 738         if data_len is not None:
 739             data_len = int(data_len) + resume_len
 740             min_data_len = self.params.get("min_filesize", None)
 741             max_data_len =  self.params.get("max_filesize", None)
 742             if min_data_len is not None and data_len < min_data_len:
 743                 self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
 744                 return False
 745             if max_data_len is not None and data_len > max_data_len:
 746                 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
 747                 return False
 748
 749         data_len_str = self.format_bytes(data_len)
 750         byte_counter = 0 + resume_len
 751         block_size = self.params.get('buffersize', 1024)
 752         start = time.time()
 753         while True:
 754             # Download and write
 755             before = time.time()
 756             data_block = data.read(block_size)
 757             after = time.time()
 758             if len(data_block) == 0:
 759                 break
 760             byte_counter += len(data_block)
 761
 762             # Open file just in time
 763             if stream is None:
 764                 try:
 765                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 766                     assert stream is not None
 767                     filename = self.undo_temp_name(tmpfilename)
 768                     self.report_destination(filename)
 769                 except (OSError, IOError) as err:
 770                     self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 771                     return False
 772             try:
 773                 stream.write(data_block)
 774             except (IOError, OSError) as err:
 775                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 776                 return False
 777             if not self.params.get('noresizebuffer', False):
 778                 block_size = self.best_block_size(after - before, len(data_block))
 779
 780             # Progress message
 781             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 782             if data_len is None:
 783                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 784             else:
 785                 percent_str = self.calc_percent(byte_counter, data_len)
 786                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 787                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 788
 789             self._hook_progress({
 790                 'downloaded_bytes': byte_counter,
 791                 'total_bytes': data_len,
 792                 'tmpfilename': tmpfilename,
 793                 'filename': filename,
 794                 'status': 'downloading',
 795             })
 796
 797             # Apply rate limit
 798             self.slow_down(start, byte_counter - resume_len)
 799
 800         if stream is None:
 801             self.trouble(u'\nERROR: Did not get any data blocks')
 802             return False
 803         stream.close()
 804         self.report_finish()
 805         if data_len is not None and byte_counter != data_len:
 806             raise ContentTooShortError(byte_counter, int(data_len))
 807         self.try_rename(tmpfilename, filename)
 808
 809         # Update file modification time
 810         if self.params.get('updatetime', True):
 811             info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 812
 813         self._hook_progress({
 814             'downloaded_bytes': byte_counter,
 815             'total_bytes': byte_counter,
 816             'filename': filename,
 817             'status': 'finished',
 818         })
 819
 820         return True
 821
 822     def _hook_progress(self, status):
 823         for ph in self._progress_hooks:
 824             ph(status)
 825
 826     def add_progress_hook(self, ph):
 827         """ ph gets called on download progress, with a dictionary with the entries
 828         * filename: The final filename
 829         * status: One of "downloading" and "finished"
 830
 831         It can also have some of the following entries:
 832
 833         * downloaded_bytes: Bytes on disks
 834         * total_bytes: Total bytes, None if unknown
 835         * tmpfilename: The filename we're currently writing to
 836
 837         Hooks are guaranteed to be called at least once (with status "finished")
 838         if the download is successful.
 839         """
 840         self._progress_hooks.append(ph)