youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import httplib
   5 import math
   6 import os
   7 import re
   8 import socket
   9 import subprocess
  10 import sys
  11 import time
  12 import urllib2
  13
  14 if os.name == 'nt':
  15         import ctypes
  16
  17 from utils import *
  18
  19
  20 class FileDownloader(object):
  21         """File Downloader class.
  22
        File downloader objects are the ones responsible for downloading the
  24         actual video file and writing it to disk if the user has requested
  25         it, among some other tasks. In most cases there should be one per
  26         program. As, given a video URL, the downloader doesn't know how to
  27         extract all the needed information, task that InfoExtractors do, it
  28         has to pass the URL to one of them.
  29
  30         For this, file downloader objects have a method that allows
  31         InfoExtractors to be registered in a given order. When it is passed
        a URL, the file downloader hands it to the first InfoExtractor it
  33         finds that reports being able to handle it. The InfoExtractor extracts
  34         all the information about the video or videos the URL refers to, and
  35         asks the FileDownloader to process the video information, possibly
  36         downloading the video.
  37
  38         File downloaders accept a lot of parameters. In order not to saturate
  39         the object constructor with arguments, it receives a dictionary of
  40         options instead. These options are available through the params
  41         attribute for the InfoExtractors to use. The FileDownloader also
  42         registers itself as the downloader in charge for the InfoExtractors
  43         that are added to it, so this is a "mutual registration".
  44
  45         Available options:
  46
  47         username:         Username for authentication purposes.
  48         password:         Password for authentication purposes.
  49         usenetrc:         Use netrc for authentication instead.
  50         quiet:            Do not print messages to stdout.
  51         forceurl:         Force printing final URL.
  52         forcetitle:       Force printing title.
  53         forcethumbnail:   Force printing thumbnail URL.
  54         forcedescription: Force printing description.
  55         forcefilename:    Force printing final filename.
  56         simulate:         Do not download the video files.
  57         format:           Video format code.
  58         format_limit:     Highest quality format to try.
  59         outtmpl:          Template for output names.
  60         ignoreerrors:     Do not stop on download errors.
  61         ratelimit:        Download speed limit, in bytes/sec.
  62         nooverwrites:     Prevent overwriting files.
  63         retries:          Number of times to retry for HTTP error 5xx
  64         continuedl:       Try to continue downloads if possible.
  65         noprogress:       Do not print the progress bar.
  66         playliststart:    Playlist item to start at.
  67         playlistend:      Playlist item to end at.
  68         matchtitle:       Download only matching titles.
  69         rejecttitle:      Reject downloads for matching titles.
  70         logtostderr:      Log messages to stderr instead of stdout.
  71         consoletitle:     Display progress in console window's titlebar.
  72         nopart:           Do not use temporary .part files.
  73         updatetime:       Use the Last-modified header to set output file timestamps.
  74         writedescription: Write the video description to a .description file
        writeinfojson:    Write the video metadata to a .info.json file
  76         writesubtitles:   Write the video subtitles to a .srt file
  77         subtitleslang:    Language of the subtitles to download
  78         """
  79
  80         params = None
  81         _ies = []
  82         _pps = []
  83         _download_retcode = None
  84         _num_downloads = None
  85         _screen_file = None
  86
  87         def __init__(self, params):
  88                 """Create a FileDownloader object with the given options."""
  89                 self._ies = []
  90                 self._pps = []
  91                 self._download_retcode = 0
  92                 self._num_downloads = 0
  93                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
  94                 self.params = params
  95
  96         @staticmethod
  97         def format_bytes(bytes):
  98                 if bytes is None:
  99                         return 'N/A'
 100                 if type(bytes) is str:
 101                         bytes = float(bytes)
 102                 if bytes == 0.0:
 103                         exponent = 0
 104                 else:
 105                         exponent = long(math.log(bytes, 1024.0))
 106                 suffix = 'bkMGTPEZY'[exponent]
 107                 converted = float(bytes) / float(1024 ** exponent)
 108                 return '%.2f%s' % (converted, suffix)
 109
 110         @staticmethod
 111         def calc_percent(byte_counter, data_len):
 112                 if data_len is None:
 113                         return '---.-%'
 114                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 115
 116         @staticmethod
 117         def calc_eta(start, now, total, current):
 118                 if total is None:
 119                         return '--:--'
 120                 dif = now - start
 121                 if current == 0 or dif < 0.001: # One millisecond
 122                         return '--:--'
 123                 rate = float(current) / dif
 124                 eta = long((float(total) - float(current)) / rate)
 125                 (eta_mins, eta_secs) = divmod(eta, 60)
 126                 if eta_mins > 99:
 127                         return '--:--'
 128                 return '%02d:%02d' % (eta_mins, eta_secs)
 129
 130         @staticmethod
 131         def calc_speed(start, now, bytes):
 132                 dif = now - start
 133                 if bytes == 0 or dif < 0.001: # One millisecond
 134                         return '%10s' % '---b/s'
 135                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 136
 137         @staticmethod
 138         def best_block_size(elapsed_time, bytes):
 139                 new_min = max(bytes / 2.0, 1.0)
 140                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 141                 if elapsed_time < 0.001:
 142                         return long(new_max)
 143                 rate = bytes / elapsed_time
 144                 if rate > new_max:
 145                         return long(new_max)
 146                 if rate < new_min:
 147                         return long(new_min)
 148                 return long(rate)
 149
 150         @staticmethod
 151         def parse_bytes(bytestr):
 152                 """Parse a string indicating a byte quantity into a long integer."""
 153                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 154                 if matchobj is None:
 155                         return None
 156                 number = float(matchobj.group(1))
 157                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 158                 return long(round(number * multiplier))
 159
        def add_info_extractor(self, ie):
                """Add an InfoExtractor object to the end of the list.

                The extractor is also handed this downloader through its
                set_downloader() method (the "mutual registration" mentioned in
                the class docstring).
                """
                self._ies.append(ie)
                ie.set_downloader(self)
 164
        def add_post_processor(self, pp):
                """Add a PostProcessor object to the end of the chain.

                The postprocessor is also handed this downloader through its
                set_downloader() method.
                """
                self._pps.append(pp)
                pp.set_downloader(self)
 169
 170         def to_screen(self, message, skip_eol=False):
 171                 """Print message to stdout if not in quiet mode."""
 172                 assert type(message) == type(u'')
 173                 if not self.params.get('quiet', False):
 174                         terminator = [u'\n', u''][skip_eol]
 175                         output = message + terminator
 176                         if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 177                                 output = output.encode(preferredencoding(), 'ignore')
 178                         self._screen_file.write(output)
 179                         self._screen_file.flush()
 180
 181         def to_stderr(self, message):
 182                 """Print message to stderr."""
 183                 print >>sys.stderr, message.encode(preferredencoding())
 184
 185         def to_cons_title(self, message):
 186                 """Set console/terminal window title to message."""
 187                 if not self.params.get('consoletitle', False):
 188                         return
 189                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 190                         # c_wchar_p() might not be necessary if `message` is
 191                         # already of type unicode()
 192                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 193                 elif 'TERM' in os.environ:
 194                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 195
 196         def fixed_template(self):
 197                 """Checks if the output template is fixed."""
 198                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 199
 200         def trouble(self, message=None):
 201                 """Determine action to take when a download problem appears.
 202
 203                 Depending on if the downloader has been configured to ignore
 204                 download errors or not, this method may throw an exception or
 205                 not when errors are found, after printing the message.
 206                 """
 207                 if message is not None:
 208                         self.to_stderr(message)
 209                 if not self.params.get('ignoreerrors', False):
 210                         raise DownloadError(message)
 211                 self._download_retcode = 1
 212
 213         def slow_down(self, start_time, byte_counter):
 214                 """Sleep if the download speed is over the rate limit."""
 215                 rate_limit = self.params.get('ratelimit', None)
 216                 if rate_limit is None or byte_counter == 0:
 217                         return
 218                 now = time.time()
 219                 elapsed = now - start_time
 220                 if elapsed <= 0.0:
 221                         return
 222                 speed = float(byte_counter) / elapsed
 223                 if speed > rate_limit:
 224                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 225
 226         def temp_name(self, filename):
 227                 """Returns a temporary filename for the given filename."""
 228                 if self.params.get('nopart', False) or filename == u'-' or \
 229                                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 230                         return filename
 231                 return filename + u'.part'
 232
 233         def undo_temp_name(self, filename):
 234                 if filename.endswith(u'.part'):
 235                         return filename[:-len(u'.part')]
 236                 return filename
 237
 238         def try_rename(self, old_filename, new_filename):
 239                 try:
 240                         if old_filename == new_filename:
 241                                 return
 242                         os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 243                 except (IOError, OSError), err:
 244                         self.trouble(u'ERROR: unable to rename file')
 245
 246         def try_utime(self, filename, last_modified_hdr):
 247                 """Try to set the last-modified time of the given file."""
 248                 if last_modified_hdr is None:
 249                         return
 250                 if not os.path.isfile(encodeFilename(filename)):
 251                         return
 252                 timestr = last_modified_hdr
 253                 if timestr is None:
 254                         return
 255                 filetime = timeconvert(timestr)
 256                 if filetime is None:
 257                         return filetime
 258                 try:
 259                         os.utime(filename, (time.time(), filetime))
 260                 except:
 261                         pass
 262                 return filetime
 263
 264         def report_writedescription(self, descfn):
 265                 """ Report that the description file is being written """
 266                 self.to_screen(u'[info] Writing video description to: ' + descfn)
 267
 268         def report_writesubtitles(self, srtfn):
 269                 """ Report that the subtitles file is being written """
 270                 self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
 271
 272         def report_writeinfojson(self, infofn):
 273                 """ Report that the metadata file has been written """
 274                 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 275
 276         def report_destination(self, filename):
 277                 """Report destination filename."""
 278                 self.to_screen(u'[download] Destination: ' + filename)
 279
 280         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 281                 """Report download progress."""
 282                 if self.params.get('noprogress', False):
 283                         return
 284                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 285                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 286                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 287                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 288
 289         def report_resuming_byte(self, resume_len):
 290                 """Report attempt to resume at given byte."""
 291                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 292
 293         def report_retry(self, count, retries):
 294                 """Report retry in case of HTTP error 5xx"""
 295                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 296
 297         def report_file_already_downloaded(self, file_name):
 298                 """Report file has already been fully downloaded."""
 299                 try:
 300                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
 301                 except (UnicodeEncodeError), err:
 302                         self.to_screen(u'[download] The file has already been downloaded')
 303
        def report_unable_to_resume(self):
                """Report it was impossible to resume download.

                Only prints a message through to_screen(); it does not change any
                download state.
                """
                self.to_screen(u'[download] Unable to resume')
 307
 308         def report_finish(self):
 309                 """Report download finished."""
 310                 if self.params.get('noprogress', False):
 311                         self.to_screen(u'[download] Download completed')
 312                 else:
 313                         self.to_screen(u'')
 314
        def increment_downloads(self):
                """Increment the ordinal that assigns a number to each file.

                The counter is used for the 'autonumber' template field in
                prepare_filename() and for the max_downloads check in
                process_info().
                """
                self._num_downloads += 1
 318
 319         def prepare_filename(self, info_dict):
 320                 """Generate the output filename."""
 321                 try:
 322                         template_dict = dict(info_dict)
 323                         template_dict['epoch'] = unicode(long(time.time()))
 324                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
 325                         filename = self.params['outtmpl'] % template_dict
 326                         return filename
 327                 except (ValueError, KeyError), err:
 328                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
 329                         return None
 330
 331         def _match_entry(self, info_dict):
 332                 """ Returns None iff the file should be downloaded """
 333
 334                 title = info_dict['title']
 335                 matchtitle = self.params.get('matchtitle', False)
 336                 if matchtitle:
 337                         matchtitle = matchtitle.decode('utf8')
 338                         if not re.search(matchtitle, title, re.IGNORECASE):
 339                                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 340                 rejecttitle = self.params.get('rejecttitle', False)
 341                 if rejecttitle:
 342                         rejecttitle = rejecttitle.decode('utf8')
 343                         if re.search(rejecttitle, title, re.IGNORECASE):
 344                                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 345                 return None
 346
 347         def process_info(self, info_dict):
 348                 """Process a single dictionary returned by an InfoExtractor."""
 349
 350                 info_dict['stitle'] = sanitize_filename(info_dict['title'])
 351
 352                 reason = self._match_entry(info_dict)
 353                 if reason is not None:
 354                         self.to_screen(u'[download] ' + reason)
 355                         return
 356
 357                 max_downloads = self.params.get('max_downloads')
 358                 if max_downloads is not None:
 359                         if self._num_downloads > int(max_downloads):
 360                                 raise MaxDownloadsReached()
 361
 362                 filename = self.prepare_filename(info_dict)
 363
 364                 # Forced printings
 365                 if self.params.get('forcetitle', False):
 366                         print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
 367                 if self.params.get('forceurl', False):
 368                         print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 369                 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 370                         print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
 371                 if self.params.get('forcedescription', False) and 'description' in info_dict:
 372                         print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
 373                 if self.params.get('forcefilename', False) and filename is not None:
 374                         print filename.encode(preferredencoding(), 'xmlcharrefreplace')
 375                 if self.params.get('forceformat', False):
 376                         print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
 377
 378                 # Do nothing else if in simulate mode
 379                 if self.params.get('simulate', False):
 380                         return
 381
 382                 if filename is None:
 383                         return
 384
 385                 try:
 386                         dn = os.path.dirname(encodeFilename(filename))
 387                         if dn != '' and not os.path.exists(dn): # dn is already encoded
 388                                 os.makedirs(dn)
 389                 except (OSError, IOError), err:
 390                         self.trouble(u'ERROR: unable to create directory ' + unicode(err))
 391                         return
 392
 393                 if self.params.get('writedescription', False):
 394                         try:
 395                                 descfn = filename + u'.description'
 396                                 self.report_writedescription(descfn)
 397                                 descfile = open(encodeFilename(descfn), 'wb')
 398                                 try:
 399                                         descfile.write(info_dict['description'].encode('utf-8'))
 400                                 finally:
 401                                         descfile.close()
 402                         except (OSError, IOError):
 403                                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
 404                                 return
 405
 406                 if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 407                         # subtitles download errors are already managed as troubles in relevant IE
 408                         # that way it will silently go on when used with unsupporting IE
 409                         try:
 410                                 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
 411                                 self.report_writesubtitles(srtfn)
 412                                 srtfile = open(encodeFilename(srtfn), 'wb')
 413                                 try:
 414                                         srtfile.write(info_dict['subtitles'].encode('utf-8'))
 415                                 finally:
 416                                         srtfile.close()
 417                         except (OSError, IOError):
 418                                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
 419                                 return
 420
 421                 if self.params.get('writeinfojson', False):
 422                         infofn = filename + u'.info.json'
 423                         self.report_writeinfojson(infofn)
 424                         try:
 425                                 json.dump
 426                         except (NameError,AttributeError):
 427                                 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
 428                                 return
 429                         try:
 430                                 infof = open(encodeFilename(infofn), 'wb')
 431                                 try:
 432                                         json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
 433                                         json.dump(json_info_dict, infof)
 434                                 finally:
 435                                         infof.close()
 436                         except (OSError, IOError):
 437                                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
 438                                 return
 439
 440                 if not self.params.get('skip_download', False):
 441                         if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 442                                 success = True
 443                         else:
 444                                 try:
 445                                         success = self._do_download(filename, info_dict)
 446                                 except (OSError, IOError), err:
 447                                         raise UnavailableVideoError
 448                                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 449                                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 450                                         return
 451                                 except (ContentTooShortError, ), err:
 452                                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 453                                         return
 454
 455                         if success:
 456                                 try:
 457                                         self.post_process(filename, info_dict)
 458                                 except (PostProcessingError), err:
 459                                         self.trouble(u'ERROR: postprocessing: %s' % str(err))
 460                                         return
 461
 462         def download(self, url_list):
 463                 """Download a given list of URLs."""
 464                 if len(url_list) > 1 and self.fixed_template():
 465                         raise SameFileError(self.params['outtmpl'])
 466
 467                 for url in url_list:
 468                         suitable_found = False
 469                         for ie in self._ies:
 470                                 # Go to next InfoExtractor if not suitable
 471                                 if not ie.suitable(url):
 472                                         continue
 473
 474                                 # Suitable InfoExtractor found
 475                                 suitable_found = True
 476
 477                                 # Extract information from URL and process it
 478                                 videos = ie.extract(url)
 479                                 for video in videos or []:
 480                                         video['extractor'] = ie.IE_NAME
 481                                         try:
 482                                                 self.increment_downloads()
 483                                                 self.process_info(video)
 484                                         except UnavailableVideoError:
 485                                                 self.trouble(u'\nERROR: unable to download video')
 486
 487                                 # Suitable InfoExtractor had been found; go to next URL
 488                                 break
 489
 490                         if not suitable_found:
 491                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 492
 493                 return self._download_retcode
 494
 495         def post_process(self, filename, ie_info):
 496                 """Run the postprocessing chain on the given file."""
 497                 info = dict(ie_info)
 498                 info['filepath'] = filename
 499                 for pp in self._pps:
 500                         info = pp.run(info)
 501                         if info is None:
 502                                 break
 503
 504         def _download_with_rtmpdump(self, filename, url, player_url):
 505                 self.report_destination(filename)
 506                 tmpfilename = self.temp_name(filename)
 507
 508                 # Check for rtmpdump first
 509                 try:
 510                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 511                 except (OSError, IOError):
 512                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 513                         return False
 514
 515                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 516                 # the connection was interrumpted and resuming appears to be
 517                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 518                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 519                 args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 520                 if self.params.get('verbose', False):
 521                         try:
 522                                 import pipes
 523                                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 524                         except ImportError:
 525                                 shell_quote = repr
 526                         self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 527                 retval = subprocess.call(args)
 528                 while retval == 2 or retval == 1:
 529                         prevsize = os.path.getsize(encodeFilename(tmpfilename))
 530                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 531                         time.sleep(5.0) # This seems to be needed
 532                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 533                         cursize = os.path.getsize(encodeFilename(tmpfilename))
 534                         if prevsize == cursize and retval == 1:
 535                                 break
 536                          # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 537                         if prevsize == cursize and retval == 2 and cursize > 1024:
 538                                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 539                                 retval = 0
 540                                 break
 541                 if retval == 0:
 542                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
 543                         self.try_rename(tmpfilename, filename)
 544                         return True
 545                 else:
 546                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 547                         return False
 548
 549         def _do_download(self, filename, info_dict):
 550                 url = info_dict['url']
 551                 player_url = info_dict.get('player_url', None)
 552
 553                 # Check file already present
 554                 if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 555                         self.report_file_already_downloaded(filename)
 556                         return True
 557
 558                 # Attempt to download using rtmpdump
 559                 if url.startswith('rtmp'):
 560                         return self._download_with_rtmpdump(filename, url, player_url)
 561
 562                 tmpfilename = self.temp_name(filename)
 563                 stream = None
 564
 565                 # Do not include the Accept-Encoding header
 566                 headers = {'Youtubedl-no-compression': 'True'}
 567                 basic_request = urllib2.Request(url, None, headers)
 568                 request = urllib2.Request(url, None, headers)
 569
 570                 # Establish possible resume length
 571                 if os.path.isfile(encodeFilename(tmpfilename)):
 572                         resume_len = os.path.getsize(encodeFilename(tmpfilename))
 573                 else:
 574                         resume_len = 0
 575
 576                 open_mode = 'wb'
 577                 if resume_len != 0:
 578                         if self.params.get('continuedl', False):
 579                                 self.report_resuming_byte(resume_len)
 580                                 request.add_header('Range','bytes=%d-' % resume_len)
 581                                 open_mode = 'ab'
 582                         else:
 583                                 resume_len = 0
 584
 585                 count = 0
 586                 retries = self.params.get('retries', 0)
 587                 while count <= retries:
 588                         # Establish connection
 589                         try:
 590                                 if count == 0 and 'urlhandle' in info_dict:
 591                                         data = info_dict['urlhandle']
 592                                 data = urllib2.urlopen(request)
 593                                 break
 594                         except (urllib2.HTTPError, ), err:
 595                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 596                                         # Unexpected HTTP error
 597                                         raise
 598                                 elif err.code == 416:
 599                                         # Unable to resume (requested range not satisfiable)
 600                                         try:
 601                                                 # Open the connection again without the range header
 602                                                 data = urllib2.urlopen(basic_request)
 603                                                 content_length = data.info()['Content-Length']
 604                                         except (urllib2.HTTPError, ), err:
 605                                                 if err.code < 500 or err.code >= 600:
 606                                                         raise
 607                                         else:
 608                                                 # Examine the reported length
 609                                                 if (content_length is not None and
 610                                                                 (resume_len - 100 < long(content_length) < resume_len + 100)):
 611                                                         # The file had already been fully downloaded.
 612                                                         # Explanation to the above condition: in issue #175 it was revealed that
 613                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
 614                                                         # changing the file size slightly and causing problems for some users. So
 615                                                         # I decided to implement a suggested change and consider the file
 616                                                         # completely downloaded if the file size differs less than 100 bytes from
 617                                                         # the one in the hard drive.
 618                                                         self.report_file_already_downloaded(filename)
 619                                                         self.try_rename(tmpfilename, filename)
 620                                                         return True
 621                                                 else:
 622                                                         # The length does not match, we start the download over
 623                                                         self.report_unable_to_resume()
 624                                                         open_mode = 'wb'
 625                                                         break
 626                         # Retry
 627                         count += 1
 628                         if count <= retries:
 629                                 self.report_retry(count, retries)
 630
 631                 if count > retries:
 632                         self.trouble(u'ERROR: giving up after %s retries' % retries)
 633                         return False
 634
 635                 data_len = data.info().get('Content-length', None)
 636                 if data_len is not None:
 637                         data_len = long(data_len) + resume_len
 638                 data_len_str = self.format_bytes(data_len)
 639                 byte_counter = 0 + resume_len
 640                 block_size = 1024
 641                 start = time.time()
 642                 while True:
 643                         # Download and write
 644                         before = time.time()
 645                         data_block = data.read(block_size)
 646                         after = time.time()
 647                         if len(data_block) == 0:
 648                                 break
 649                         byte_counter += len(data_block)
 650
 651                         # Open file just in time
 652                         if stream is None:
 653                                 try:
 654                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 655                                         assert stream is not None
 656                                         filename = self.undo_temp_name(tmpfilename)
 657                                         self.report_destination(filename)
 658                                 except (OSError, IOError), err:
 659                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 660                                         return False
 661                         try:
 662                                 stream.write(data_block)
 663                         except (IOError, OSError), err:
 664                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 665                                 return False
 666                         block_size = self.best_block_size(after - before, len(data_block))
 667
 668                         # Progress message
 669                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 670                         if data_len is None:
 671                                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 672                         else:
 673                                 percent_str = self.calc_percent(byte_counter, data_len)
 674                                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 675                                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 676
 677                         # Apply rate limit
 678                         self.slow_down(start, byte_counter - resume_len)
 679
 680                 if stream is None:
 681                         self.trouble(u'\nERROR: Did not get any data blocks')
 682                         return False
 683                 stream.close()
 684                 self.report_finish()
 685                 if data_len is not None and byte_counter != data_len:
 686                         raise ContentTooShortError(byte_counter, long(data_len))
 687                 self.try_rename(tmpfilename, filename)
 688
 689                 # Update file modification time
 690                 if self.params.get('updatetime', True):
 691                         info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 692
 693                 return True