2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
21 from .InfoExtractors import get_info_extractor
24 class FileDownloader(object):
25 """File Downloader class.
27 File downloader objects are the ones responsible of downloading the
28 actual video file and writing it to disk if the user has requested
29 it, among some other tasks. In most cases there should be one per
30 program. As, given a video URL, the downloader doesn't know how to
31 extract all the needed information, task that InfoExtractors do, it
32 has to pass the URL to one of them.
34 For this, file downloader objects have a method that allows
35 InfoExtractors to be registered in a given order. When it is passed
36 a URL, the file downloader handles it to the first InfoExtractor it
37 finds that reports being able to handle it. The InfoExtractor extracts
38 all the information about the video or videos the URL refers to, and
39 asks the FileDownloader to process the video information, possibly
40 downloading the video.
42 File downloaders accept a lot of parameters. In order not to saturate
43 the object constructor with arguments, it receives a dictionary of
44 options instead. These options are available through the params
45 attribute for the InfoExtractors to use. The FileDownloader also
46 registers itself as the downloader in charge for the InfoExtractors
47 that are added to it, so this is a "mutual registration".
51 username: Username for authentication purposes.
52 password: Password for authentication purposes.
53 usenetrc: Use netrc for authentication instead.
54 quiet: Do not print messages to stdout.
55 forceurl: Force printing final URL.
56 forcetitle: Force printing title.
57 forceid: Force printing ID.
58 forcethumbnail: Force printing thumbnail URL.
59 forcedescription: Force printing description.
60 forcefilename: Force printing final filename.
61 simulate: Do not download the video files.
62 format: Video format code.
63 format_limit: Highest quality format to try.
64 outtmpl: Template for output names.
65 restrictfilenames: Do not allow "&" and spaces in file names
66 ignoreerrors: Do not stop on download errors.
67 ratelimit: Download speed limit, in bytes/sec.
68 nooverwrites: Prevent overwriting files.
69 retries: Number of times to retry for HTTP error 5xx
70 buffersize: Size of download buffer in bytes.
71 noresizebuffer: Do not automatically resize the download buffer.
72 continuedl: Try to continue downloads if possible.
73 noprogress: Do not print the progress bar.
74 playliststart: Playlist item to start at.
75 playlistend: Playlist item to end at.
76 matchtitle: Download only matching titles.
77 rejecttitle: Reject downloads for matching titles.
78 logtostderr: Log messages to stderr instead of stdout.
79 consoletitle: Display progress in console window's titlebar.
80 nopart: Do not use temporary .part files.
81 updatetime: Use the Last-modified header to set output file timestamps.
82 writedescription: Write the video description to a .description file
83 writeinfojson: Write the video description to a .info.json file
84 writethumbnail: Write the thumbnail image to a file
85 writesubtitles: Write the video subtitles to a file
86 allsubtitles: Downloads all the subtitles of the video
87 listsubtitles: Lists all available subtitles for the video
88 subtitlesformat: Subtitle format [sbv/srt] (default=srt)
89 subtitleslang: Language of the subtitles to download
90 test: Download only first bytes to test the downloader.
91 keepvideo: Keep the video file after post-processing
92 min_filesize: Skip files smaller than this size
93 max_filesize: Skip files larger than this size
94 daterange: A DateRange object, download only if the upload_date is in the range.
95 skip_download: Skip the actual download of the video file
101 _download_retcode = None
102 _num_downloads = None
105 def __init__(self, params):
106 """Create a FileDownloader object with the given options."""
109 self._progress_hooks = []
110 self._download_retcode = 0
111 self._num_downloads = 0
112 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
115 if '%(stitle)s' in self.params['outtmpl']:
116 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
def format_bytes(bytes):
    """Render a byte count as a short human-readable string.

    Args:
        bytes: Size in bytes as a number or a numeric string, or None
            when the size is unknown.

    Returns:
        The size scaled to the largest fitting binary unit with two
        decimals, e.g. '1.50KiB', or 'N/A' when bytes is None.
    """
    if bytes is None:
        # Callers format the total length before checking whether it is known.
        return 'N/A'
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    value = float(bytes)  # also accepts numeric strings
    if value <= 0.0:
        # math.log() is undefined here; report plain bytes instead.
        exponent = 0
    else:
        exponent = int(math.log(value, 1024.0))
        # Stay inside the suffix table for sub-byte and enormous values.
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = value / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
def calc_percent(byte_counter, data_len):
    """Format download progress as a right-aligned, 6-character percentage.

    Args:
        byte_counter: Number of bytes received so far.
        data_len: Expected total number of bytes, or None if unknown.

    Returns:
        A string such as ' 25.0%', or the placeholder '---.-%' when the
        total is unknown or zero (no meaningful ratio exists).
    """
    if data_len is None or float(data_len) == 0.0:
        return '---.-%'
    return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
139 def calc_eta(start, now, total, current):
143 if current == 0 or dif < 0.001: # One millisecond
145 rate = float(current) / dif
146 eta = int((float(total) - float(current)) / rate)
147 (eta_mins, eta_secs) = divmod(eta, 60)
150 return '%02d:%02d' % (eta_mins, eta_secs)
153 def calc_speed(start, now, bytes):
155 if bytes == 0 or dif < 0.001: # One millisecond
156 return '%10s' % '---b/s'
157 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
160 def best_block_size(elapsed_time, bytes):
161 new_min = max(bytes / 2.0, 1.0)
162 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
163 if elapsed_time < 0.001:
165 rate = bytes / elapsed_time
def parse_bytes(bytestr):
    """Parse a string indicating a byte quantity into an integer.

    Args:
        bytestr: A decimal number optionally followed by one of the
            suffixes k, M, G, T, P, E, Z, Y (case-insensitive), each a
            successive power of 1024.

    Returns:
        The quantity in bytes, rounded to the nearest integer, or None
        when bytestr does not have the expected form.
    """
    matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
    if matchobj is None:
        return None
    number = float(matchobj.group(1))
    # An absent suffix yields '' whose index is 0, i.e. a multiplier of 1.
    multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
    return int(round(number * multiplier))
182 def add_info_extractor(self, ie):
183 """Add an InfoExtractor object to the end of the list."""
185 ie.set_downloader(self)
187 def add_post_processor(self, pp):
188 """Add a PostProcessor object to the end of the chain."""
190 pp.set_downloader(self)
def to_screen(self, message, skip_eol=False):
    """Write message to the screen stream unless quiet mode is on.

    A newline is appended unless skip_eol is set. The text is encoded
    with the preferred encoding (unencodable characters dropped) when the
    stream reports a binary mode or when running on Python 2.
    """
    assert type(message) == type(u'')
    if self.params.get('quiet', False):
        return
    stream = self._screen_file
    text = message + (u'\n', u'')[skip_eol]
    stream_mode = getattr(stream, 'mode', '')
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    needs_bytes = 'b' in stream_mode or sys.version_info[0] < 3
    if needs_bytes:
        text = text.encode(preferredencoding(), 'ignore')
    stream.write(text)
    stream.flush()
def to_stderr(self, message):
    """Print message, followed by a newline, to stderr.

    The text is encoded with the preferred encoding when sys.stderr
    reports a binary mode or when running on Python 2.
    """
    assert type(message) == type(u'')
    output = message + u'\n'
    # Decide based on the stream actually written to. The screen stream may
    # be a different object with a different mode.
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
def to_cons_title(self, message):
    """Set console/terminal window title to message.

    Does nothing unless the 'consoletitle' option is enabled. On Windows
    with an attached console the title is set through the kernel32 API;
    otherwise, if TERM is set, an escape sequence is written to the screen.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        self.to_screen('\033]0;%s\007' % message, skip_eol=True)
def fixed_template(self):
    """Tell whether the output template has no %(name)s placeholder."""
    template = self.params['outtmpl']
    placeholder = re.search(u'(?u)%\\(.+?\\)s', template)
    return placeholder is None
226 def trouble(self, message=None, tb=None):
227 """Determine action to take when a download problem appears.
229 Depending on if the downloader has been configured to ignore
230 download errors or not, this method may throw an exception or
231 not when errors are found, after printing the message.
233 tb, if given, is additional traceback information.
235 if message is not None:
236 self.to_stderr(message)
237 if self.params.get('verbose'):
239 if sys.exc_info()[0]: # if .trouble has been called from an except block
241 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
242 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
243 tb += compat_str(traceback.format_exc())
245 tb_data = traceback.format_list(traceback.extract_stack())
246 tb = u''.join(tb_data)
248 if not self.params.get('ignoreerrors', False):
249 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
250 exc_info = sys.exc_info()[1].exc_info
252 exc_info = sys.exc_info()
253 raise DownloadError(message, exc_info)
254 self._download_retcode = 1
def report_warning(self, message):
    """Print the message to stderr, prefixed with 'WARNING:'.

    If stderr is a tty (and the platform is not Windows) the 'WARNING:'
    prefix is coloured with an ANSI escape sequence.
    """
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    """Do the same as trouble, but prefix the message with 'ERROR:'.

    The prefix is coloured in red if stderr is a tty (and the platform is
    not Windows). tb is passed through to trouble unchanged.
    """
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;31mERROR:\033[0m'
    else:
        _msg_header = u'ERROR:'
    error_message = u'%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def slow_down(self, start_time, byte_counter):
    """Sleep if the download speed is over the rate limit.

    Args:
        start_time: time.time() value taken when the transfer started.
        byte_counter: Number of bytes transferred since start_time.

    The limit is read from the 'ratelimit' option (bytes/sec); nothing
    happens when it is unset or no bytes have been transferred yet.
    """
    rate_limit = self.params.get('ratelimit', None)
    if rate_limit is None or byte_counter == 0:
        return
    now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        # No measurable time has passed; avoid dividing by zero.
        return
    speed = float(byte_counter) / elapsed
    if speed > rate_limit:
        # Sleep just long enough for the average rate to fall to the limit.
        time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)
def temp_name(self, filename):
    """Returns a temporary filename for the given filename.

    The name is returned unchanged when the 'nopart' option is set, when
    it is u'-', or when the path exists but is not a regular file;
    otherwise u'.part' is appended.
    """
    if self.params.get('nopart', False) or filename == u'-':
        return filename
    encoded = encodeFilename(filename)
    if os.path.exists(encoded) and not os.path.isfile(encoded):
        return filename
    return filename + u'.part'
def undo_temp_name(self, filename):
    """Strip a trailing u'.part' suffix from filename, if present.

    Names without the suffix are returned unchanged.
    """
    if filename.endswith(u'.part'):
        return filename[:-len(u'.part')]
    return filename
def try_rename(self, old_filename, new_filename):
    """Rename old_filename to new_filename, reporting failures.

    Nothing is done when both names are equal. An IOError or OSError
    raised by the rename is reported through report_error.
    """
    if old_filename == new_filename:
        return
    try:
        os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
    except (IOError, OSError):
        self.report_error(u'unable to rename file')
313 def try_utime(self, filename, last_modified_hdr):
314 """Try to set the last-modified time of the given file."""
315 if last_modified_hdr is None:
317 if not os.path.isfile(encodeFilename(filename)):
319 timestr = last_modified_hdr
322 filetime = timeconvert(timestr)
326 os.utime(filename, (time.time(), filetime))
def report_writedescription(self, descfn):
    """Announce on screen that the description file descfn is being written."""
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
def report_writesubtitles(self, sub_filename):
    """Announce on screen that the subtitles file sub_filename is being written."""
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
def report_writeinfojson(self, infofn):
    """Announce on screen the JSON metadata file infofn."""
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
def report_destination(self, filename):
    """Announce on screen the file the download is written to."""
    notice = u'[download] Destination: ' + filename
    self.to_screen(notice)
347 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
348 """Report download progress."""
349 if self.params.get('noprogress', False):
351 clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
352 if self.params.get('progress_with_newline', False):
353 self.to_screen(u'[download] %s of %s at %s ETA %s' %
354 (percent_str, data_len_str, speed_str, eta_str))
356 self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
357 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
358 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
359 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len):
    """Announce on screen that the download resumes at byte resume_len."""
    notice = u'[download] Resuming download at byte %s' % resume_len
    self.to_screen(notice)
def report_retry(self, count, retries):
    """Announce on screen retry attempt count out of retries after an HTTP error."""
    attempt = (count, retries)
    notice = u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % attempt
    self.to_screen(notice)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    If printing the message with file_name raises UnicodeEncodeError, a
    generic message without the name is printed instead.
    """
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
    """Announce on screen that the download could not be resumed."""
    notice = u'[download] Unable to resume'
    self.to_screen(notice)
380 def report_finish(self):
381 """Report download finished."""
382 if self.params.get('noprogress', False):
383 self.to_screen(u'[download] Download completed')
def increment_downloads(self):
    """Advance by one the ordinal that numbers each downloaded file."""
    self._num_downloads = self._num_downloads + 1
391 def prepare_filename(self, info_dict):
392 """Generate the output filename."""
394 template_dict = dict(info_dict)
396 template_dict['epoch'] = int(time.time())
397 autonumber_size = self.params.get('autonumber_size')
398 if autonumber_size is None:
400 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
401 template_dict['autonumber'] = autonumber_templ % self._num_downloads
402 if template_dict['playlist_index'] is not None:
403 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
405 sanitize = lambda k,v: sanitize_filename(
406 u'NA' if v is None else compat_str(v),
407 restricted=self.params.get('restrictfilenames'),
409 template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
411 filename = self.params['outtmpl'] % template_dict
413 except KeyError as err:
414 self.report_error(u'Erroneous output template')
416 except ValueError as err:
417 self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
420 def _match_entry(self, info_dict):
421 """ Returns None iff the file should be downloaded """
423 title = info_dict['title']
424 matchtitle = self.params.get('matchtitle', False)
426 if not re.search(matchtitle, title, re.IGNORECASE):
427 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
428 rejecttitle = self.params.get('rejecttitle', False)
430 if re.search(rejecttitle, title, re.IGNORECASE):
431 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
432 date = info_dict.get('upload_date', None)
434 dateRange = self.params.get('daterange', DateRange())
435 if date not in dateRange:
436 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
439 def extract_info(self, url, download=True, ie_key=None, extra_info={}):
441 Returns a list with a dictionary for each video we find.
442 If 'download', also downloads the videos.
443 extra_info is a dict containing the extra values to add to each result
447 ie = get_info_extractor(ie_key)()
448 ie.set_downloader(self)
454 if not ie.suitable(url):
458 self.report_warning(u'The program functionality for this site has been marked as broken, '
459 u'and will probably not work.')
462 ie_result = ie.extract(url)
463 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
465 if isinstance(ie_result, list):
466 # Backwards compatibility: old IE result format
467 for result in ie_result:
468 result.update(extra_info)
470 '_type': 'compat_list',
471 'entries': ie_result,
474 ie_result.update(extra_info)
475 if 'extractor' not in ie_result:
476 ie_result['extractor'] = ie.IE_NAME
477 return self.process_ie_result(ie_result, download=download)
478 except ExtractorError as de: # An error we somewhat expected
479 self.report_error(compat_str(de), de.format_traceback())
481 except Exception as e:
482 if self.params.get('ignoreerrors', False):
483 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
488 self.report_error(u'no suitable InfoExtractor: %s' % url)
490 def process_ie_result(self, ie_result, download=True, extra_info={}):
492 Take the result of the ie(may be modified) and resolve all unresolved
493 references (URLs, playlist items).
495 It will also download the videos if 'download'.
496 Returns the resolved ie_result.
499 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
500 if result_type == 'video':
501 if 'playlist' not in ie_result:
502 # It isn't part of a playlist
503 ie_result['playlist'] = None
504 ie_result['playlist_index'] = None
506 self.process_info(ie_result)
508 elif result_type == 'url':
509 # We have to add extra_info to the results because it may be
510 # contained in a playlist
511 return self.extract_info(ie_result['url'],
513 ie_key=ie_result.get('ie_key'),
514 extra_info=extra_info)
515 elif result_type == 'playlist':
516 # We process each entry in the playlist
517 playlist = ie_result.get('title', None) or ie_result.get('id', None)
518 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
520 playlist_results = []
522 n_all_entries = len(ie_result['entries'])
523 playliststart = self.params.get('playliststart', 1) - 1
524 playlistend = self.params.get('playlistend', -1)
526 if playlistend == -1:
527 entries = ie_result['entries'][playliststart:]
529 entries = ie_result['entries'][playliststart:playlistend]
531 n_entries = len(entries)
533 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
534 (ie_result['extractor'], playlist, n_all_entries, n_entries))
536 for i,entry in enumerate(entries,1):
537 self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
539 'playlist': playlist,
540 'playlist_index': i + playliststart,
542 if not 'extractor' in entry:
543 # We set the extractor, if it's an url it will be set then to
544 # the new extractor, but if it's already a video we must make
545 # sure it's present: see issue #877
546 entry['extractor'] = ie_result['extractor']
547 entry_result = self.process_ie_result(entry,
550 playlist_results.append(entry_result)
551 ie_result['entries'] = playlist_results
553 elif result_type == 'compat_list':
555 r.setdefault('extractor', ie_result['extractor'])
557 ie_result['entries'] = [
558 self.process_ie_result(_fixup(r), download=download)
559 for r in ie_result['entries']
563 raise Exception('Invalid result type: %s' % result_type)
565 def process_info(self, info_dict):
566 """Process a single resolved IE result."""
568 assert info_dict.get('_type', 'video') == 'video'
569 #We increment the download the download count here to match the previous behaviour.
570 self.increment_downloads()
572 info_dict['fulltitle'] = info_dict['title']
573 if len(info_dict['title']) > 200:
574 info_dict['title'] = info_dict['title'][:197] + u'...'
576 # Keep for backwards compatibility
577 info_dict['stitle'] = info_dict['title']
579 if not 'format' in info_dict:
580 info_dict['format'] = info_dict['ext']
582 reason = self._match_entry(info_dict)
583 if reason is not None:
584 self.to_screen(u'[download] ' + reason)
587 max_downloads = self.params.get('max_downloads')
588 if max_downloads is not None:
589 if self._num_downloads > int(max_downloads):
590 raise MaxDownloadsReached()
592 filename = self.prepare_filename(info_dict)
595 if self.params.get('forcetitle', False):
596 compat_print(info_dict['title'])
597 if self.params.get('forceid', False):
598 compat_print(info_dict['id'])
599 if self.params.get('forceurl', False):
600 compat_print(info_dict['url'])
601 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
602 compat_print(info_dict['thumbnail'])
603 if self.params.get('forcedescription', False) and 'description' in info_dict:
604 compat_print(info_dict['description'])
605 if self.params.get('forcefilename', False) and filename is not None:
606 compat_print(filename)
607 if self.params.get('forceformat', False):
608 compat_print(info_dict['format'])
610 # Do nothing else if in simulate mode
611 if self.params.get('simulate', False):
618 dn = os.path.dirname(encodeFilename(filename))
619 if dn != '' and not os.path.exists(dn):
621 except (OSError, IOError) as err:
622 self.report_error(u'unable to create directory ' + compat_str(err))
625 if self.params.get('writedescription', False):
627 descfn = filename + u'.description'
628 self.report_writedescription(descfn)
629 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
630 descfile.write(info_dict['description'])
631 except (OSError, IOError):
632 self.report_error(u'Cannot write description file ' + descfn)
635 if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
636 # subtitles download errors are already managed as troubles in relevant IE
637 # that way it will silently go on when used with unsupporting IE
638 subtitle = info_dict['subtitles'][0]
639 (sub_error, sub_lang, sub) = subtitle
640 sub_format = self.params.get('subtitlesformat')
642 self.report_warning("Some error while getting the subtitles")
645 sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
646 self.report_writesubtitles(sub_filename)
647 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
649 except (OSError, IOError):
650 self.report_error(u'Cannot write subtitles file ' + descfn)
653 if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
654 subtitles = info_dict['subtitles']
655 sub_format = self.params.get('subtitlesformat')
656 for subtitle in subtitles:
657 (sub_error, sub_lang, sub) = subtitle
659 self.report_warning("Some error while getting the subtitles")
662 sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
663 self.report_writesubtitles(sub_filename)
664 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
666 except (OSError, IOError):
667 self.report_error(u'Cannot write subtitles file ' + descfn)
670 if self.params.get('writeinfojson', False):
671 infofn = filename + u'.info.json'
672 self.report_writeinfojson(infofn)
674 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
675 write_json_file(json_info_dict, encodeFilename(infofn))
676 except (OSError, IOError):
677 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
680 if self.params.get('writethumbnail', False):
681 if 'thumbnail' in info_dict:
682 thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
685 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
686 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
687 (info_dict['extractor'], info_dict['id']))
688 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
689 with open(thumb_filename, 'wb') as thumbf:
690 shutil.copyfileobj(uf, thumbf)
691 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
692 (info_dict['extractor'], info_dict['id'], thumb_filename))
694 if not self.params.get('skip_download', False):
695 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
699 success = self._do_download(filename, info_dict)
700 except (OSError, IOError) as err:
701 raise UnavailableVideoError()
702 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
703 self.report_error(u'unable to download video data: %s' % str(err))
705 except (ContentTooShortError, ) as err:
706 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
711 self.post_process(filename, info_dict)
712 except (PostProcessingError) as err:
713 self.report_error(u'postprocessing: %s' % str(err))
716 def download(self, url_list):
717 """Download a given list of URLs."""
718 if len(url_list) > 1 and self.fixed_template():
719 raise SameFileError(self.params['outtmpl'])
723 #It also downloads the videos
724 videos = self.extract_info(url)
725 except UnavailableVideoError:
726 self.report_error(u'unable to download video')
727 except MaxDownloadsReached:
728 self.to_screen(u'[info] Maximum number of downloaded files reached.')
731 return self._download_retcode
733 def post_process(self, filename, ie_info):
734 """Run all the postprocessors on the given file."""
736 info['filepath'] = filename
740 keep_video_wish,new_info = pp.run(info)
741 if keep_video_wish is not None:
743 keep_video = keep_video_wish
744 elif keep_video is None:
745 # No clear decision yet, let IE decide
746 keep_video = keep_video_wish
747 except PostProcessingError as e:
748 self.to_stderr(u'ERROR: ' + e.msg)
749 if keep_video is False and not self.params.get('keepvideo', False):
751 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
752 os.remove(encodeFilename(filename))
753 except (IOError, OSError):
754 self.report_warning(u'Unable to remove downloaded video file')
756 def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
757 self.report_destination(filename)
758 tmpfilename = self.temp_name(filename)
760 # Check for rtmpdump first
762 subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
763 except (OSError, IOError):
764 self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
766 verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
768 # Download using rtmpdump. rtmpdump returns exit code 2 when
769 # the connection was interrumpted and resuming appears to be
770 # possible. This is part of rtmpdump's normal usage, AFAIK.
771 basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
772 if player_url is not None:
773 basic_args += ['--swfVfy', player_url]
774 if page_url is not None:
775 basic_args += ['--pageUrl', page_url]
776 if play_path is not None:
777 basic_args += ['--playpath', play_path]
778 if tc_url is not None:
779 basic_args += ['--tcUrl', url]
780 args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
781 if self.params.get('verbose', False):
784 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
787 self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
788 retval = subprocess.call(args)
789 while retval == 2 or retval == 1:
790 prevsize = os.path.getsize(encodeFilename(tmpfilename))
791 self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
792 time.sleep(5.0) # This seems to be needed
793 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
794 cursize = os.path.getsize(encodeFilename(tmpfilename))
795 if prevsize == cursize and retval == 1:
797 # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
798 if prevsize == cursize and retval == 2 and cursize > 1024:
799 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
803 fsize = os.path.getsize(encodeFilename(tmpfilename))
804 self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
805 self.try_rename(tmpfilename, filename)
806 self._hook_progress({
807 'downloaded_bytes': fsize,
808 'total_bytes': fsize,
809 'filename': filename,
810 'status': 'finished',
814 self.to_stderr(u"\n")
815 self.report_error(u'rtmpdump exited with code %d' % retval)
818 def _download_with_mplayer(self, filename, url):
819 self.report_destination(filename)
820 tmpfilename = self.temp_name(filename)
822 args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
823 # Check for mplayer first
825 subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
826 except (OSError, IOError):
827 self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
830 # Download using mplayer.
831 retval = subprocess.call(args)
833 fsize = os.path.getsize(encodeFilename(tmpfilename))
834 self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
835 self.try_rename(tmpfilename, filename)
836 self._hook_progress({
837 'downloaded_bytes': fsize,
838 'total_bytes': fsize,
839 'filename': filename,
840 'status': 'finished',
844 self.to_stderr(u"\n")
845 self.report_error(u'mplayer exited with code %d' % retval)
849 def _do_download(self, filename, info_dict):
850 url = info_dict['url']
852 # Check file already present
853 if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
854 self.report_file_already_downloaded(filename)
855 self._hook_progress({
856 'filename': filename,
857 'status': 'finished',
861 # Attempt to download using rtmpdump
862 if url.startswith('rtmp'):
863 return self._download_with_rtmpdump(filename, url,
864 info_dict.get('player_url', None),
865 info_dict.get('page_url', None),
866 info_dict.get('play_path', None),
867 info_dict.get('tc_url', None))
869 # Attempt to download using mplayer
870 if url.startswith('mms') or url.startswith('rtsp'):
871 return self._download_with_mplayer(filename, url)
873 tmpfilename = self.temp_name(filename)
876 # Do not include the Accept-Encoding header
877 headers = {'Youtubedl-no-compression': 'True'}
878 if 'user_agent' in info_dict:
879 headers['Youtubedl-user-agent'] = info_dict['user_agent']
880 basic_request = compat_urllib_request.Request(url, None, headers)
881 request = compat_urllib_request.Request(url, None, headers)
883 if self.params.get('test', False):
884 request.add_header('Range','bytes=0-10240')
886 # Establish possible resume length
887 if os.path.isfile(encodeFilename(tmpfilename)):
888 resume_len = os.path.getsize(encodeFilename(tmpfilename))
894 if self.params.get('continuedl', False):
895 self.report_resuming_byte(resume_len)
896 request.add_header('Range','bytes=%d-' % resume_len)
902 retries = self.params.get('retries', 0)
903 while count <= retries:
904 # Establish connection
906 if count == 0 and 'urlhandle' in info_dict:
907 data = info_dict['urlhandle']
908 data = compat_urllib_request.urlopen(request)
910 except (compat_urllib_error.HTTPError, ) as err:
911 if (err.code < 500 or err.code >= 600) and err.code != 416:
912 # Unexpected HTTP error
914 elif err.code == 416:
915 # Unable to resume (requested range not satisfiable)
917 # Open the connection again without the range header
918 data = compat_urllib_request.urlopen(basic_request)
919 content_length = data.info()['Content-Length']
920 except (compat_urllib_error.HTTPError, ) as err:
921 if err.code < 500 or err.code >= 600:
924 # Examine the reported length
925 if (content_length is not None and
926 (resume_len - 100 < int(content_length) < resume_len + 100)):
927 # The file had already been fully downloaded.
928 # Explanation to the above condition: in issue #175 it was revealed that
929 # YouTube sometimes adds or removes a few bytes from the end of the file,
930 # changing the file size slightly and causing problems for some users. So
931 # I decided to implement a suggested change and consider the file
932 # completely downloaded if the file size differs less than 100 bytes from
933 # the one in the hard drive.
934 self.report_file_already_downloaded(filename)
935 self.try_rename(tmpfilename, filename)
936 self._hook_progress({
937 'filename': filename,
938 'status': 'finished',
942 # The length does not match, we start the download over
943 self.report_unable_to_resume()
949 self.report_retry(count, retries)
952 self.report_error(u'giving up after %s retries' % retries)
955 data_len = data.info().get('Content-length', None)
956 if data_len is not None:
957 data_len = int(data_len) + resume_len
958 min_data_len = self.params.get("min_filesize", None)
959 max_data_len = self.params.get("max_filesize", None)
960 if min_data_len is not None and data_len < min_data_len:
961 self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
963 if max_data_len is not None and data_len > max_data_len:
964 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
967 data_len_str = self.format_bytes(data_len)
968 byte_counter = 0 + resume_len
969 block_size = self.params.get('buffersize', 1024)
974 data_block = data.read(block_size)
976 if len(data_block) == 0:
978 byte_counter += len(data_block)
980 # Open file just in time
983 (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
984 assert stream is not None
985 filename = self.undo_temp_name(tmpfilename)
986 self.report_destination(filename)
987 except (OSError, IOError) as err:
988 self.report_error(u'unable to open for writing: %s' % str(err))
991 stream.write(data_block)
992 except (IOError, OSError) as err:
993 self.to_stderr(u"\n")
994 self.report_error(u'unable to write data: %s' % str(err))
996 if not self.params.get('noresizebuffer', False):
997 block_size = self.best_block_size(after - before, len(data_block))
1000 speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
1001 if data_len is None:
1002 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
1004 percent_str = self.calc_percent(byte_counter, data_len)
1005 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
1006 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
1008 self._hook_progress({
1009 'downloaded_bytes': byte_counter,
1010 'total_bytes': data_len,
1011 'tmpfilename': tmpfilename,
1012 'filename': filename,
1013 'status': 'downloading',
1017 self.slow_down(start, byte_counter - resume_len)
1020 self.to_stderr(u"\n")
1021 self.report_error(u'Did not get any data blocks')
1024 self.report_finish()
1025 if data_len is not None and byte_counter != data_len:
1026 raise ContentTooShortError(byte_counter, int(data_len))
1027 self.try_rename(tmpfilename, filename)
1029 # Update file modification time
1030 if self.params.get('updatetime', True):
1031 info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
1033 self._hook_progress({
1034 'downloaded_bytes': byte_counter,
1035 'total_bytes': byte_counter,
1036 'filename': filename,
1037 'status': 'finished',
1042 def _hook_progress(self, status):
1043 for ph in self._progress_hooks:
def add_progress_hook(self, ph):
    """ ph gets called on download progress, with a dictionary with the entries
    * filename: The final filename
    * status: One of "downloading" and "finished"

    It can also have some of the following entries:

    * downloaded_bytes: Bytes on disk
    * total_bytes: Total bytes, None if unknown
    * tmpfilename: The filename we're currently writing to

    Hooks are guaranteed to be called at least once (with status "finished")
    if the download is successful.
    """
    self._progress_hooks.append(ph)