2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
20 from .InfoExtractors import get_info_extractor
23 class FileDownloader(object):
24 """File Downloader class.
File downloader objects are the ones responsible for downloading the
27 actual video file and writing it to disk if the user has requested
28 it, among some other tasks. In most cases there should be one per
29 program. As, given a video URL, the downloader doesn't know how to
30 extract all the needed information, task that InfoExtractors do, it
31 has to pass the URL to one of them.
33 For this, file downloader objects have a method that allows
34 InfoExtractors to be registered in a given order. When it is passed
a URL, the file downloader hands it to the first InfoExtractor it
36 finds that reports being able to handle it. The InfoExtractor extracts
37 all the information about the video or videos the URL refers to, and
38 asks the FileDownloader to process the video information, possibly
39 downloading the video.
41 File downloaders accept a lot of parameters. In order not to saturate
42 the object constructor with arguments, it receives a dictionary of
43 options instead. These options are available through the params
44 attribute for the InfoExtractors to use. The FileDownloader also
45 registers itself as the downloader in charge for the InfoExtractors
46 that are added to it, so this is a "mutual registration".
50 username: Username for authentication purposes.
51 password: Password for authentication purposes.
52 usenetrc: Use netrc for authentication instead.
53 quiet: Do not print messages to stdout.
54 forceurl: Force printing final URL.
55 forcetitle: Force printing title.
56 forcethumbnail: Force printing thumbnail URL.
57 forcedescription: Force printing description.
58 forcefilename: Force printing final filename.
59 simulate: Do not download the video files.
60 format: Video format code.
61 format_limit: Highest quality format to try.
62 outtmpl: Template for output names.
63 restrictfilenames: Do not allow "&" and spaces in file names
64 ignoreerrors: Do not stop on download errors.
65 ratelimit: Download speed limit, in bytes/sec.
66 nooverwrites: Prevent overwriting files.
67 retries: Number of times to retry for HTTP error 5xx
68 buffersize: Size of download buffer in bytes.
69 noresizebuffer: Do not automatically resize the download buffer.
70 continuedl: Try to continue downloads if possible.
71 noprogress: Do not print the progress bar.
72 playliststart: Playlist item to start at.
73 playlistend: Playlist item to end at.
74 matchtitle: Download only matching titles.
75 rejecttitle: Reject downloads for matching titles.
76 logtostderr: Log messages to stderr instead of stdout.
77 consoletitle: Display progress in console window's titlebar.
78 nopart: Do not use temporary .part files.
79 updatetime: Use the Last-modified header to set output file timestamps.
80 writedescription: Write the video description to a .description file
writeinfojson: Write the video metadata to a .info.json file
82 writesubtitles: Write the video subtitles to a file
83 onlysubtitles: Downloads only the subtitles of the video
84 allsubtitles: Downloads all the subtitles of the video
85 listsubtitles: Lists all available subtitles for the video
86 subtitlesformat: Subtitle format [sbv/srt] (default=srt)
87 subtitleslang: Language of the subtitles to download
88 test: Download only first bytes to test the downloader.
89 keepvideo: Keep the video file after post-processing
90 min_filesize: Skip files smaller than this size
91 max_filesize: Skip files larger than this size
97 _download_retcode = None
def __init__(self, params):
    """Create a FileDownloader object with the given options.

    params is the option dictionary; it is stored by reference on
    self.params and must contain an 'outtmpl' entry.
    """
    self._ies = []             # registered InfoExtractors, in lookup order
    self._pps = []             # post-processor chain
    self._progress_hooks = []  # callables invoked by _hook_progress
    self._download_retcode = 0
    self._num_downloads = 0
    # Regular output is redirected to stderr when 'logtostderr' is set.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self.params = params

    if '%(stitle)s' in self.params['outtmpl']:
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
def format_bytes(bytes):
    """Format a byte count as a human-readable string such as '1.50MiB'.

    bytes may be a number, a numeric string, or None (unknown size,
    rendered as 'N/A'). Units are binary multiples (1KiB = 1024B) up to
    YiB; larger values stay expressed in YiB and values below one byte in
    B. Takes no self: other code in this file calls it through the class
    (FileDownloader.format_bytes).
    """
    if bytes is None:
        return 'N/A'
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    value = float(bytes)
    exponent = 0
    # Repeated division by 1024 is exact for floats and keeps the exponent
    # inside the suffix table, unlike int(math.log(bytes, 1024)).
    while abs(value) >= 1024.0 and exponent < len(suffixes) - 1:
        value /= 1024.0
        exponent += 1
    return '%.2f%s' % (value, suffixes[exponent])
def calc_percent(byte_counter, data_len):
    """Return the download percentage as a 6-character, right-aligned string.

    byte_counter is the number of bytes received so far and data_len the
    expected total. When the total is unknown (None) or zero, a
    placeholder is returned instead of dividing.
    """
    if data_len is None or float(data_len) == 0.0:
        return '---.-%'
    ratio = float(byte_counter) / float(data_len)
    return '%6s' % ('%3.1f%%' % (ratio * 100.0))
def calc_eta(start, now, total, current):
    """Estimate the remaining download time as an 'MM:SS' string.

    start and now are timestamps in seconds, total the number of bytes
    expected and current the number received. '--:--' is returned when no
    estimate is possible: unknown total, nothing received yet, less than a
    millisecond elapsed, or more than 99 minutes remaining.
    """
    if total is None:
        return '--:--'
    dif = now - start
    if current == 0 or dif < 0.001:  # One millisecond
        return '--:--'
    rate = float(current) / dif
    eta = int((float(total) - float(current)) / rate)
    (eta_mins, eta_secs) = divmod(eta, 60)
    if eta_mins > 99:
        return '--:--'
    return '%02d:%02d' % (eta_mins, eta_secs)
def calc_speed(start, now, bytes):
    """Return the average transfer speed as a 10-character, right-aligned string.

    start and now are timestamps in seconds and bytes the amount
    transferred in between. A placeholder is returned when nothing was
    transferred or less than a millisecond elapsed.
    """
    dif = now - start
    if bytes == 0 or dif < 0.001:  # One millisecond
        return '%10s' % '---b/s'
    return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
def best_block_size(elapsed_time, bytes):
    """Pick the size of the next read from how the previous read went.

    bytes is the size of the block just read and elapsed_time the seconds
    it took. The result is the observed rate (bytes per second) clamped
    between half and double the previous block size, never above 4 MB and
    never below one byte.
    """
    new_min = max(bytes / 2.0, 1.0)
    new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
    if elapsed_time < 0.001:
        # Too fast to measure a rate: grow as much as allowed.
        return int(new_max)
    rate = bytes / elapsed_time
    if rate > new_max:
        return int(new_max)
    if rate < new_min:
        return int(new_min)
    return int(rate)
def parse_bytes(bytestr):
    """Parse a string indicating a byte quantity into an integer.

    Accepts a decimal number optionally followed by one of the
    case-insensitive suffixes k, M, G, T, P, E, Z, Y (powers of 1024).
    Returns None when bytestr does not have that form.
    """
    matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
    if matchobj is None:
        return None
    number = float(matchobj.group(1))
    # An empty suffix is found at index 0, i.e. a multiplier of 1.
    multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
    return int(round(number * multiplier))
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    The extractor is also told that this object is its downloader.
    """
    self._ies.append(ie)
    ie.set_downloader(self)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain.

    The post-processor is also told that this object is its downloader.
    """
    self._pps.append(pp)
    pp.set_downloader(self)
def to_screen(self, message, skip_eol=False):
    """Write message to the screen stream unless the 'quiet' option is set.

    message must be a unicode string. A newline is appended unless
    skip_eol is true. The stream is flushed after every write.
    """
    assert type(message) == type(u'')
    if self.params.get('quiet', False):
        return
    line_end = (u'\n', u'')[skip_eol]
    text = message + line_end
    # Python 2 lies about the mode of sys.stdout/sys.stderr, so always
    # encode there; on Python 3 encode only for binary streams.
    stream_mode = getattr(self._screen_file, 'mode', '')
    if 'b' in stream_mode or sys.version_info[0] < 3:
        text = text.encode(preferredencoding(), 'ignore')
    self._screen_file.write(text)
    self._screen_file.flush()
def to_stderr(self, message):
    """Print message, followed by a newline, to stderr.

    message must be a unicode string. It is written regardless of the
    'quiet' option.
    """
    assert type(message) == type(u'')
    output = message + u'\n'
    # Decide on encoding from the stream actually written to (sys.stderr).
    # Python 2 lies about the mode of sys.stdout/sys.stderr.
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
def to_cons_title(self, message):
    """Set console/terminal window title to message.

    Does nothing unless the 'consoletitle' option is set. On Windows the
    title is set through the console API; elsewhere an escape sequence is
    written with to_screen when the TERM environment variable exists.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        self.to_screen('\033]0;%s\007' % message, skip_eol=True)
def fixed_template(self):
    """Tell whether the output template contains no %(name)s field.

    Returns True when every download would use the same file name.
    """
    template = self.params['outtmpl']
    field_match = re.search(u'(?u)%\\(.+?\\)s', template)
    return field_match is None
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information. It is printed
    only in verbose mode; when omitted, a traceback is built from the
    exception being handled or, failing that, from the current stack.

    Raises DownloadError unless 'ignoreerrors' is set, in which case the
    return code is set to 1 instead.
    """
    if message is not None:
        self.to_stderr(message)

    current = sys.exc_info()
    # An exception that carries its own exc_info attribute wraps an
    # earlier error; that original context is preferred when present.
    wrapped = getattr(current[1], 'exc_info', None)
    has_wrapped = bool(wrapped and wrapped[0])

    if self.params.get('verbose'):
        if tb is None:
            if current[0]:  # if .trouble has been called from an except block
                tb = u''
                if has_wrapped:
                    tb += u''.join(traceback.format_exception(*wrapped))
                tb += compat_str(traceback.format_exc())
            else:
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        exc_info = wrapped if has_wrapped else current
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
def report_warning(self, message):
    """Print message to stderr, prefixed with 'WARNING:'.

    The prefix is coloured when stderr is a tty and the platform is not
    Windows.
    """
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    """Do the same as trouble, but prefix the message with 'ERROR:'.

    The prefix is coloured in red when stderr is a tty and the platform
    is not Windows. tb is passed through to trouble unchanged.
    """
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;31mERROR:\033[0m'
    else:
        _msg_header = u'ERROR:'
    error_message = u'%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def slow_down(self, start_time, byte_counter):
    """Sleep if the download speed is over the rate limit.

    start_time is the timestamp at which the transfer began and
    byte_counter the bytes received since then. The 'ratelimit' option is
    in bytes per second; nothing happens when it is unset.
    """
    rate_limit = self.params.get('ratelimit', None)
    if rate_limit is None or byte_counter == 0:
        return
    now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        return
    speed = float(byte_counter) / elapsed
    if speed > rate_limit:
        # Sleep just long enough for the average to fall back to the limit.
        time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
def temp_name(self, filename):
    """Returns a temporary filename for the given filename.

    The name is returned unchanged when the 'nopart' option is set, when
    it is u'-', or when it exists on disk but is not a regular file;
    otherwise u'.part' is appended.
    """
    if self.params.get('nopart', False) or filename == u'-' or \
            (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
        return filename
    return filename + u'.part'
def undo_temp_name(self, filename):
    """Return filename without a trailing u'.part' suffix, if it has one."""
    if filename.endswith(u'.part'):
        return filename[:-len(u'.part')]
    return filename
def try_rename(self, old_filename, new_filename):
    """Rename old_filename to new_filename, reporting an error on failure.

    Nothing is done when both names are equal.
    """
    try:
        if old_filename == new_filename:
            return
        os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
    except (IOError, OSError):
        self.report_error(u'unable to rename file')
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    last_modified_hdr is the value of a Last-modified header, or None.
    Returns the converted timestamp, or None when there is no header, the
    file does not exist or the header cannot be converted. Failure to
    actually change the file time is ignored.
    """
    if last_modified_hdr is None:
        return
    encoded = encodeFilename(filename)
    if not os.path.isfile(encoded):
        return
    timestr = last_modified_hdr
    if timestr is None:
        return
    filetime = timeconvert(timestr)
    if filetime is None:
        return filetime
    try:
        # Use the same encoded name that was just checked with isfile().
        os.utime(encoded, (time.time(), filetime))
    except Exception:
        # Best effort only: a wrong timestamp must not fail the download.
        pass
    return filetime
def report_writedescription(self, descfn):
    """Announce on screen that the description file descfn is being written."""
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
def report_writesubtitles(self, sub_filename):
    """Announce on screen that the subtitles file sub_filename is being written."""
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
def report_writeinfojson(self, infofn):
    """Announce on screen that the JSON metadata file infofn is being written."""
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
def report_destination(self, filename):
    """Announce on screen the file name the download is written to."""
    notice = u'[download] Destination: ' + filename
    self.to_screen(notice)
def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
    """Report download progress.

    Nothing is shown when the 'noprogress' option is set. Otherwise the
    progress line is written on its own line ('progress_with_newline') or
    over the previous one, and the console title is updated with the same
    values stripped of padding.
    """
    if self.params.get('noprogress', False):
        return
    if self.params.get('progress_with_newline', False):
        self.to_screen(u'[download] %s of %s at %s ETA %s' %
            (percent_str, data_len_str, speed_str, eta_str))
    else:
        # The carriage return makes the line overwrite the previous one.
        self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
            (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
    self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
        (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len):
    """Announce on screen that the download resumes at byte resume_len."""
    notice = u'[download] Resuming download at byte %s' % resume_len
    self.to_screen(notice)
def report_retry(self, count, retries):
    """Announce on screen retry number count out of retries after an HTTP 5xx error."""
    attempt_info = (count, retries)
    notice = u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % attempt_info
    self.to_screen(notice)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name when the name cannot be
    encoded for output.
    """
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
    """Announce on screen that the download could not be resumed."""
    notice = u'[download] Unable to resume'
    self.to_screen(notice)
def report_finish(self):
    """Report download finished.

    With 'noprogress' set a completion message is printed; otherwise only
    an empty line, which ends the in-place progress line.
    """
    if self.params.get('noprogress', False):
        self.to_screen(u'[download] Download completed')
    else:
        self.to_screen(u'')
def increment_downloads(self):
    """Advance the counter used to number each downloaded file."""
    self._num_downloads = self._num_downloads + 1
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' option is filled in with the sanitized values of
    info_dict plus two computed fields: 'epoch' (current time) and
    'autonumber' (download counter, zero-padded to 'autonumber_size'
    digits, 5 by default). Returns None after reporting an error when the
    template cannot be expanded.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        # .get(): a missing index must not be reported as a bad template.
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        def sanitize(key, value):
            # Missing values are rendered as 'NA'.
            # NOTE(review): is_id is assumed to be the keyword accepted by
            # sanitize_filename for the video id field - confirm.
            return sanitize_filename(
                u'NA' if value is None else compat_str(value),
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == u'id'))
        template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

        filename = self.params['outtmpl'] % template_dict
        return filename
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError:
        self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
        return None
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns the reason for skipping it: the title does not
    match the 'matchtitle' pattern or matches the 'rejecttitle' pattern
    (both case-insensitive regular expressions). The reason carries no
    '[download] ' prefix; process_info adds it when printing.
    """
    title = info_dict['title']
    matchtitle = self.params.get('matchtitle', False)
    if matchtitle:
        if not re.search(matchtitle, title, re.IGNORECASE):
            return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
    rejecttitle = self.params.get('rejecttitle', False)
    if rejecttitle:
        if re.search(rejecttitle, title, re.IGNORECASE):
            return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    return None
def extract_info(self, url, download = True, ie_name = None):
    '''
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.

    ie_name, if given, names an InfoExtractor that is tried before the
    registered ones. Only the first suitable extractor is used. Returns
    None when no extractor is suitable, when the extractor returns None,
    or when extraction fails and the error is reported instead of raised.
    '''
    suitable_found = False

    #We copy the original list
    ies = list(self._ies)

    if ie_name is not None:
        #We put in the first place the given info extractor
        first_ie = get_info_extractor(ie_name)()
        first_ie.set_downloader(self)
        ies.insert(0, first_ie)

    for ie in ies:
        # Go to next InfoExtractor if not suitable
        if not ie.suitable(url):
            continue

        # Warn if the _WORKING attribute is False
        if not ie._WORKING:
            self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
                           u'and will probably not work. If you want to go on, use the -i option.')

        # Suitable InfoExtractor found
        suitable_found = True

        # Extract information from URL and process it
        try:
            ie_results = ie.extract(url)
            if ie_results is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            results = []
            for ie_result in ie_results:
                if 'extractor' not in ie_result:
                    #Record which extractor produced the result unless it was already set
                    ie_result['extractor'] = ie.IE_NAME
                results.append(self.process_ie_result(ie_result, download))
            return results
        except ExtractorError as de: # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    if not suitable_found:
        self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download = True):
    """
    Take the result of the ie and return a list of videos.
    For url elements it will search the suitable ie and get the videos
    For playlist elements it will process each of the elements of the 'entries' key

    It will also download the videos if 'download'.

    The kind of result is read from ie_result['_type'] ('video' when
    absent). A video is returned as the same dictionary, a url as the
    first result of extracting it, and a playlist as a copy whose
    'entries' are the processed entries selected by the 'playliststart'
    and 'playlistend' options.
    """
    result_type = ie_result.get('_type', 'video') #If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        if 'playlist' not in ie_result:
            #It isn't part of a playlist
            ie_result['playlist'] = None
            ie_result['playlist_index'] = None
        if download:
            #Do the download:
            self.process_info(ie_result)
        return ie_result
    elif result_type == 'url':
        #We get the video pointed by the url
        result = self.extract_info(ie_result['url'], download, ie_name = ie_result['ie_key'])[0]
        return result
    elif result_type == 'playlist':
        #We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # 'playliststart' is 1-based; convert it to a slice index.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i,entry in enumerate(entries,1):
            self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
            entry_result = self.process_ie_result(entry, False)
            entry_result['playlist'] = playlist
            entry_result['playlist_index'] = i + playliststart
            #We must do the download here to correctly set the 'playlist' key
            if download:
                self.process_info(entry_result)
            playlist_results.append(entry_result)
        result = ie_result.copy()
        result['entries'] = playlist_results
        return result
def process_info(self, info_dict):
    """Process a single dictionary returned by an InfoExtractor.

    Normalizes the title fields, applies the title filters and the
    'max_downloads' limit, prints the fields requested by the force*
    options and, unless simulating, writes the requested side files
    (description, subtitles, JSON metadata), downloads the video and runs
    the post-processors. Most failures are reported through report_error
    and end the processing of this video.

    Raises MaxDownloadsReached when the limit has been exceeded and
    UnavailableVideoError when the download fails with an OS/IO error.
    """

    #We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if not 'format' in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceurl', False):
        compat_print(info_dict['url'])
    if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and 'description' in info_dict:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn): # dn is already encoded
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + u'.description'
        try:
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    def write_subtitle(subtitle):
        # Write one (error, language, data) subtitle tuple next to the
        # video; returns False only when the file could not be written.
        (sub_error, sub_lang, sub) = subtitle
        if sub_error:
            self.report_warning("Some error while getting the subtitles")
            return True
        sub_format = self.params.get('subtitlesformat')
        sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
        try:
            self.report_writesubtitles(sub_filename)
            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                subfile.write(sub)
        except (OSError, IOError):
            # Name the subtitles file itself in the error message.
            self.report_error(u'Cannot write subtitles file ' + sub_filename)
            return False
        return True

    subtitles = info_dict.get('subtitles')

    if self.params.get('writesubtitles', False) and subtitles:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        if not write_subtitle(subtitles[0]):
            return
        if self.params.get('onlysubtitles', False):
            return

    if self.params.get('allsubtitles', False) and subtitles:
        for subtitle in subtitles:
            if not write_subtitle(subtitle):
                return
        if self.params.get('onlysubtitles', False):
            return

    if self.params.get('writeinfojson', False):
        infofn = filename + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # The open URL handle is dropped from the written metadata.
            json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self._do_download(filename, info_dict)
            except (OSError, IOError):
                raise UnavailableVideoError()
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return
def download(self, url_list):
    """Download a given list of URLs.

    Returns the accumulated return code (self._download_retcode).
    Raises SameFileError when several URLs would be written to one fixed
    file name, and re-raises MaxDownloadsReached after announcing it.
    """
    if len(url_list) > 1 and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            #It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Each post-processor receives a copy of ie_info with 'filepath' set to
    filename and returns a (keep_video_wish, new_info) pair. The original
    file is deleted when the post-processors' wishes resolve to False and
    the 'keepvideo' option is not set; a single True wish keeps it.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            # NOTE(review): new_info is not fed to the next post-processor.
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.to_stderr(u'ERROR: ' + e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
    """Download an RTMP stream to filename with the external rtmpdump tool.

    player_url, page_url and play_path are passed to rtmpdump when not
    None. Returns True when rtmpdump finished successfully and the
    temporary file was renamed to filename, False otherwise.
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    # Check for rtmpdump first
    try:
        # The handle on the null device is closed once the probe is done.
        with open(os.path.devnull, 'w') as devnull:
            subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
        return False

    # Download using rtmpdump. rtmpdump returns exit code 2 when
    # the connection was interrupted and resuming appears to be
    # possible. This is part of rtmpdump's normal usage, AFAIK.
    basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
    if player_url is not None:
        basic_args += ['-W', player_url]
    if page_url is not None:
        basic_args += ['--pageUrl', page_url]
    if play_path is not None:
        basic_args += ['-y', play_path]
    args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
    if self.params.get('verbose', False):
        try:
            import pipes
            shell_quote = lambda args: ' '.join(map(pipes.quote, args))
        except ImportError:
            shell_quote = repr
        self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
    retval = subprocess.call(args)
    while retval == 2 or retval == 1:
        prevsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
        time.sleep(5.0) # This seems to be needed
        retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
        cursize = os.path.getsize(encodeFilename(tmpfilename))
        if prevsize == cursize and retval == 1:
            # No progress after an error exit: give up.
            break
        # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
        if prevsize == cursize and retval == 2 and cursize > 1024:
            self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
            retval = 0
            break
    if retval == 0:
        fsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
        self.try_rename(tmpfilename, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
    else:
        self.to_stderr(u"\n")
        self.report_error(u'rtmpdump exited with code %d' % retval)
        return False
def _do_download(self, filename, info_dict):
    """Download info_dict['url'] to filename.

    RTMP URLs are delegated to _download_with_rtmpdump. HTTP downloads go
    to a temporary file (see temp_name) that is renamed on completion,
    resume a partial file when 'continuedl' is set, retry on HTTP 5xx up
    to 'retries' times and honour the size, buffer and rate options.

    Returns True on success (including "already downloaded") and False
    when the download was given up or skipped. Raises
    ContentTooShortError when fewer bytes than announced were received;
    unexpected HTTP errors propagate to the caller.
    """
    url = info_dict['url']

    # Check file already present
    if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
        self.report_file_already_downloaded(filename)
        self._hook_progress({
            'filename': filename,
            'status': 'finished',
        })
        return True

    # Attempt to download using rtmpdump
    if url.startswith('rtmp'):
        return self._download_with_rtmpdump(filename, url,
                                            info_dict.get('player_url', None),
                                            info_dict.get('page_url', None),
                                            info_dict.get('play_path', None))

    tmpfilename = self.temp_name(filename)
    stream = None

    # Do not include the Accept-Encoding header
    headers = {'Youtubedl-no-compression': 'True'}
    if 'user_agent' in info_dict:
        headers['Youtubedl-user-agent'] = info_dict['user_agent']
    # basic_request never gets a Range header; it is used to start over.
    basic_request = compat_urllib_request.Request(url, None, headers)
    request = compat_urllib_request.Request(url, None, headers)

    if self.params.get('test', False):
        request.add_header('Range','bytes=0-10240')

    # Establish possible resume length
    if os.path.isfile(encodeFilename(tmpfilename)):
        resume_len = os.path.getsize(encodeFilename(tmpfilename))
    else:
        resume_len = 0

    open_mode = 'wb'
    if resume_len != 0:
        if self.params.get('continuedl', False):
            self.report_resuming_byte(resume_len)
            request.add_header('Range','bytes=%d-' % resume_len)
            open_mode = 'ab'
        else:
            resume_len = 0

    count = 0
    retries = self.params.get('retries', 0)
    while count <= retries:
        # Establish connection
        try:
            # NOTE(review): the pre-opened handle is immediately replaced
            # by a fresh urlopen() - confirm whether that is intended.
            if count == 0 and 'urlhandle' in info_dict:
                data = info_dict['urlhandle']
            data = compat_urllib_request.urlopen(request)
            break
        except (compat_urllib_error.HTTPError, ) as err:
            if (err.code < 500 or err.code >= 600) and err.code != 416:
                # Unexpected HTTP error
                raise
            elif err.code == 416:
                # Unable to resume (requested range not satisfiable)
                try:
                    # Open the connection again without the range header
                    data = compat_urllib_request.urlopen(basic_request)
                    content_length = data.info()['Content-Length']
                except (compat_urllib_error.HTTPError, ) as err:
                    if err.code < 500 or err.code >= 600:
                        raise
                else:
                    # Examine the reported length
                    if (content_length is not None and
                            (resume_len - 100 < int(content_length) < resume_len + 100)):
                        # The file had already been fully downloaded.
                        # Explanation to the above condition: in issue #175 it was revealed that
                        # YouTube sometimes adds or removes a few bytes from the end of the file,
                        # changing the file size slightly and causing problems for some users. So
                        # I decided to implement a suggested change and consider the file
                        # completely downloaded if the file size differs less than 100 bytes from
                        # the one in the hard drive.
                        self.report_file_already_downloaded(filename)
                        self.try_rename(tmpfilename, filename)
                        self._hook_progress({
                            'filename': filename,
                            'status': 'finished',
                        })
                        return True
                    else:
                        # The length does not match, we start the download over
                        self.report_unable_to_resume()
                        open_mode = 'wb'
                        break
        # Retry
        count += 1
        if count <= retries:
            self.report_retry(count, retries)

    if count > retries:
        self.report_error(u'giving up after %s retries' % retries)
        return False

    data_len = data.info().get('Content-length', None)
    if data_len is not None:
        # The server reports only the remaining part when resuming.
        data_len = int(data_len) + resume_len
        min_data_len = self.params.get("min_filesize", None)
        max_data_len = self.params.get("max_filesize", None)
        if min_data_len is not None and data_len < min_data_len:
            self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
            return False
        if max_data_len is not None and data_len > max_data_len:
            self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
            return False

    data_len_str = self.format_bytes(data_len)
    byte_counter = 0 + resume_len
    block_size = self.params.get('buffersize', 1024)
    start = time.time()
    try:
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error(u'unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.to_stderr(u"\n")
                self.report_error(u'unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
            else:
                percent_str = self.calc_percent(byte_counter, data_len)
                eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                self.report_progress(percent_str, data_len_str, speed_str, eta_str)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
            })

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)
    finally:
        # Close the output file on every exit path, errors included.
        if stream is not None:
            stream.close()

    if stream is None:
        self.to_stderr(u"\n")
        self.report_error(u'Did not get any data blocks')
        return False
    self.report_finish()
    if data_len is not None and byte_counter != data_len:
        raise ContentTooShortError(byte_counter, int(data_len))
    self.try_rename(tmpfilename, filename)

    # Update file modification time
    if self.params.get('updatetime', True):
        info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

    self._hook_progress({
        'downloaded_bytes': byte_counter,
        'total_bytes': byte_counter,
        'filename': filename,
        'status': 'finished',
    })

    return True
def _hook_progress(self, status):
    """Call every registered progress hook with the status dictionary."""
    for ph in self._progress_hooks:
        ph(status)
def add_progress_hook(self, ph):
    """ Register ph to be called on download progress.

    ph receives a dictionary with the entries
    * filename: The final filename
    * status: One of "downloading" and "finished"

    It can also have some of the following entries:

    * downloaded_bytes: Bytes on disks
    * total_bytes: Total bytes, None if unknown
    * tmpfilename: The filename we're currently writing to

    Hooks are guaranteed to be called at least once (with status "finished")
    if the download is successful.
    """
    hooks = self._progress_hooks
    hooks.append(ph)