updatetime: Use the Last-modified header to set output file timestamps.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
- writesubtitles: Write the video subtitles to a .srt file
+ writesubtitles: Write the video subtitles to a file (default=srt)
+ onlysubtitles: Downloads only the subtitles of the video
+ allsubtitles: Downloads all the subtitles of the video
subtitleslang: Language of the subtitles to download
test: Download only first bytes to test the downloader.
+ keepvideo: Keep the video file after post-processing
+ min_filesize: Skip files smaller than this size
+ max_filesize: Skip files larger than this size
"""
params = None
"""Create a FileDownloader object with the given options."""
self._ies = []
self._pps = []
+ self._progress_hooks = []
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
- sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
+ self.to_screen('\033]0;%s\007' % message, skip_eol=True)
def fixed_template(self):
"""Checks if the output template is fixed."""
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
- def trouble(self, message=None):
+ def trouble(self, message=None, tb=None):
"""Determine action to take when a download problem appears.
Depending on if the downloader has been configured to ignore
download errors or not, this method may throw an exception or
not when errors are found, after printing the message.
+
+ tb, if given, is additional traceback information.
"""
if message is not None:
self.to_stderr(message)
if self.params.get('verbose'):
- self.to_stderr(u''.join(traceback.format_list(traceback.extract_stack())))
+ if tb is None:
+ tb_data = traceback.format_list(traceback.extract_stack())
+ tb = u''.join(tb_data)
+ self.to_stderr(tb)
if not self.params.get('ignoreerrors', False):
raise DownloadError(message)
self._download_retcode = 1
""" Report that the description file is being written """
self.to_screen(u'[info] Writing video description to: ' + descfn)
- def report_writesubtitles(self, srtfn):
+ def report_writesubtitles(self, sub_filename):
""" Report that the subtitles file is being written """
- self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
+ self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
def report_writeinfojson(self, infofn):
""" Report that the metadata file has been written """
"""Report download progress."""
if self.params.get('noprogress', False):
return
- self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
+ if self.params.get('progress_with_newline', False):
+ self.to_screen(u'[download] %s of %s at %s ETA %s' %
+ (percent_str, data_len_str, speed_str, eta_str))
+ else:
+ self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
- matchtitle = matchtitle.decode('utf8')
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
- rejecttitle = rejecttitle.decode('utf8')
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
return None
if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
+ subtitle = info_dict['subtitles'][0]
+ (sub_error, sub_lang, sub) = subtitle
try:
- srtfn = filename.rsplit('.', 1)[0] + u'.srt'
- self.report_writesubtitles(srtfn)
- with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile:
- srtfile.write(info_dict['subtitles'])
+ sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.srt'
+ self.report_writesubtitles(sub_filename)
+ with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+ subfile.write(sub)
except (OSError, IOError):
self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
return
+ if self.params.get('onlysubtitles', False):
+ return
+
+ if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+ subtitles = info_dict['subtitles']
+ for subtitle in subtitles:
+ (sub_error, sub_lang, sub) = subtitle
+ try:
+ sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.srt'
+ self.report_writesubtitles(sub_filename)
+ with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+ subfile.write(sub)
+ except (OSError, IOError):
+ self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
+ return
+ if self.params.get('onlysubtitles', False):
+ return
if self.params.get('writeinfojson', False):
infofn = filename + u'.info.json'
# Warn if the _WORKING attribute is False
if not ie.working():
- self.trouble(u'WARNING: the program functionality for this site has been marked as broken, '
- u'and will probably not work. If you want to go on, use the -i option.')
+ self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
+ u'and will probably not work. If you want to go on, use the -i option.')
# Suitable InfoExtractor found
suitable_found = True
# Extract information from URL and process it
- videos = ie.extract(url)
+ try:
+ videos = ie.extract(url)
+ except ExtractorError as de: # An error we somewhat expected
+ self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
+ break
+ except Exception as e:
+ if self.params.get('ignoreerrors', False):
+ self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
+ break
+ else:
+ raise
- if len(videos) > 1 and self.fixed_template():
+ if len(videos or []) > 1 and self.fixed_template():
raise SameFileError(self.params['outtmpl'])
for video in videos or []:
return self._download_retcode
def post_process(self, filename, ie_info):
- """Run the postprocessing chain on the given file."""
+ """Run all the postprocessors on the given file."""
info = dict(ie_info)
info['filepath'] = filename
+ keep_video = None
for pp in self._pps:
- info = pp.run(info)
- if info is None:
- break
+ try:
+ keep_video_wish,new_info = pp.run(info)
+ if keep_video_wish is not None:
+ if keep_video_wish:
+ keep_video = keep_video_wish
+ elif keep_video is None:
+ # No clear decision yet, let IE decide
+ keep_video = keep_video_wish
+ except PostProcessingError as e:
+ self.to_stderr(u'ERROR: ' + e.msg)
+ if keep_video is False and not self.params.get('keepvideo', False):
+ try:
+ self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
+ os.remove(encodeFilename(filename))
+ except (IOError, OSError):
+ self.to_stderr(u'WARNING: Unable to remove downloaded video file')
- def _download_with_rtmpdump(self, filename, url, player_url):
+ def _download_with_rtmpdump(self, filename, url, player_url, page_url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
- basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
+ basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
+ if player_url is not None:
+ basic_args += ['-W', player_url]
+ if page_url is not None:
+ basic_args += ['--pageUrl', page_url]
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False):
try:
retval = 0
break
if retval == 0:
- self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
+ self._hook_progress({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ 'filename': filename,
+ 'status': 'finished',
+ })
return True
else:
self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
def _do_download(self, filename, info_dict):
url = info_dict['url']
- player_url = info_dict.get('player_url', None)
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename)
+ self._hook_progress({
+ 'filename': filename,
+ 'status': 'finished',
+ })
return True
# Attempt to download using rtmpdump
if url.startswith('rtmp'):
- return self._download_with_rtmpdump(filename, url, player_url)
+ return self._download_with_rtmpdump(filename, url,
+ info_dict.get('player_url', None),
+ info_dict.get('page_url', None))
tmpfilename = self.temp_name(filename)
stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
+ if 'user_agent' in info_dict:
+ headers['Youtubedl-user-agent'] = info_dict['user_agent']
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
+ self._hook_progress({
+ 'filename': filename,
+ 'status': 'finished',
+ })
return True
else:
# The length does not match, we start the download over
data_len = data.info().get('Content-length', None)
if data_len is not None:
data_len = int(data_len) + resume_len
+ min_data_len = self.params.get("min_filesize", None)
+ max_data_len = self.params.get("max_filesize", None)
+ if min_data_len is not None and data_len < min_data_len:
+ self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+ return False
+ if max_data_len is not None and data_len > max_data_len:
+ self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+ return False
+
data_len_str = self.format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': data_len,
+ 'tmpfilename': tmpfilename,
+ 'filename': filename,
+ 'status': 'downloading',
+ })
+
# Apply rate limit
self.slow_down(start, byte_counter - resume_len)
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': byte_counter,
+ 'filename': filename,
+ 'status': 'finished',
+ })
+
return True
+
+ def _hook_progress(self, status):
+ for ph in self._progress_hooks:
+ ph(status)
+
+ def add_progress_hook(self, ph):
+ """ ph gets called on download progress, with a dictionary with the entries
+ * filename: The final filename
+ * status: One of "downloading" and "finished"
+
+ It can also have some of the following entries:
+
+ * downloaded_bytes: Bytes on disks
+ * total_bytes: Total bytes, None if unknown
+ * tmpfilename: The filename we're currently writing to
+
+ Hooks are guaranteed to be called at least once (with status "finished")
+ if the download is successful.
+ """
+ self._progress_hooks.append(ph)