from __future__ import absolute_import, unicode_literals
import collections
+import contextlib
import datetime
import errno
+import fileinput
import io
import itertools
import json
compat_basestring,
compat_cookiejar,
compat_expanduser,
+ compat_get_terminal_size,
compat_http_client,
compat_kwargs,
compat_str,
ExtractorError,
format_bytes,
formatSeconds,
- get_term_width,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
PagedList,
parse_filesize,
+ PerRequestProxyHandler,
PostProcessingError,
platform_name,
preferredencoding,
render_table,
SameFileError,
sanitize_filename,
+ sanitize_path,
std_headers,
subtitles_filename,
takewhile_inclusive,
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
- subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
+ subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
+ cn_verification_proxy: URL of the proxy to use for IP address verification
+ on Chinese sites. (Experimental)
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
postprocessor.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
- * status: One of "downloading" and "finished".
+ * status: One of "downloading", "error", or "finished".
Check this first and ignore unknown values.
- If status is one of "downloading" or "finished", the
+ If status is one of "downloading" or "finished", the
following properties may also be present:
* filename: The final filename (always present)
+ * tmpfilename: The filename we're currently writing to
* downloaded_bytes: Bytes on disk
* total_bytes: Size of the whole file, None if unknown
- * tmpfilename: The filename we're currently writing to
+ * total_bytes_estimate: Guess of the eventual file size,
+ None if unavailable.
+ * elapsed: The number of seconds since download started.
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if
unknown
+ * fragment_index: The counter of the currently
+ downloaded video fragment.
+ * fragment_count: The number of fragments (= individual
+ files that will be merged)
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
The following parameters are not used by YoutubeDL itself, they are used by
- the FileDownloader:
+ the downloader (see youtube_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
noresizebuffer, retries, continuedl, noprogress, consoletitle,
- xattr_set_filesize.
+ xattr_set_filesize, external_downloader_args.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
try:
import pty
master, slave = pty.openpty()
- width = get_term_width()
+ width = compat_get_terminal_size().columns
if width is None:
width_args = []
else:
raise
if (sys.version_info >= (3,) and sys.platform != 'win32' and
- sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
- and not params.get('restrictfilenames', False)):
+ sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
+ not params.get('restrictfilenames', False)):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
- if '%(stitle)s' in self.params.get('outtmpl', ''):
- self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+ if isinstance(params.get('outtmpl'), bytes):
+ self.report_warning(
+ 'Parameter outtmpl is bytes, but should be a unicode string. '
+ 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
self._setup_opener()
if v is not None)
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+ outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict
# Temporary fix for #4787
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result
- '''
+ '''
if ie_key:
ies = [self.get_info_extractor(ie_key)]
info_dict['timestamp'])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+ if self.params.get('listsubtitles', False):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+ self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+ return
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], info_dict.get('subtitles'),
+ info_dict.get('automatic_captions'))
+
# These extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
if download:
if req_format is None:
req_format = 'best'
formats_to_download = []
- # The -1 is for supporting YoutubeIE
- if req_format in ('-1', 'all'):
+ if req_format == 'all':
formats_to_download = formats
else:
for rfstr in req_format.split(','):
info_dict.update(formats_to_download[-1])
return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    video_id is only used in warning messages.
    normal_subtitles and automatic_captions map a language code to a list
    of format dicts (each with at least an 'ext' key); either may be
    None or empty.

    The selection is driven by these entries of self.params:
    writesubtitles, writeautomaticsub, allsubtitles, subtitleslangs and
    subtitlesformat (a '/'-separated preference list, default 'best').

    Returns a dict mapping each selected language to the chosen format
    dict, or None when nothing is available for the enabled sources.
    """
    params = self.params

    available_subs = {}
    if normal_subtitles and params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and params.get('writeautomaticsub'):
        # Normal subtitles win over automatic captions for the same language
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    # available_subs is only ever filled when writesubtitles or
    # writeautomaticsub is set, so an empty dict covers both "not
    # requested" and "nothing available".
    if not available_subs:
        return None

    if params.get('allsubtitles', False):
        requested_langs = list(available_subs)
    else:
        requested_langs = params.get('subtitleslangs')
        if not requested_langs:
            # Default to English, otherwise to the first available language
            if 'en' in available_subs:
                requested_langs = ['en']
            else:
                requested_langs = [next(iter(available_subs))]

    formats_query = params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []

    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is as useless as a missing language;
        # indexing it with [-1] below would raise IndexError.
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                # The last entry of the list is treated as the best format
                chosen = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preference matched (or none was given): fall back to the
            # last format and tell the user what was picked.
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
+
def process_info(self, info_dict):
"""Process a single resolved IE result."""
if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + '...'
- # Keep for backwards compatibility
- info_dict['stitle'] = info_dict['title']
-
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
return
try:
- dn = os.path.dirname(encodeFilename(filename))
+ dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
- if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+ if subtitles_are_requested and info_dict.get('requested_subtitles'):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
- subtitles = info_dict['subtitles']
- sub_format = self.params.get('subtitlesformat', 'srt')
- for sub_lang in subtitles.keys():
- sub = subtitles[sub_lang]
- if sub is None:
- continue
+ subtitles = info_dict['requested_subtitles']
+ ie = self.get_info_extractor(info_dict['extractor_key'])
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ if sub_info.get('data') is not None:
+ sub_data = sub_info['data']
+ else:
+ try:
+ sub_data = ie._download_webpage(
+ sub_info['url'], info_dict['id'], note=False)
+ except ExtractorError as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, compat_str(err.cause)))
+ continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
- subfile.write(sub)
+ subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
"""Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
if (len(url_list) > 1 and
- '%' not in outtmpl
- and self.params.get('max_downloads') != 1):
+ '%' not in outtmpl and
+ self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl)
for url in url_list:
return self._download_retcode
def download_with_info_file(self, info_filename):
- with io.open(info_filename, 'r', encoding='utf-8') as f:
- info = json.load(f)
+ with contextlib.closing(fileinput.FileInput(
+ [info_filename], mode='r',
+ openhook=fileinput.hook_encoded('utf-8'))) as f:
+ # FileInput doesn't have a read method, we can't call json.load
+ info = json.loads('\n'.join(f))
try:
self.process_ie_result(info, download=True)
except DownloadError:
return res
def list_formats(self, info_dict):
- def line(format, idlen=20):
- return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
- format['format_id'],
- format['ext'],
- self.format_resolution(format),
- self._format_note(format),
- ))
-
formats = info_dict.get('formats', [info_dict])
- idlen = max(len('format code'),
- max(len(f['format_id']) for f in formats))
- formats_s = [
- line(f, idlen) for f in formats
+ table = [
+ [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
+ for f in formats
if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1:
- formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
+ table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
- header_line = line({
- 'format_id': 'format code', 'ext': 'extension',
- 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
+ header_line = ['format code', 'extension', 'resolution', 'note']
self.to_screen(
- '[info] Available formats for %s:\n%s\n%s' %
- (info_dict['id'], header_line, '\n'.join(formats_s)))
+ '[info] Available formats for %s:\n%s' %
+ (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
thumbnails = info_dict.get('thumbnails')
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the available subtitle formats for a video, one row per language.

    subtitles maps a language code to a list of format dicts carrying an
    'ext' key; name is the label used in the printed messages. When
    subtitles is empty or None, only a "has no ..." notice is printed.
    """
    if subtitles:
        self.to_screen('Available %s for %s:' % (name, video_id))
        rows = []
        for language, sub_formats in subtitles.items():
            # Extensions are listed in reverse of their stored order
            extensions = [sub_format['ext'] for sub_format in reversed(sub_formats)]
            rows.append([language, ', '.join(extensions)])
        self.to_screen(render_table(['Language', 'formats'], rows))
    else:
        self.to_screen('%s has no %s' % (video_id, name))
+
def urlopen(self, req):
""" Start an HTTP download """
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
self._write_string('[debug] Git HEAD: ' + out + '\n')
- except:
+ except Exception:
try:
sys.exc_clear()
- except:
+ except Exception:
pass
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
- proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+ proxy_handler = PerRequestProxyHandler(proxies)
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
+ # The ssl context is only available in python 2.7.9 and 3.x
+ if hasattr(https_handler, '_context'):
+ ctx = https_handler._context
+ # get_ca_certs is unavailable prior to python 3.4
+ if hasattr(ctx, 'get_ca_certs') and len(ctx.get_ca_certs()) == 0:
+ self.report_warning(
+ 'No ssl certificates were loaded, urls that use https '
+ 'won\'t work')
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
opener = compat_urllib_request.build_opener(
- https_handler, proxy_handler, cookie_processor, ydlh)
+ proxy_handler, https_handler, cookie_processor, ydlh)
+
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)