import ctypes
from .compat import (
- compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_get_terminal_size,
compat_tokenize_tokenize,
compat_urllib_error,
compat_urllib_request,
+ compat_urllib_request_DataHandler,
)
from .utils import (
- escape_url,
ContentTooShortError,
date_from_str,
DateRange,
ExtractorError,
format_bytes,
formatSeconds,
- HEADRequest,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
version_tuple,
write_json_file,
write_string,
+ YoutubeDLCookieProcessor,
YoutubeDLHandler,
prepend_extension,
replace_extension,
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
otherwise prefer avconv.
+ postprocessor_args: A list of additional command-line arguments for the
+ postprocessor.
"""
params = None
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self._err_file = sys.stderr
- self.params = params
+ self.params = {
+ # Default parameters
+ 'nocheckcertificate': False,
+ }
+ self.params.update(params)
self.cache = Cache(self)
if params.get('bidi_workaround', False):
if v is not None)
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
- outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
+ outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict
# Temporary fix for #4787
# to workaround encoding issues with subprocess on python2 @ Windows
if sys.version_info < (3, 0) and sys.platform == 'win32':
filename = encodeFilename(filename, True).decode(preferredencoding())
- return filename
+ return sanitize_path(filename)
except ValueError as err:
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
extra_info=extra)
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
+ self.to_screen('[download] Finished downloading playlist: %s' % playlist)
return ie_result
elif result_type == 'compat_list':
self.report_warning(
PICKFIRST = 'PICKFIRST'
MERGE = 'MERGE'
SINGLE = 'SINGLE'
+ GROUP = 'GROUP'
FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
def _parse_filter(tokens):
else:
filter_parts.append(string)
- def _parse_format_selection(tokens, endwith=[]):
+ def _remove_unused_ops(tokens):
+ # Remove operators that we don't use and join them with the sourrounding strings
+ # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
+ ALLOWED_OPS = ('/', '+', ',', '(', ')')
+ last_string, last_start, last_end, last_line = None, None, None, None
+ for type, string, start, end, line in tokens:
+ if type == tokenize.OP and string == '[':
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string, start, end, line
+ # everything inside brackets will be handled by _parse_filter
+ for type, string, start, end, line in tokens:
+ yield type, string, start, end, line
+ if type == tokenize.OP and string == ']':
+ break
+ elif type == tokenize.OP and string in ALLOWED_OPS:
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string, start, end, line
+ elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
+ if not last_string:
+ last_string = string
+ last_start = start
+ last_end = end
+ else:
+ last_string += string
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+
+ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
selectors = []
current_selector = None
for type, string, start, _, _ in tokens:
elif type in [tokenize.NAME, tokenize.NUMBER]:
current_selector = FormatSelector(SINGLE, string, [])
elif type == tokenize.OP:
- if string in endwith:
+ if string == ')':
+ if not inside_group:
+ # ')' will be handled by the parentheses group
+ tokens.restore_last_token()
break
- if string == ',':
+ elif inside_merge and string in ['/', ',']:
+ tokens.restore_last_token()
+ break
+ elif inside_choice and string == ',':
+ tokens.restore_last_token()
+ break
+ elif string == ',':
+ if not current_selector:
+ raise syntax_error('"," must follow a format selector', start)
selectors.append(current_selector)
current_selector = None
elif string == '/':
+ if not current_selector:
+ raise syntax_error('"/" must follow a format selector', start)
first_choice = current_selector
- second_choice = _parse_format_selection(tokens, [','])
- current_selector = None
- selectors.append(FormatSelector(PICKFIRST, (first_choice, second_choice), []))
+ second_choice = _parse_format_selection(tokens, inside_choice=True)
+ current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
elif string == '[':
if not current_selector:
current_selector = FormatSelector(SINGLE, 'best', [])
format_filter = _parse_filter(tokens)
current_selector.filters.append(format_filter)
+ elif string == '(':
+ if current_selector:
+ raise syntax_error('Unexpected "("', start)
+ group = _parse_format_selection(tokens, inside_group=True)
+ current_selector = FormatSelector(GROUP, group, [])
elif string == '+':
video_selector = current_selector
- audio_selector = _parse_format_selection(tokens, [','])
- current_selector = None
- selectors.append(FormatSelector(MERGE, (video_selector, audio_selector), []))
+ audio_selector = _parse_format_selection(tokens, inside_merge=True)
+ if not video_selector or not audio_selector:
+ raise syntax_error('"+" must be between two format selectors', start)
+ current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
else:
raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
elif type == tokenize.ENDMARKER:
for format in f(formats):
yield format
return selector_function
+ elif selector.type == GROUP:
+ selector_function = _build_selector_function(selector.selector)
elif selector.type == PICKFIRST:
fs = [_build_selector_function(s) for s in selector.selector]
format_spec = selector.selector
def selector_function(formats):
+ formats = list(formats)
+ if not formats:
+ return
if format_spec == 'all':
for f in formats:
yield f
return final_selector
stream = io.BytesIO(format_spec.encode('utf-8'))
- tokens = compat_tokenize_tokenize(stream.readline)
- parsed_selector = _parse_format_selection(tokens)
+ try:
+ tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
+ except tokenize.TokenError:
+ raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
+
+ class TokenIterator(object):
+ def __init__(self, tokens):
+ self.tokens = tokens
+ self.counter = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ if self.counter >= len(self.tokens):
+ raise StopIteration()
+ value = self.tokens[self.counter]
+ self.counter += 1
+ return value
+
+ next = __next__
+
+ def restore_last_token(self):
+ self.counter -= 1
+
+ parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
t.get('preference'), t.get('width'), t.get('height'),
t.get('id'), t.get('url')))
for i, t in enumerate(thumbnails):
- if 'width' in t and 'height' in t:
+ if t.get('width') and t.get('height'):
t['resolution'] = '%dx%d' % (t['width'], t['height'])
if t.get('id') is None:
t['id'] = '%d' % i
except (ValueError, OverflowError, OSError):
pass
+ subtitles = info_dict.get('subtitles')
+ if subtitles:
+ for _, subtitle in subtitles.items():
+ for subtitle_format in subtitle:
+ if 'ext' not in subtitle_format:
+ subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+
if self.params.get('listsubtitles', False):
if 'automatic_captions' in info_dict:
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
- self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+ self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
return
info_dict['requested_subtitles'] = self.process_subtitles(
- info_dict['id'], info_dict.get('subtitles'),
+ info_dict['id'], subtitles,
info_dict.get('automatic_captions'))
# We now pick which formats have to be downloaded
if req_format is None:
req_format_list = []
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
- info_dict['extractor'] in ['youtube', 'ted']):
+ info_dict['extractor'] in ['youtube', 'ted'] and
+ not info_dict.get('is_live')):
merger = FFmpegMergerPP(self)
if merger.available and merger.can_merge():
req_format_list.append('bestvideo+bestaudio')
def urlopen(self, req):
""" Start an HTTP download """
-
- # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
- # always respected by websites, some tend to give out URLs with non percent-encoded
- # non-ASCII characters (see telemb.py, ard.py [#3412])
- # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
- # To work around aforementioned issue we will replace request's original URL with
- # percent-encoded one
- req_is_string = isinstance(req, compat_basestring)
- url = req if req_is_string else req.get_full_url()
- url_escaped = escape_url(url)
-
- # Substitute URL if any change after escaping
- if url != url_escaped:
- if req_is_string:
- req = url_escaped
- else:
- req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
- req = req_type(
- url_escaped, data=req.data, headers=req.headers,
- origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
-
return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
if os.access(opts_cookiefile, os.R_OK):
self.cookiejar.load()
- cookie_processor = compat_urllib_request.HTTPCookieProcessor(
- self.cookiejar)
+ cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
if opts_proxy is not None:
if opts_proxy == '':
proxies = {}
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+ data_handler = compat_urllib_request_DataHandler()
opener = compat_urllib_request.build_opener(
- proxy_handler, https_handler, cookie_processor, ydlh)
+ proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
(info_dict['extractor'], info_dict['id'], thumb_display_id))
try:
uf = self.urlopen(t['url'])
- with open(thumb_filename, 'wb') as thumbf:
+ with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))