The automatic captions are stored in the 'automactic_captions' field, which is used if no normal subtitles are found for an specific language.
url = 'QRS8MkLhQmM'
IE = YoutubeIE
url = 'QRS8MkLhQmM'
IE = YoutubeIE
- def test_youtube_no_writesubtitles(self):
- self.DL.params['writesubtitles'] = False
- subtitles = self.getSubtitles()
- self.assertEqual(subtitles, None)
-
def test_youtube_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
def test_youtube_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
if self.params.get('listsubtitles', False):
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
if self.params.get('listsubtitles', False):
- self.list_subtitles(info_dict['id'], info_dict.get('subtitles'))
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+ self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
- info_dict['requested_subtitles'] = self.process_subtitles(info_dict['id'], info_dict.get('subtitles'))
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], info_dict.get('subtitles'),
+ info_dict.get('automatic_captions'))
# This extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
# This extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
info_dict.update(formats_to_download[-1])
return info_dict
info_dict.update(formats_to_download[-1])
return info_dict
- def process_subtitles(self, video_id, available_subs):
+ def process_subtitles(self, video_id, available_subs, available_autocaps):
"""Select the requested subtitles and their format"""
"""Select the requested subtitles and their format"""
+ if available_autocaps and self.params.get('writeautomaticsub'):
+ available_subs = available_subs.copy()
+ for lang, cap_info in available_autocaps.items():
+ if lang not in available_subs:
+ available_subs[lang] = cap_info
+
if not available_subs:
return available_subs
if not available_subs:
return available_subs
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
- def list_subtitles(self, video_id, subtitles):
+ def list_subtitles(self, video_id, subtitles, name='subtitles'):
- self.to_screen('%s has no subtitles' % video_id)
+ self.to_screen('%s has no %s' % (video_id, name))
return
header_line = 'Language formats'
sub_lines = [
'%-12s%s' % (lang, ', '.join(f['ext'] for f in reversed(formats)))
for lang, formats in subtitles.items()]
self.to_screen(
return
header_line = 'Language formats'
sub_lines = [
'%-12s%s' % (lang, ', '.join(f['ext'] for f in reversed(formats)))
for lang, formats in subtitles.items()]
self.to_screen(
- 'Available subtitles for %s:\n%s\n%s' %
- (video_id, header_line, '\n'.join(sub_lines)))
+ 'Available %s for %s:\n%s\n%s' %
+ (name, video_id, header_line, '\n'.join(sub_lines)))
def urlopen(self, req):
""" Start an HTTP download """
def urlopen(self, req):
""" Start an HTTP download """
with the "ext" entry and one of:
* "data": The subtitles file contents
* "url": A url pointing to the subtitles file
with the "ext" entry and one of:
* "data": The subtitles file contents
* "url": A url pointing to the subtitles file
+ automatic_captions: Like 'subtitles', used by the YoutubeIE for
+ automatically generated captions
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
def _get_subtitles(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
def _get_subtitles(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
+ def extract_automatic_captions(self, *args, **kwargs):
+ automatic_captions = {}
+ list_subtitles = self._downloader.params.get('listsubtitles')
+ if self._downloader.params.get('writeautomaticsub', False) or list_subtitles:
+ automatic_captions.update(self._get_automatic_captions(*args, **kwargs))
+ return automatic_captions
+
+ def _get_automatic_captions(self, *args, **kwargs):
+ raise NotImplementedError("This method must be implemented by subclasses")
+
class SearchInfoExtractor(InfoExtractor):
"""
class SearchInfoExtractor(InfoExtractor):
"""
import traceback
from .common import InfoExtractor, SearchInfoExtractor
import traceback
from .common import InfoExtractor, SearchInfoExtractor
-from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..compat import (
from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..compat import (
-class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
+class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com'
_VALID_URL = r"""(?x)^
(
IE_DESC = 'YouTube.com'
_VALID_URL = r"""(?x)^
(
raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e)
raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e)
- def _get_available_subtitles(self, video_id, webpage):
+ def _get_subtitles(self, video_id, webpage):
try:
subs_doc = self._download_xml(
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
try:
subs_doc = self._download_xml(
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
lang = track.attrib['lang_code']
if lang in sub_lang_list:
continue
lang = track.attrib['lang_code']
if lang in sub_lang_list:
continue
- params = compat_urllib_parse.urlencode({
- 'lang': lang,
- 'v': video_id,
- 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
- 'name': track.attrib['name'].encode('utf-8'),
- })
- url = 'https://www.youtube.com/api/timedtext?' + params
- sub_lang_list[lang] = url
+ sub_formats = []
+ for ext in ['sbv', 'vtt', 'srt']:
+ params = compat_urllib_parse.urlencode({
+ 'lang': lang,
+ 'v': video_id,
+ 'fmt': ext,
+ 'name': track.attrib['name'].encode('utf-8'),
+ })
+ sub_formats.append({
+ 'url': 'https://www.youtube.com/api/timedtext?' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[lang] = sub_formats
if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
return sub_lang_list
if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
return sub_lang_list
- def _get_available_automatic_caption(self, video_id, webpage):
+ def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
- sub_format = self._downloader.params.get('subtitlesformat', 'srt')
self.to_screen('%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
self.to_screen('%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
sub_lang_list = {}
for lang_node in caption_list.findall('target'):
sub_lang = lang_node.attrib['lang_code']
sub_lang_list = {}
for lang_node in caption_list.findall('target'):
sub_lang = lang_node.attrib['lang_code']
- params = compat_urllib_parse.urlencode({
- 'lang': original_lang,
- 'tlang': sub_lang,
- 'fmt': sub_format,
- 'ts': timestamp,
- 'kind': caption_kind,
- })
- sub_lang_list[sub_lang] = caption_url + '&' + params
+ sub_formats = []
+ for ext in ['sbv', 'vtt', 'srt']:
+ params = compat_urllib_parse.urlencode({
+ 'lang': original_lang,
+ 'tlang': sub_lang,
+ 'fmt': ext,
+ 'ts': timestamp,
+ 'kind': caption_kind,
+ })
+ sub_formats.append({
+ 'url': caption_url + '&' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[sub_lang] = sub_formats
return sub_lang_list
# An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles
return sub_lang_list
# An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, video_webpage)
- return
+ automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
if 'length_seconds' not in video_info:
self._downloader.report_warning('unable to extract video duration')
if 'length_seconds' not in video_info:
self._downloader.report_warning('unable to extract video duration')
'description': video_description,
'categories': video_categories,
'subtitles': video_subtitles,
'description': video_description,
'categories': video_categories,
'subtitles': video_subtitles,
+ 'automatic_captions': automatic_captions,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,