determine_ext,
ExtractorError,
js_to_json,
- InAdvancePagedList,
int_or_none,
merge_dicts,
NO_DEFAULT,
def _parse_config(self, config, video_id):
video_data = config['video']
- # Extract title
video_title = video_data['title']
-
- # Extract uploader, uploader_url and uploader_id
- video_uploader = video_data.get('owner', {}).get('name')
- video_uploader_url = video_data.get('owner', {}).get('url')
- video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
-
- # Extract video thumbnail
- video_thumbnail = video_data.get('thumbnail')
- if video_thumbnail is None:
- video_thumbs = video_data.get('thumbs')
- if video_thumbs and isinstance(video_thumbs, dict):
- _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
-
- # Extract video duration
- video_duration = int_or_none(video_data.get('duration'))
+ live_event = video_data.get('live_event') or {}
+ is_live = live_event.get('status') == 'started'
formats = []
config_files = video_data.get('files') or config['request'].get('files', {})
'tbr': int_or_none(f.get('bitrate')),
})
+ # TODO: fix handling of 308 status code returned for live archive manifest requests
for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
manifest_url = cdn_data.get('url')
if files_type == 'hls':
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4',
- 'm3u8_native', m3u8_id=format_id,
+ 'm3u8' if is_live else 'm3u8_native', m3u8_id=format_id,
note='Downloading %s m3u8 information' % cdn_name,
fatal=False))
elif files_type == 'dash':
else:
mpd_manifest_urls = [(format_id, manifest_url)]
for f_id, m_url in mpd_manifest_urls:
+ if 'json=1' in m_url:
+ real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
+ if real_m_url:
+ m_url = real_m_url
mpd_formats = self._extract_mpd_formats(
m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
'Downloading %s MPD information' % cdn_name,
f['preference'] = -40
formats.extend(mpd_formats)
+ live_archive = live_event.get('archive') or {}
+ live_archive_source_url = live_archive.get('source_url')
+ if live_archive_source_url and live_archive.get('status') == 'done':
+ formats.append({
+ 'format_id': 'live-archive-source',
+ 'url': live_archive_source_url,
+ 'preference': 1,
+ })
+
subtitles = {}
text_tracks = config['request'].get('text_tracks')
if text_tracks:
'url': 'https://vimeo.com' + tt['url'],
}]
+ thumbnails = []
+ if not is_live:
+ for key, thumb in video_data.get('thumbs', {}).items():
+ thumbnails.append({
+ 'id': key,
+ 'width': int_or_none(key),
+ 'url': thumb,
+ })
+ thumbnail = video_data.get('thumbnail')
+ if thumbnail:
+ thumbnails.append({
+ 'url': thumbnail,
+ })
+
+ owner = video_data.get('owner') or {}
+ video_uploader_url = owner.get('url')
+
return {
- 'title': video_title,
- 'uploader': video_uploader,
- 'uploader_id': video_uploader_id,
+ 'title': self._live_title(video_title) if is_live else video_title,
+ 'uploader': owner.get('name'),
+ 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
'uploader_url': video_uploader_url,
- 'thumbnail': video_thumbnail,
- 'duration': video_duration,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(video_data.get('duration')),
'formats': formats,
'subtitles': subtitles,
+ 'is_live': is_live,
}
def _extract_original_format(self, url, video_id):
return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
-class VimeoLikesIE(InfoExtractor):
+class VimeoLikesIE(VimeoChannelIE):
_VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
IE_NAME = 'vimeo:likes'
IE_DESC = 'Vimeo user likes'
'url': 'https://vimeo.com/user755559/likes/',
'playlist_mincount': 293,
'info_dict': {
- 'id': 'user755559_likes',
- 'description': 'See all the videos urza likes',
- 'title': 'Videos urza likes',
+ 'id': 'user755559',
+ 'title': 'urza’s Likes',
},
}, {
'url': 'https://vimeo.com/stormlapse/likes',
'only_matching': True,
}]
+ def _page_url(self, base_url, pagenum):
+ return '%s/page:%d/' % (base_url, pagenum)
+
def _real_extract(self, url):
user_id = self._match_id(url)
- webpage = self._download_webpage(url, user_id)
- page_count = self._int(
- self._search_regex(
- r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)">
- .*?</a></li>\s*<li\s+class="pagination_next">
- ''', webpage, 'page count', default=1),
- 'page count', fatal=True)
- PAGE_SIZE = 12
- title = self._html_search_regex(
- r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False)
- description = self._html_search_meta('description', webpage)
-
- def _get_page(idx):
- page_url = 'https://vimeo.com/%s/likes/page:%d/sort:date' % (
- user_id, idx + 1)
- webpage = self._download_webpage(
- page_url, user_id,
- note='Downloading page %d/%d' % (idx + 1, page_count))
- video_list = self._search_regex(
- r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>',
- webpage, 'video content')
- paths = re.findall(
- r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list)
- for path in paths:
- yield {
- '_type': 'url',
- 'url': compat_urlparse.urljoin(page_url, path),
- }
-
- pl = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
-
- return {
- '_type': 'playlist',
- 'id': '%s_likes' % user_id,
- 'title': title,
- 'description': description,
- 'entries': pl,
- }
+ return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
class VHXEmbedIE(InfoExtractor):