import itertools
from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
- clean_html,
+from ..compat import (
compat_HTTPError,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
+)
+from ..utils import (
ExtractorError,
- get_element_by_attribute,
InAdvancePagedList,
int_or_none,
RegexNotFoundError,
self._download_webpage(login_request, None, False, 'Wrong login info')
-class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
+class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
password_request = compat_urllib_request.Request(pass_url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'xsrft=%s' % token)
- self._download_webpage(password_request, video_id,
- 'Verifying the password',
- 'Wrong password')
+ return self._download_webpage(
+ password_request, video_id,
+ 'Verifying the password', 'Wrong password')
def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword', None)
else:
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
config = self._search_regex(config_re, webpage, 'info section',
- flags=re.DOTALL)
+ flags=re.DOTALL)
config = json.loads(config)
except Exception as e:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
- if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
+ if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
self._verify_video_password(url, video_id, webpage)
return self._real_extract(url)
else:
text_tracks = config['request'].get('text_tracks')
if text_tracks:
for tt in text_tracks:
- subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
-
- video_subtitles = self.extract_subtitles(video_id, subtitles)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles[tt['lang']] = [{
+ 'ext': 'vtt',
+ 'url': 'http://vimeo.com' + tt['url'],
+ }]
return {
'id': video_id,
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
- 'subtitles': video_subtitles,
+ 'subtitles': subtitles,
}
def _extract_list_title(self, webpage):
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
+    def _login_list_password(self, page_url, list_id, webpage):
+        """Submit the password form of a protected list page, if present.
+
+        If webpage contains no form with id "pw_form", webpage is returned
+        unchanged.  Otherwise the form's hidden inputs, the xsrft token found
+        in webpage and the 'videopassword' downloader parameter are POSTed to
+        the form's action URL (resolved relative to page_url), and the result
+        of that self._download_webpage call is returned.
+
+        Raises ExtractorError (expected=True) when the form is present but no
+        'videopassword' parameter was supplied.
+        """
+        login_form = self._search_regex(
+            r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
+            webpage, 'login form', default=None)
+        if not login_form:
+            # No password form on the page: nothing to unlock.
+            return webpage
+
+        password = self._downloader.params.get('videopassword', None)
+        if password is None:
+            raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
+        # Carry over every hidden input of the form as-is.
+        # NOTE(review): the pattern only matches inputs whose attributes
+        # appear in the order type, name, value.
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            value="([^"]*)"
+            ''', login_form))
+        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+        fields['token'] = token
+        fields['password'] = password
+        # The request body must be bytes: on Python 3 urllib refuses a str
+        # body ("POST data should be bytes").  urlencode() output is plain
+        # ASCII, so encoding it is lossless and harmless on Python 2.
+        post = compat_urllib_parse.urlencode(fields).encode('ascii')
+        password_path = self._search_regex(
+            r'action="([^"]+)"', login_form, 'password URL')
+        password_url = compat_urlparse.urljoin(page_url, password_path)
+        password_request = compat_urllib_request.Request(password_url, post)
+        password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
+        # The same token is also sent as the xsrft cookie
+        # (presumably the server checks it against the form field - confirm).
+        self._set_cookie('vimeo.com', 'xsrft', token)
+
+        return self._download_webpage(
+            password_request, list_id,
+            'Verifying the password', 'Wrong password')
+
def _extract_videos(self, list_id, base_url):
video_ids = []
for pagenum in itertools.count(1):
+ page_url = self._page_url(base_url, pagenum)
webpage = self._download_webpage(
- self._page_url(base_url, pagenum), list_id,
+ page_url, list_id,
'Downloading page %s' % pagenum)
+
+ if pagenum == 1:
+ webpage = self._login_list_password(page_url, list_id, webpage)
+
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
'title': 'Staff Favorites: November 2013',
},
'playlist_mincount': 13,
+ }, {
+ 'note': 'Password-protected album',
+ 'url': 'https://vimeo.com/album/3253534',
+ 'info_dict': {
+ 'title': 'test',
+ 'id': '3253534',
+ },
+ 'playlist_count': 1,
+ 'params': {
+ 'videopassword': 'youtube-dl',
+ }
}]
def _page_url(self, base_url, pagenum):
return '%s/page:%d/' % (base_url, pagenum)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- album_id = mobj.group('id')
+ album_id = self._match_id(url)
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
_VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
- 'file': '75524534.mp4',
'md5': 'c507a72f780cacc12b2248bb4006d253',
'info_dict': {
+ 'id': '75524534',
+ 'ext': 'mp4',
'title': "DICK HARDWICK 'Comedian'",
'uploader': 'Richard Hardwick',
}
'info_dict': {
'id': '91613211',
'ext': 'mp4',
- 'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
+ 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
'uploader': 'DevWeek Events',
'duration': 2773,
'thumbnail': 're:^https?://.*\.jpg$',