X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvimeo.py;h=7e854f3265eac3312f1b63199ce8633a17eb7d04;hb=c8e170b2092f5e2ad9ea8fd7fb2eedd35e307a1c;hp=32490a8ed85a82509d3faea6681cf1e2488ee23b;hpb=dfc8f46e1c0e47a3b080d2e38d7d6da279f18fd2;p=youtube-dl.git

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 32490a8ed..7e854f326 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -16,6 +16,7 @@ from ..utils import (
     ExtractorError,
     InAdvancePagedList,
     int_or_none,
+    NO_DEFAULT,
     RegexNotFoundError,
     sanitized_Request,
     smuggle_url,
@@ -56,6 +57,26 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         self._set_vimeo_cookie('vuid', vuid)
         self._download_webpage(login_request, None, False, 'Wrong login info')
 
+    def _verify_video_password(self, url, video_id, webpage):
+        password = self._downloader.params.get('videopassword')
+        if password is None:
+            raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
+        token, vuid = self._extract_xsrft_and_vuid(webpage)
+        data = urlencode_postdata({
+            'password': password,
+            'token': token,
+        })
+        if url.startswith('http://'):
+            # vimeo only supports https now, but the user can give an http url
+            url = url.replace('http://', 'https://')
+        password_request = sanitized_Request(url + '/password', data)
+        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        password_request.add_header('Referer', url)
+        self._set_vimeo_cookie('vuid', vuid)
+        return self._download_webpage(
+            password_request, video_id,
+            'Verifying the password', 'Wrong password')
+
     def _extract_xsrft_and_vuid(self, webpage):
         xsrft = self._search_regex(
             r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
@@ -146,7 +167,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                             \.
                         )?
                         vimeo(?P<pro>pro)?\.com/
-                        (?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/)
+                        (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
                         (?:.*?/)?
                         (?:
                             (?:
@@ -314,6 +335,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
             'only_matching': True,
         },
+        {
+            'url': 'https://vimeo.com/album/2632481/video/79010983',
+            'only_matching': True,
+        },
         {
             # source file returns 403: Forbidden
             'url': 'https://vimeo.com/7809605',
@@ -339,26 +364,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
         if mobj:
             return mobj.group(1)
-
-    def _verify_video_password(self, url, video_id, webpage):
-        password = self._downloader.params.get('videopassword')
-        if password is None:
-            raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
-        token, vuid = self._extract_xsrft_and_vuid(webpage)
-        data = urlencode_postdata({
-            'password': password,
-            'token': token,
-        })
-        if url.startswith('http://'):
-            # vimeo only supports https now, but the user can give an http url
-            url = url.replace('http://', 'https://')
-        password_request = sanitized_Request(url + '/password', data)
-        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        password_request.add_header('Referer', url)
-        self._set_vimeo_cookie('vuid', vuid)
-        return self._download_webpage(
-            password_request, video_id,
-            'Verifying the password', 'Wrong password')
+        # Look more for non-standard embedded Vimeo player
+        mobj = re.search(
+            r'<video[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage)
+        if mobj:
+            return mobj.group('url')
 
     def _verify_player_video_password(self, url, video_id):
         password = self._downloader.params.get('videopassword')
@@ -651,8 +661,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
                 webpage = self._login_list_password(page_url, list_id, webpage)
                 yield self._extract_list_title(webpage)
 
-            for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
-                yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo', video_id=video_id)
+            # Try extracting href first since not all videos are available via
+            # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
+            clips = re.findall(
+                r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
+            if clips:
+                for video_id, video_url in clips:
+                    yield self.url_result(
+                        compat_urlparse.urljoin(base_url, video_url),
+                        VimeoIE.ie_key(), video_id=video_id)
+            # More relaxed fallback
+            else:
+                for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
+                    yield self.url_result(
+                        'https://vimeo.com/%s' % video_id,
+                        VimeoIE.ie_key(), video_id=video_id)
 
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                 break
@@ -689,7 +712,7 @@ class VimeoUserIE(VimeoChannelIE):
 
 class VimeoAlbumIE(VimeoChannelIE):
     IE_NAME = 'vimeo:album'
-    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
+    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
     _TESTS = [{
         'url': 'https://vimeo.com/album/2632481',
@@ -709,6 +732,13 @@ class VimeoAlbumIE(VimeoChannelIE):
         'params': {
             'videopassword': 'youtube-dl',
         }
+    }, {
+        'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
+        'only_matching': True,
+    }, {
+        # TODO: respect page number
+        'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
+        'only_matching': True,
     }]
 
     def _page_url(self, base_url, pagenum):
@@ -767,12 +797,39 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader_id': 'user22258446',
         }
+    }, {
+        'note': 'Password protected',
+        'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
+        'info_dict': {
+            'id': '138823582',
+            'ext': 'mp4',
+            'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
+            'uploader': 'TMB',
+            'uploader_id': 'user37284429',
+        },
+        'params': {
+            'videopassword': 'holygrail',
+        },
     }]
 
+    def _real_initialize(self):
+        self._login()
+
+    def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
+        webpage = self._download_webpage(webpage_url, video_id)
+        config_url = self._html_search_regex(
+            r'data-config-url="([^"]+)"', webpage, 'config URL',
+            default=NO_DEFAULT if video_password_verified else None)
+        if config_url is None:
+            self._verify_video_password(webpage_url, video_id, webpage)
+            config_url = self._get_config_url(
+                webpage_url, video_id, video_password_verified=True)
+        return config_url
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        config = self._download_json(
-            'https://player.vimeo.com/video/%s/config' % video_id, video_id)
+        config_url = self._get_config_url(url, video_id)
+        config = self._download_json(config_url, video_id)
         info_dict = self._parse_config(config, video_id)
         self._vimeo_sort_formats(info_dict['formats'])
         info_dict['id'] = video_id