gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/extractor/vk.py
[dailymotion] Extract duration (closes #6221)
[youtube-dl.git] / youtube_dl / extractor / vk.py
index 0f7ce45ca7cc0c641ae69b58943f9252a4bfb7ba..8ac3aeac0ca5a4ebf41654a88953c5b975cfef4a 100644 (file)
@@ -29,7 +29,7 @@ class VKIE(InfoExtractor):
                                 (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video|
                                 (?:www\.)?biqle\.ru/watch/
                             )
-                            (?P<videoid>[^s].*?)(?:\?|%2F|$)
+                            (?P<videoid>[^s].*?)(?:\?(?:.*\blist=(?P<list_id>[\da-f]+))?|%2F|$)
                         )
                     '''
     _NETRC_MACHINE = 'vk'
@@ -119,6 +119,35 @@ class VKIE(InfoExtractor):
             },
             'skip': 'Only works from Russia',
         },
+        {
+            # video (removed?) only available with list id
+            'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
+            'md5': '091287af5402239a1051c37ec7b92913',
+            'info_dict': {
+                'id': '171201961',
+                'ext': 'mp4',
+                'title': 'ТюменцевВВ_09.07.2015',
+                'uploader': 'Anton Ivanov',
+                'duration': 109,
+                'upload_date': '20150709',
+                'view_count': int,
+            },
+        },
+        {
+            # youtube embed
+            'url': 'https://vk.com/video276849682_170681728',
+            'info_dict': {
+                'id': 'V3K4mi0SYkc',
+                'ext': 'mp4',
+                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
+                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
+                'duration': 179,
+                'upload_date': '20130116',
+                'uploader': "Children's Joy Foundation",
+                'uploader_id': 'thecjf',
+                'view_count': int,
+            },
+        },
         {
             # removed video, just testing that we match the pattern
             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
@@ -139,9 +168,7 @@ class VKIE(InfoExtractor):
         login_page = self._download_webpage(
             'https://vk.com', None, 'Downloading login page')
 
-        login_form = dict(re.findall(
-            r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
-            login_page))
+        login_form = self._form_hidden_inputs(login_page)
 
         login_form.update({
             'email': username.encode('cp1251'),
@@ -169,6 +196,12 @@ class VKIE(InfoExtractor):
             video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
 
         info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
+
+        # Some videos (removed?) can only be downloaded with list id specified
+        list_id = mobj.group('list_id')
+        if list_id:
+            info_url += '&list=%s' % list_id
+
         info_page = self._download_webpage(info_url, video_id)
 
         if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
@@ -189,16 +222,20 @@ class VKIE(InfoExtractor):
 
             r'<!>Видео временно недоступно':
             'Video %s is temporarily unavailable.',
+
+            r'<!>Access denied':
+            'Access denied to video %s.',
         }
 
         for error_re, error_msg in ERRORS.items():
             if re.search(error_re, info_page):
                 raise ExtractorError(error_msg % video_id, expected=True)
 
-        m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
-        if m_yt is not None:
-            self.to_screen('Youtube video detected')
-            return self.url_result(m_yt.group(1), 'Youtube')
+        youtube_url = self._search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
+            info_page, 'youtube iframe', default=None)
+        if youtube_url:
+            return self.url_result(youtube_url, 'Youtube')
 
         m_rutube = re.search(
             r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)