[dailymotion] Extract duration (closes #6221)

[youtube-dl.git] / youtube_dl / extractor / vk.py
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py

index 0f7ce45ca7cc0c641ae69b58943f9252a4bfb7ba..8ac3aeac0ca5a4ebf41654a88953c5b975cfef4a 100644 (file)
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -29,7 +29,7 @@ class VKIE(InfoExtractor):
                                  (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video|
                                  (?:www\.)?biqle\.ru/watch/
                              )
-                            (?P<videoid>[^s].*?)(?:\?|%2F|$)
+                            (?P<videoid>[^s].*?)(?:\?(?:.*\blist=(?P<list_id>[\da-f]+))?|%2F|$)
                          )
                      '''
      _NETRC_MACHINE = 'vk'
@@ -119,6 +119,35 @@ class VKIE(InfoExtractor):
              },
              'skip': 'Only works from Russia',
          },
+        {
+            # video (removed?) only available with list id
+            'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
+            'md5': '091287af5402239a1051c37ec7b92913',
+            'info_dict': {
+                'id': '171201961',
+                'ext': 'mp4',
+                'title': 'ТюменцевВВ_09.07.2015',
+                'uploader': 'Anton Ivanov',
+                'duration': 109,
+                'upload_date': '20150709',
+                'view_count': int,
+            },
+        },
+        {
+            # youtube embed
+            'url': 'https://vk.com/video276849682_170681728',
+            'info_dict': {
+                'id': 'V3K4mi0SYkc',
+                'ext': 'mp4',
+                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
+                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
+                'duration': 179,
+                'upload_date': '20130116',
+                'uploader': "Children's Joy Foundation",
+                'uploader_id': 'thecjf',
+                'view_count': int,
+            },
+        },
          {
              # removed video, just testing that we match the pattern
              'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
@@ -139,9 +168,7 @@ class VKIE(InfoExtractor):
          login_page = self._download_webpage(
              'https://vk.com', None, 'Downloading login page')
  
-        login_form = dict(re.findall(
-            r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
-            login_page))
+        login_form = self._form_hidden_inputs(login_page)
  
          login_form.update({
              'email': username.encode('cp1251'),
@@ -169,6 +196,12 @@ class VKIE(InfoExtractor):
              video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
  
          info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
+
+        # Some videos (removed?) can only be downloaded with list id specified
+        list_id = mobj.group('list_id')
+        if list_id:
+            info_url += '&list=%s' % list_id
+
          info_page = self._download_webpage(info_url, video_id)
  
          if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
@@ -189,16 +222,20 @@ class VKIE(InfoExtractor):
  
              r'<!>Видео временно недоступно':
              'Video %s is temporarily unavailable.',
+
+            r'<!>Access denied':
+            'Access denied to video %s.',
          }
  
          for error_re, error_msg in ERRORS.items():
              if re.search(error_re, info_page):
                  raise ExtractorError(error_msg % video_id, expected=True)
  
-        m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
-        if m_yt is not None:
-            self.to_screen('Youtube video detected')
-            return self.url_result(m_yt.group(1), 'Youtube')
+        youtube_url = self._search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
+            info_page, 'youtube iframe', default=None)
+        if youtube_url:
+            return self.url_result(youtube_url, 'Youtube')
  
          m_rutube = re.search(
              r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)