]> gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/extractor/googledrive.py
remove unnecessary regex group names
[youtube-dl.git] / youtube_dl / extractor / googledrive.py
index 8c611fa47fb5ceed066c5b3760996a583d6040c6..6d9bcfefdd1bfda424004f706d2a1d8eea711357 100644 (file)
@@ -1,14 +1,43 @@
+import re
+
 from .common import InfoExtractor
 from ..utils import RegexNotFoundError
 
+class GoogleDriveEmbedIE(InfoExtractor):
+    """Resolve embedded Google Drive players to canonical file URLs.
+
+    Matches the ``get_player?...docid=<id>`` and ``file/d/<id>`` embed
+    forms, takes the 28-character file id from the URL, and defers the
+    actual extraction to the GoogleDrive extractor via a ``url`` result.
+    """
+
+    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
+    _TEST = {
+        'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
+        'info_dict': {
+            'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
+            'ext': 'mp4',
+            'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
+        }
+    }
+
+    @staticmethod
+    def _extract_url(webpage):
+        """Return the Drive file URL embedded in *webpage*, or None.
+
+        Looks for the first ``<iframe src="...">`` whose source is one of
+        the supported embed forms and rebuilds the canonical
+        ``https://drive.google.com/file/d/<id>`` URL from its file id.
+        """
+        mobj = re.search(
+            r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
+            webpage)
+        if mobj is None:
+            # No supported embed on the page.
+            return None
+        return 'https://drive.google.com/file/d/%s' % mobj.group('id')
+
+    def _real_extract(self, url):
+        """Return a ``url`` result pointing *url*'s file at GoogleDriveIE."""
+        video_id = self._match_id(url)
+        return {
+            '_type': 'url',
+            # The extractor hint is spelled 'ie_key' (underscore); the
+            # previous 'ie-key' spelling is not a recognised result key,
+            # so the GoogleDrive hint was being dropped.
+            'ie_key': 'GoogleDrive',
+            'url': 'https://drive.google.com/file/d/%s' % video_id,
+        }
+
 class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)'
+    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
     _TEST = {
-        'url': 'https://drive.google.com/file/d/0BzpExh0WzJF0NlR5WUlxdEVsY0U/edit?pli=1',
+        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
         'info_dict': {
-            'id': '0BzpExh0WzJF0NlR5WUlxdEVsY0U',
+            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
             'ext': 'mp4',
-            'title': '[AHSH] Fairy Tail S2 - 01 [720p].mp4',
+            'title': 'Big Buck Bunny.mp4',
         }
     }
     _formats = {
@@ -33,46 +62,40 @@ class GoogleDriveIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(
-            'http://docs.google.com/file/d/'+video_id, video_id, encoding='unicode_escape'
+            'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape'
         )
         try:
             title = self._html_search_regex(
-                r'"title","(?P<title>.*?)"',
+                r'"title"\s+,\s+"[^"]+',
                 webpage,
-                'title',
-                group='title'
+                'title'
             )
             fmt_stream_map = self._html_search_regex(
-                r'"fmt_stream_map","(?P<fmt_stream_map>.*?)"',
+                r'"fmt_stream_map"\s+,\s+"[^"]+',
                 webpage,
-                'fmt_stream_map',
-                group='fmt_stream_map'
+                'fmt_stream_map'
             )
             fmt_list = self._html_search_regex(
-                r'"fmt_list","(?P<fmt_list>.*?)"',
+                r'"fmt_list"\s+,\s+"[^"]+',
                 webpage,
-                'fmt_list',
-                group='fmt_list'
+                'fmt_list'
             )
 #                      timestamp = self._html_search_regex(
-#                              r'"timestamp","(?P<timestamp>.*?)"',
+#                              r'"timestamp"\s+,\s+"[^"]+',
 #                              webpage,
-#                              'timestamp',
-#                              group='timestamp'
+#                              'timestamp'
 #                      )
             length_seconds = self._html_search_regex(
-                r'"length_seconds","(?P<length_seconds>.*?)"',
+                r'"length_seconds"\s+,\s+"[^"]+',
                 webpage,
-                'length_seconds',
-                group='length_seconds'
+                'length_seconds'
             )
         except RegexNotFoundError:
             try:
                 reason = self._html_search_regex(
-                    r'"reason","(?P<reason>.*?)"',
+                    r'"reason","[^"]+',
                     webpage,
-                    'reason',
-                    group='reason'
+                    'reason'
                 )
                 self.report_warning(reason)
                 return