remove unnecessary regex group names

[youtube-dl.git] / youtube_dl / extractor / googledrive.py
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py

index 8c611fa47fb5ceed066c5b3760996a583d6040c6..6d9bcfefdd1bfda424004f706d2a1d8eea711357 100644 (file)
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -1,14 +1,43 @@
+import re
+
  from .common import InfoExtractor
  from ..utils import RegexNotFoundError
  
+class GoogleDriveEmbedIE(InfoExtractor):
+    # Matches embedded-player/preview URLs and redirects them to the GoogleDrive extractor.
+    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
+    _TEST = {
+        'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
+        'info_dict': {
+            'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
+            'ext': 'mp4',
+            'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
+        }
+    }
+
+    @staticmethod
+    def _extract_url(webpage):
+        # Canonical file URL for the first embedded Drive iframe in webpage, or None.
+        mobj = re.search(
+            r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})', webpage)
+        return 'https://drive.google.com/file/d/%s' % mobj.group('id') if mobj else None
+
+    def _real_extract(self, url):
+        # Target extractor goes under 'ie_key' (underscore); the old 'ie-key' spelling was ignored.
+        return {
+            '_type': 'url',
+            'ie_key': 'GoogleDrive',
+            'url': 'https://drive.google.com/file/d/%s' % self._match_id(url)
+        }
+
  class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)'
+    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
      _TEST = {
-        'url': 'https://drive.google.com/file/d/0BzpExh0WzJF0NlR5WUlxdEVsY0U/edit?pli=1',
+        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
          'info_dict': {
-            'id': '0BzpExh0WzJF0NlR5WUlxdEVsY0U',
+            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
              'ext': 'mp4',
-            'title': '[AHSH] Fairy Tail S2 - 01 [720p].mp4',
+            'title': 'Big Buck Bunny.mp4',
          }
      }
      _formats = {
@@ -33,46 +62,40 @@ class GoogleDriveIE(InfoExtractor):
      def _real_extract(self, url):
          video_id = self._match_id(url)
          webpage = self._download_webpage(
-            'http://docs.google.com/file/d/'+video_id, video_id, encoding='unicode_escape'
+            'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape'
          )
          try:
              title = self._html_search_regex(
-                r'"title","(?P<title>.*?)"',
+                r'"title"\s+,\s+"[^"]+',
                  webpage,
-                'title',
-                group='title'
+                'title'
              )
              fmt_stream_map = self._html_search_regex(
-                r'"fmt_stream_map","(?P<fmt_stream_map>.*?)"',
+                r'"fmt_stream_map"\s+,\s+"[^"]+',
                  webpage,
-                'fmt_stream_map',
-                group='fmt_stream_map'
+                'fmt_stream_map'
              )
              fmt_list = self._html_search_regex(
-                r'"fmt_list","(?P<fmt_list>.*?)"',
+                r'"fmt_list"\s+,\s+"[^"]+',
                  webpage,
-                'fmt_list',
-                group='fmt_list'
+                'fmt_list'
              )
  #                      timestamp = self._html_search_regex(
-#                              r'"timestamp","(?P<timestamp>.*?)"',
+#                              r'"timestamp"\s+,\s+"[^"]+',
  #                              webpage,
-#                              'timestamp',
-#                              group='timestamp'
+#                              'timestamp'
  #                      )
              length_seconds = self._html_search_regex(
-                r'"length_seconds","(?P<length_seconds>.*?)"',
+                r'"length_seconds"\s+,\s+"[^"]+',
                  webpage,
-                'length_seconds',
-                group='length_seconds'
+                'length_seconds'
              )
          except RegexNotFoundError:
              try:
                  reason = self._html_search_regex(
-                    r'"reason","(?P<reason>.*?)"',
+                    r'"reason","[^"]+',
                      webpage,
-                    'reason',
-                    group='reason'
+                    'reason'
                  )
                  self.report_warning(reason)
                  return