+import re
+
from .common import InfoExtractor
from ..utils import RegexNotFoundError
class GoogleDriveEmbedIE(InfoExtractor):
    """Resolve Google Drive player/embed URLs to the canonical file URL.

    Accepts ``video.google.com/get_player?...docid=<id>`` URLs as well as
    ``docs.google.com`` / ``drive.google.com`` ``/file/d/<id>`` URLs and
    delegates the canonical ``https://drive.google.com/file/d/<id>`` URL to
    the ``GoogleDrive`` extractor.
    """

    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
    _TEST = {
        'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
        'info_dict': {
            'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
            'ext': 'mp4',
            'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Find the first Google Drive iframe embedded in ``webpage``.

        Returns the canonical ``https://drive.google.com/file/d/<id>`` URL
        for the first matching ``<iframe>``, or ``None`` when the page
        contains no such embed.
        """
        # ``src`` does not have to be the first attribute of the tag and may
        # be quoted with either quote style; ``[^>]`` keeps the match inside
        # the opening <iframe ...> tag.
        mobj = re.search(
            r'<iframe[^>]+?src=["\']https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
            webpage)
        if mobj:
            return 'https://drive.google.com/file/d/%s' % mobj.group('id')
        return None

    def _real_extract(self, url):
        """Return a ``url``-type result that hands the file to ``GoogleDrive``."""
        video_id = self._match_id(url)
        return {
            '_type': 'url',
            # The key must be spelled ``ie_key`` (underscore); the hyphenated
            # spelling is not recognised as the extractor hint. This matters
            # because the URL returned below is also matched by this class's
            # own _VALID_URL.
            'ie_key': 'GoogleDrive',
            'url': 'https://drive.google.com/file/d/%s' % video_id
        }
+
class GoogleDriveIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)'
+ _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
_TEST = {
- 'url': 'https://drive.google.com/file/d/0BzpExh0WzJF0NlR5WUlxdEVsY0U/edit?pli=1',
+ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
'info_dict': {
- 'id': '0BzpExh0WzJF0NlR5WUlxdEVsY0U',
+ 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
- 'title': '[AHSH] Fairy Tail S2 - 01 [720p].mp4',
+ 'title': 'Big Buck Bunny.mp4',
}
}
_formats = {
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
- 'http://docs.google.com/file/d/'+video_id, video_id, encoding='unicode_escape'
+ 'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape'
)
try:
title = self._html_search_regex(
- r'"title","(?P<title>.*?)"',
+ r'"title"\s+,\s+"[^"]+',
webpage,
- 'title',
- group='title'
+ 'title'
)
fmt_stream_map = self._html_search_regex(
- r'"fmt_stream_map","(?P<fmt_stream_map>.*?)"',
+ r'"fmt_stream_map"\s+,\s+"[^"]+',
webpage,
- 'fmt_stream_map',
- group='fmt_stream_map'
+ 'fmt_stream_map'
)
fmt_list = self._html_search_regex(
- r'"fmt_list","(?P<fmt_list>.*?)"',
+ r'"fmt_list"\s+,\s+"[^"]+',
webpage,
- 'fmt_list',
- group='fmt_list'
+ 'fmt_list'
)
# timestamp = self._html_search_regex(
-# r'"timestamp","(?P<timestamp>.*?)"',
+# r'"timestamp"\s+,\s+"[^"]+',
# webpage,
-# 'timestamp',
-# group='timestamp'
+# 'timestamp'
# )
length_seconds = self._html_search_regex(
- r'"length_seconds","(?P<length_seconds>.*?)"',
+ r'"length_seconds"\s+,\s+"[^"]+',
webpage,
- 'length_seconds',
- group='length_seconds'
+ 'length_seconds'
)
except RegexNotFoundError:
try:
reason = self._html_search_regex(
- r'"reason","(?P<reason>.*?)"',
+ r'"reason","[^"]+',
webpage,
- 'reason',
- group='reason'
+ 'reason'
)
self.report_warning(reason)
return