X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=62b1da25ee111748fc1e99ada617163274bcee69;hb=00558d94145f97c644e66ec086fa9b9d8c58280f;hp=bcb0765940df39656be9f78c1ab144976adb5e5e;hpb=1f7ccb90147184a6234e3b6d603ccec9ee7880e4;p=youtube-dl.git diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bcb076594..62b1da25e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -16,6 +16,7 @@ from ..utils import ( ExtractorError, HEADRequest, + orderedSet, parse_xml, smuggle_url, unescapeHTML, @@ -289,6 +290,22 @@ class GenericIE(InfoExtractor): 'description': 'Mario\'s life in the fast lane has never looked so good.', }, }, + # YouTube embed via + { + 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM', + 'info_dict': { + 'id': 'jpSGZsgga_I', + 'ext': 'mp4', + 'title': 'Asphalt 8: Airborne - Launch Trailer', + 'uploader': 'Gameloft', + 'uploader_id': 'gameloft', + 'upload_date': '20130821', + 'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a', + }, + 'params': { + 'skip_download': True, + } + } ] def report_download_webpage(self, video_id): @@ -479,6 +496,12 @@ class GenericIE(InfoExtractor): video_uploader = self._search_regex( r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') + # Helper method + def _playlist_from_matches(matches, getter, ie=None): + urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches) + return self.playlist_result( + urlrs, playlist_id=video_id, playlist_title=video_title) + # Look for BrightCove: bc_urls = BrightcoveIE._extract_brightcove_urls(webpage) if bc_urls: @@ -514,6 +537,7 @@ class GenericIE(InfoExtractor): matches = re.findall(r'''(?x) (?: ]+?src=| + data-video-url=| ]+?src=| embedSWF\(?:\s* ) @@ -522,19 +546,15 @@ class GenericIE(InfoExtractor): (?:embed|v)/.+?) \1''', webpage) if matches: - urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') - for tuppl in matches] - return self.playlist_result( - urlrs, playlist_id=video_id, playlist_title=video_title) + return _playlist_from_matches( + matches, lambda m: unescapeHTML(m[1]), ie='Youtube') # Look for embedded Dailymotion player matches = re.findall( r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage) if matches: - urlrs = [self.url_result(unescapeHTML(tuppl[1])) - for tuppl in matches] - return self.playlist_result( - urlrs, playlist_id=video_id, playlist_title=video_title) + return _playlist_from_matches( + matches, lambda m: unescapeHTML(m[1])) # Look for embedded Wistia player match = re.search( @@ -648,10 +668,8 @@ class GenericIE(InfoExtractor): # Look for funnyordie embed matches = re.findall(r']+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) if matches: - urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie') - for eurl in matches] - return self.playlist_result( - urlrs, playlist_id=video_id, playlist_title=video_title) + return _playlist_from_matches( + matches, getter=unescapeHTML, ie='FunnyOrDie') # Look for embedded RUTV player rutv_url = RUTVIE._extract_url(webpage) @@ -706,6 +724,20 @@ class GenericIE(InfoExtractor): url = unescapeHTML(mobj.group('url')) return self.url_result(url, ie='MTVServicesEmbedded') + # Look for embedded yahoo player + mobj = re.search( + r']+?src=(["\'])(?Phttps?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Yahoo') + + # Look for embedded sbs.com.au player + mobj = re.search( + r']+?src=(["\'])(?Phttps?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'SBS') + # Start with something easy: JW Player in SWFObject found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if not found: