X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=300485c7f81b2ee6cba9f03823592d31b2fc6f9a;hb=143907304908ee34bfcfe4914cda596d235291f8;hp=7666cf2078177db0682aae81b01b4253362efdfd;hpb=bcf89ce62cb4f6ab8802ab6aef01c3afaefc0075;p=youtube-dl.git diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7666cf207..300485c7f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -134,6 +134,17 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # funnyordie embed + { + 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns', + 'md5': '7cf780be104d40fea7bae52eed4a470e', + 'info_dict': { + 'id': '18e820ec3f', + 'ext': 'mp4', + 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama', + 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', + } + }, ] def report_download_webpage(self, video_id): @@ -280,6 +291,11 @@ class GenericIE(InfoExtractor): except compat_xml_parse_error: pass + # Sometimes embedded video player is hidden behind percent encoding + # (e.g. 
https://github.com/rg3/youtube-dl/issues/2448) + # Unescaping the whole page allows to handle those cases in a generic way + webpage = compat_urllib_parse.unquote(webpage) + # it's tempting to parse this further, but you would # have to take into account all the variations like # Video Title - Site Name @@ -432,6 +448,14 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(compat_urllib_parse.unquote(mobj.group('url'))) + # Look for funnyordie embed + matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) + if matches: + urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie') + for eurl in matches] + return self.playlist_result( + urlrs, playlist_id=video_id, playlist_title=video_title) + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: