X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=fc1bedd57e63676da3b94cf8a6833d462334124b;hb=40c716d2a2cb1473695f7ef87cc78fcedd22541a;hp=2254ade90e153eb25514a7765238bdc603135c48;hpb=750f9020aedf73ca2320e57b38ca2153d7d4adec;p=youtube-dl.git diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2254ade90..fc1bedd57 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -25,6 +25,7 @@ from ..utils import ( from .brightcove import BrightcoveIE from .ooyala import OoyalaIE from .rutv import RUTVIE +from .smotri import SmotriIE class GenericIE(InfoExtractor): @@ -185,7 +186,48 @@ class GenericIE(InfoExtractor): 'uploader': 'Ze Frank', 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b', } - } + }, + # nowvideo embed hidden behind percent encoding + { + 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', + 'md5': '2baf4ddd70f697d94b1c18cf796d5107', + 'info_dict': { + 'id': '06e53103ca9aa', + 'ext': 'flv', + 'title': 'Macross Episode 001 Watch Macross Episode 001 onl', + 'description': 'No description', + }, + }, + # arte embed + { + 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html', + 'md5': '7653032cbb25bf6c80d80f217055fa43', + 'info_dict': { + 'id': '048195-004_PLUS7-F', + 'ext': 'flv', + 'title': 'X:enius', + 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168', + 'upload_date': '20140320', + }, + 'params': { + 'skip_download': 'Requires rtmpdump' + } + }, + # smotri embed + { + 'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml', + 'md5': 'ec40048448e9284c9a1de77bb188108b', + 'info_dict': { + 'id': 'v27008541fad', + 'ext': 'mp4', + 'title': 'Крым и Севастополь вошли в состав России', + 'description': 'md5:fae01b61f68984c7bd2fa741e11c3175', + 'duration': 900, + 'upload_date': '20140318', + 'uploader': 'rbctv_2012_4', + 'uploader_id': 'rbctv_2012_4', + }, + }, ] def 
report_download_webpage(self, video_id): @@ -337,6 +379,11 @@ class GenericIE(InfoExtractor): except compat_xml_parse_error: pass + # Sometimes embedded video player is hidden behind percent encoding + # (e.g. https://github.com/rg3/youtube-dl/issues/2448) + # Unescaping the whole page allows to handle those cases in a generic way + webpage = compat_urllib_parse.unquote(webpage) + # it's tempting to parse this further, but you would # have to take into account all the variations like # Video Title - Site Name @@ -503,6 +550,24 @@ class GenericIE(InfoExtractor): if rutv_url: return self.url_result(rutv_url, 'RUTV') + # Look for embedded TED player + mobj = re.search( + r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'TED') + + # Look for embedded arte.tv player + mobj = re.search( + r'