X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fgeneric.py;h=c108d4a8a4e9a31d6b931d39c8a6233e75be1b0d;hb=7073015a23f96ce8ca0400286051d53cb9237a9f;hp=7f7c1ba29b9f39e0b63dee19fd8544fdfe3e531d;hpb=27e70a8f6cc3f002983a31850fad81c572ced277;p=youtube-dl.git diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7f7c1ba29..c108d4a8a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -87,6 +87,9 @@ from .videopress import VideoPressIE from .rutube import RutubeIE from .limelight import LimelightBaseIE from .anvato import AnvatoIE +from .washingtonpost import WashingtonPostIE +from .wistia import WistiaIE +from .mediaset import MediasetIE class GenericIE(InfoExtractor): @@ -1428,6 +1431,22 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + { + # Brightcove embed with whitespace around attribute names + 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill', + 'info_dict': { + 'id': '3167554373001', + 'ext': 'mp4', + 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill", + 'description': 'md5:57bacb0e0f29349de4972bfda3191713', + 'uploader_id': '1079349493', + 'upload_date': '20140207', + 'timestamp': 1391810548, + }, + 'params': { + 'skip_download': True, + }, + }, # Another form of arte.tv embed { 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html', @@ -1687,6 +1706,33 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 4, }, + { + # WashingtonPost embed + 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches', + 'info_dict': { + 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac', + 'ext': 'mp4', + 'title': "No one has seen the drama series based on Trump's life \u2014 until now", + 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.', + 'timestamp': 1455216756, + 'uploader': 'The Washington Post', + 'upload_date': '20160211', + }, + 'add_ie': [WashingtonPostIE.ie_key()], + }, + { + # Mediaset embed + 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml', + 'info_dict': { + 'id': '720642', + 'ext': 'mp4', + 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [MediasetIE.ie_key()], + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2080,57 +2126,20 @@ class GenericIE(InfoExtractor): playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) # Look for embedded Wistia player - match = re.search( - r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) - if match: - embed_url = self._proto_relative_url( - unescapeHTML(match.group('url'))) - return { - '_type': 'url_transparent', - 'url': embed_url, - 'ie_key': 'Wistia', - 'uploader': video_uploader, - } - - match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage) - if match: + wistia_url = WistiaIE._extract_url(webpage) + if wistia_url: return { '_type': 'url_transparent', - 'url': 'wistia:%s' % match.group('id'), - 'ie_key': 'Wistia', + 'url': self._proto_relative_url(wistia_url), + 'ie_key': WistiaIE.ie_key(), 'uploader': video_uploader, } - match = re.search( - r'''(?sx) - ]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? - ]+class=(["']).*?\bwistia_async_(?P[a-z0-9]+)\b.*?\2 - ''', webpage) - if match: - return self.url_result(self._proto_relative_url( - 'wistia:%s' % match.group('id')), 'Wistia') - # Look for SVT player svt_url = SVTIE._extract_url(webpage) if svt_url: return self.url_result(svt_url, 'SVT') - # Look for embedded condenast player - matches = re.findall( - r']*?content="(.*?bandcamp\.com.*?)"', webpage) if mobj is not None: @@ -2524,29 +2533,6 @@ class GenericIE(InfoExtractor): return self.playlist_result( limelight_urls, video_id, video_title, video_description) - mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P[a-z0-9]{32})', webpage) - if mobj: - lm = { - 'Media': 'media', - 'Channel': 'channel', - 'ChannelList': 'channel_list', - } - return self.url_result(smuggle_url('limelight:%s:%s' % ( - lm[mobj.group(1)], mobj.group(2)), {'source_url': url}), - 'Limelight%s' % mobj.group(1), mobj.group(2)) - - mobj = re.search( - r'''(?sx) - ]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*? - ]+ - name=(["\'])flashVars\2[^>]+ - value=(["\'])(?:(?!\3).)*mediaId=(?P[a-z0-9]{32}) - ''', webpage) - if mobj: - return self.url_result(smuggle_url( - 'limelight:media:%s' % mobj.group('id'), - {'source_url': url}), 'LimelightMedia', mobj.group('id')) - # Look for Anvato embeds anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id) if anvato_urls: @@ -2670,6 +2656,18 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( rutube_urls, ie=RutubeIE.ie_key()) + # Look for WashingtonPost embeds + wapo_urls = WashingtonPostIE._extract_urls(webpage) + if wapo_urls: + return self.playlist_from_matches( + wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key()) + + # Look for Mediaset embeds + mediaset_urls = MediasetIE._extract_urls(webpage) + if mediaset_urls: + return self.playlist_from_matches( + mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject')