X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyahoo.py;h=b2d8f4b48daddcf734d3a1fb461d1b92736bcfd1;hb=3a686853e1739dfc26548cdc09fe89e693e76a9f;hp=1d9b9875069e481aa0994602c68f7e00d9766f3f;hpb=364ca0582eb1506af049c0e17595509c6bc0e00c;p=youtube-dl.git diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 1d9b98750..b2d8f4b48 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -8,6 +8,7 @@ import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -147,6 +148,7 @@ class YahooIE(InfoExtractor): }, { # Query result is embedded in webpage, but explicit request to video API fails with geo restriction 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', + 'md5': '4fbafb9c9b6f07aa8f870629f6671b35', 'info_dict': { 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', 'ext': 'mp4', @@ -154,11 +156,16 @@ class YahooIE(InfoExtractor): 'description': 'md5:8fc39608213295748e1e289807838c97', 'duration': 1646, }, - 'params': { - # m3u8 download - 'skip_download': True, + }, { + # it uses an alias to get the video_id + 'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html', + 'info_dict': { + 'id': '40eda9c8-8e5f-3552-8745-830f67d0c737', + 'ext': 'mp4', + 'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking', + 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.', }, - } + }, ] def _real_extract(self, url): @@ -202,13 +209,24 @@ class YahooIE(InfoExtractor): r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, default=None) if items_json is None: - CONTENT_ID_REGEXES = [ - r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', - r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', - r'"first_videoid"\s*:\s*"([^"]+)"', - r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), - ] - video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID') + alias = self._search_regex( + r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None) + if alias is not None: + alias_info = self._download_json( + 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias, + display_id, 'Downloading alias info') + video_id = alias_info[0]['id'] + else: + CONTENT_ID_REGEXES = [ + r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', + r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', + r'"first_videoid"\s*:\s*"([^"]+)"', + r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), + r']data-uuid=["\']([^"\']+)', + r'yahoo://article/view\?.*\buuid=([^&"\']+)', + ] + video_id = self._search_regex( + CONTENT_ID_REGEXES, webpage, 'content ID') else: items = json.loads(items_json) info = items['mediaItems']['query']['results']['mediaObj'][0] @@ -286,7 +304,7 @@ class YahooIE(InfoExtractor): region = self._search_regex( r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', webpage, 'region', fatal=False, default='US') - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'protocol': 'http', 'region': region, })