X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=83b5840f76c7c9aa76760457e9b87ce86e83e680;hb=15707c7e024f1f29e7abd8ddaa362196ef2d4af6;hp=43fdbfab1f1e637d9201373fb6a659ec0c3ec723;hpb=7caf9830b0ae80363d3b1817b322452474631cc1;p=youtube-dl.git diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 43fdbfab1..83b5840f7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -17,16 +17,15 @@ from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, compat_parse_qs, - compat_urllib_parse, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, compat_str, ) from ..utils import ( clean_html, - encode_dict, error_to_compat_str, ExtractorError, float_or_none, @@ -116,7 +115,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'hl': 'en_US', } - login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii') + login_data = compat_urllib_parse_urlencode(login_form_strs).encode('ascii') req = sanitized_Request(self._LOGIN_URL, login_data) login_results = self._download_webpage( @@ -149,7 +148,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'TrustDevice': 'on', }) - tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii') + tfa_data = compat_urllib_parse_urlencode(tfa_form_strs).encode('ascii') tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data) tfa_results = self._download_webpage( @@ -309,6 +308,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, # Apple HTTP Live Streaming + '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 
48, 'vcodec': 'h264', 'preference': -10}, '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, @@ -383,6 +383,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'youtube-dl test video "\'/\\ä↭𝕐', 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag', 'upload_date': '20121002', 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', @@ -409,6 +410,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'iconic ep', 'iconic', 'love', 'it'], 'uploader': 'Icona Pop', 'uploader_id': 'IconaPop', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop', 'license': 'Standard YouTube License', 'creator': 'Icona Pop', } @@ -425,6 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:64249768eec3bc4276236606ea996373', 'uploader': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO', 'license': 'Standard YouTube License', 'creator': 'Justin Timberlake', 'age_limit': 18, @@ -441,6 +444,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', 'uploader': 'SET India', 'uploader_id': 'setindia', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia', 'license': 'Standard YouTube License', 'age_limit': 18, } @@ -454,6 +458,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'youtube-dl test video "\'/\\ä↭𝕐', 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag', 'upload_date': '20121002', 
'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', @@ -474,6 +479,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'm4a', 'upload_date': '20121002', 'uploader_id': '8KVIDEO', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', 'description': '', 'uploader': '8KVIDEO', 'license': 'Standard YouTube License', @@ -531,6 +537,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20100909', 'uploader': 'The Amazing Atheist', 'uploader_id': 'TheAmazingAtheist', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', 'license': 'Standard YouTube License', 'title': 'Burning Everyone\'s Koran', 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', @@ -546,6 +553,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', 'uploader': 'The Witcher', 'uploader_id': 'WitcherGame', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 'upload_date': '20140605', 'license': 'Standard YouTube License', 'age_limit': 18, @@ -561,6 +569,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:33765bb339e1b47e7e72b5490139bb41', 'uploader': 'LloydVEVO', 'uploader_id': 'LloydVEVO', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO', 'upload_date': '20110629', 'license': 'Standard YouTube License', 'age_limit': 18, @@ -574,6 +583,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'upload_date': '20100430', 'uploader_id': 'deadmau5', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5', 'creator': 'deadmau5', 'description': 'md5:12c56784b8032162bb936a5f76d55360', 
'uploader': 'deadmau5', @@ -593,6 +603,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'upload_date': '20150827', 'uploader_id': 'olympic', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic', 'license': 'Standard YouTube License', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 'uploader': 'Olympics', @@ -611,6 +622,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'stretched_ratio': 16 / 9., 'upload_date': '20110310', 'uploader_id': 'AllenMeow', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow', 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯', 'uploader': '孫艾倫', 'license': 'Standard YouTube License', @@ -644,6 +656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:116377fd2963b81ec4ce64b542173306', 'upload_date': '20150625', 'uploader_id': 'dorappi2000', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 'uploader': 'dorappi2000', 'license': 'Standard YouTube License', 'formats': 'mincount:33', @@ -685,6 +698,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }, { @@ -696,6 +710,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }, { @@ -707,6 +722,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }, { @@ -718,6 +734,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': 
'20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }], @@ -752,6 +769,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 'upload_date': '20151119', 'uploader_id': 'IronSoulElf', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', 'uploader': 'IronSoulElf', 'license': 'Standard YouTube License', 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan', @@ -791,6 +809,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:a677553cf0840649b731a3024aeff4cc', 'upload_date': '20150127', 'uploader_id': 'BerkmanCenter', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', 'uploader': 'BerkmanCenter', 'license': 'Creative Commons Attribution license (reuse allowed)', }, @@ -798,6 +817,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, + { + # Channel-like uploader_url + 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg', + 'info_dict': { + 'id': 'eQcmzGIKrzg', + 'ext': 'mp4', + 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders', + 'description': 'md5:dda0d780d5a6e120758d1711d062a867', + 'upload_date': '20151119', + 'uploader': 'Bernie 2016', + 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', + 'license': 'Creative Commons Attribution license (reuse allowed)', + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'https://www.youtube.com/watch?feature=player_embedded&v=V36LpHqtcDY', 'only_matching': True, @@ -969,7 +1006,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue sub_formats = [] for ext in self._SUBTITLE_FORMATS: - params = compat_urllib_parse.urlencode({ + params = compat_urllib_parse_urlencode({ 'lang': lang, 'v': video_id, 'fmt': ext, @@ -1018,7 +1055,7 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): if caption_url: timestamp = args['timestamp'] # We get the available subtitles - list_params = compat_urllib_parse.urlencode({ + list_params = compat_urllib_parse_urlencode({ 'type': 'list', 'tlangs': 1, 'asrs': 1, @@ -1037,7 +1074,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sub_lang = lang_node.attrib['lang_code'] sub_formats = [] for ext in self._SUBTITLE_FORMATS: - params = compat_urllib_parse.urlencode({ + params = compat_urllib_parse_urlencode({ 'lang': original_lang, 'tlang': sub_lang, 'fmt': ext, @@ -1056,7 +1093,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): caption_tracks = args['caption_tracks'] caption_translation_languages = args['caption_translation_languages'] caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0] - parsed_caption_url = compat_urlparse.urlparse(caption_url) + parsed_caption_url = compat_urllib_parse_urlparse(caption_url) caption_qs = compat_parse_qs(parsed_caption_url.query) sub_lang_list = {} @@ -1072,7 +1109,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'fmt': [ext], }) sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace( - query=compat_urllib_parse.urlencode(caption_qs, True))) + query=compat_urllib_parse_urlencode(caption_qs, True))) sub_formats.append({ 'url': sub_url, 'ext': ext, @@ -1102,7 +1139,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'cpn': [cpn], }) playback_url = compat_urlparse.urlunparse( - parsed_playback_url._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) self._download_webpage( playback_url, video_id, 'Marking watched', @@ -1187,7 +1224,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # this can be viewed without login into Youtube url = proto + '://www.youtube.com/embed/%s' % video_id embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage') - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 
'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, 'sts': self._search_regex( @@ -1334,9 +1371,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # uploader_id video_uploader_id = None - mobj = re.search(r'', video_webpage) + video_uploader_url = None + mobj = re.search( + r'', + video_webpage) if mobj is not None: - video_uploader_id = mobj.group(1) + video_uploader_id = mobj.group('uploader_id') + video_uploader_url = mobj.group('uploader_url') else: self._downloader.report_warning('unable to extract uploader nickname') @@ -1642,6 +1683,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': video_id, 'uploader': video_uploader, 'uploader_id': video_uploader_id, + 'uploader_url': video_uploader_url, 'upload_date': upload_date, 'license': video_license, 'creator': video_creator, @@ -1868,7 +1910,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): @classmethod def suitable(cls, url): - return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url) + return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url) + else super(YoutubeChannelIE, cls).suitable(url)) def _real_extract(self, url): channel_id = self._match_id(url) @@ -1943,6 +1986,51 @@ class YoutubeUserIE(YoutubeChannelIE): return super(YoutubeUserIE, cls).suitable(url) +class YoutubeLiveIE(YoutubeBaseInfoExtractor): + IE_DESC = 'YouTube.com live streams' + _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live' + IE_NAME = 'youtube:live' + + _TESTS = [{ + 'url': 'http://www.youtube.com/user/TheYoungTurks/live', + 'info_dict': { + 'id': 'a48o2S1cPoo', + 'ext': 'mp4', + 'title': 'The Young Turks - Live Main Show', + 'uploader': 'The Young Turks', + 'uploader_id': 'TheYoungTurks', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', + 'upload_date': '20150715', + 'license': 'Standard YouTube License', + 'description': 'md5:438179573adcdff3c97ebb1ee632b891', + 'categories': 
['News & Politics'], + 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + channel_id = mobj.group('id') + base_url = mobj.group('base_url') + webpage = self._download_webpage(url, channel_id, fatal=False) + if webpage: + page_type = self._og_search_property( + 'type', webpage, 'page type', default=None) + video_id = self._html_search_meta( + 'videoId', webpage, 'video id', default=None) + if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id): + return self.url_result(video_id, YoutubeIE.ie_key()) + return self.url_result(base_url) + + class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): IE_DESC = 'YouTube.com user/channel playlists' _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists' @@ -1996,7 +2084,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): 'spf': 'navigate', } url_query.update(self._EXTRA_QUERY_ARGS) - result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query) + result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) data = self._download_json( result_url, video_id='query "%s"' % query, note='Downloading page %s' % pagenum,