X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=f452a90d87804ffa445f1f2dcc183de740ced767;hb=1139a54d9bb9d214a8451301a44af6d03dd450b7;hp=45c85ed01f4e891293b3b0d37f87e174ab2773d1;hpb=c1777d5cb3fb1ae48de79badfe5b8db9963999b4;p=youtube-dl.git diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 45c85ed01..f452a90d8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -7,6 +7,7 @@ import itertools import json import os.path import re +import time import traceback from .common import InfoExtractor, SearchInfoExtractor @@ -38,17 +39,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor' - _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' - _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False def _set_language(self): - return bool(self._download_webpage( - self._LANG_URL, None, - note='Setting language', errnote='unable to set language', - fatal=False)) + self._set_cookie( + '.youtube.com', 'PREF', 'f1=50000000&hl=en', + # YouTube sets the expire time to about two months + expire_time=time.time() + 2 * 30 * 24 * 3600) def _login(self): """ @@ -176,30 +175,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return False return True - def _confirm_age(self): - age_form = { - 'next_url': '/', - 'action_confirm': 'Confirm', - } - req = compat_urllib_request.Request( - self._AGE_URL, - compat_urllib_parse.urlencode(age_form).encode('ascii') - ) - - self._download_webpage( - req, None, - note='Confirming age', errnote='Unable to confirm age', - fatal=False) - def _real_initialize(self): if self._downloader is None: return - if self._get_login_info()[0] is not None: - if not self._set_language(): - return + self._set_language() if not self._login(): return - self._confirm_age() class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): @@ -305,6 +286,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, + '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'}, # Dash webm audio '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, @@ -398,8 +380,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'info_dict': { 'id': 'IB3lcPjvWLA', 'ext': 'm4a', - 'title': 'Afrojack - The Spark ft. Spree Wilson', - 'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8', + 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson', + 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d', 'uploader': 'AfrojackVEVO', 'uploader_id': 'AfrojackVEVO', 'upload_date': '20131011', @@ -421,7 +403,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'title': 'Burning Everyone\'s Koran', 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', } - } + }, + # Normal age-gate video (No vevo, embed allowed) + { + 'url': 'http://youtube.com/watch?v=HtVdAasjOgU', + 'info_dict': { + 'id': 'HtVdAasjOgU', + 'ext': 'mp4', + 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', + 'description': 'md5:eca57043abae25130f58f655ad9a7771', + 'uploader': 'The Witcher', + 'uploader_id': 'WitcherGame', + 'upload_date': '20140605', + }, + }, ] def __init__(self, *args, **kwargs): @@ -609,9 +604,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return {} player_config = json.loads(mobj.group(1)) try: - args = player_config[u'args'] - caption_url = args[u'ttsurl'] - timestamp = args[u'timestamp'] + args = player_config['args'] + caption_url = args['ttsurl'] + timestamp = args['timestamp'] # We get the available subtitles list_params = compat_urllib_parse.urlencode({ 'type': 'list', @@ -684,16 +679,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Get video webpage url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id - pref_cookies = [ - c for c in self._downloader.cookiejar - if c.domain == '.youtube.com' and c.name == 'PREF'] - for pc in pref_cookies: - if 'hl=' in pc.value: - pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value) - else: - if pc.value: - pc.value += '&' - pc.value += 'hl=en' video_webpage = self._download_webpage(url, video_id) # Attempt to extract SWF player URL @@ -704,7 +689,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): player_url = None # Get video info - self.report_video_info_webpage_download(video_id) if re.search(r'player-age-gate-content">', video_webpage) is not None: age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} @@ -723,15 +707,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): video_info = compat_parse_qs(video_info_webpage) else: age_gate = False - for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: - video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) - video_info_webpage = self._download_webpage(video_info_url, video_id, - note=False, - errnote='unable to download video info webpage') - video_info = compat_parse_qs(video_info_webpage) - if 'token' in video_info: - break + try: + # Try looking directly into the video webpage + mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) + if not mobj: + raise ValueError('Could not find ytplayer.config') # caught below + json_code = uppercase_escape(mobj.group(1)) + ytplayer_config = json.loads(json_code) + args = ytplayer_config['args'] + # Convert to the same format returned by compat_parse_qs + video_info = dict((k, [v]) for k, v in args.items()) + if 'url_encoded_fmt_stream_map' not in args: + raise ValueError('No stream_map present') # caught below + except ValueError: + # We fallback to the get_video_info pages (used by the embed page) + self.report_video_info_webpage_download(video_id) + for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: + video_info_url = ( + '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' + % (proto, video_id, el_type)) + video_info_webpage = self._download_webpage( + video_info_url, + video_id, note=False, + errnote='unable to download video info webpage') + video_info = compat_parse_qs(video_info_webpage) + if 'token' in video_info: + break if 'token' not in video_info: if 'reason' in video_info: raise ExtractorError( @@ -856,32 +857,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if self._downloader.params.get('writeannotations', False): video_annotations = self._extract_annotations(video_id) - # Decide which formats to download - try: - mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) - if not mobj: - raise ValueError('Could not find vevo ID') - json_code = uppercase_escape(mobj.group(1)) - ytplayer_config = json.loads(json_code) - args = ytplayer_config['args'] - # Easy way to know if the 's' value is in url_encoded_fmt_stream_map - # this signatures are encrypted - if 'url_encoded_fmt_stream_map' not in args: - raise ValueError('No stream_map present') # caught below - re_signature = re.compile(r'[&,]s=') - m_s = re_signature.search(args['url_encoded_fmt_stream_map']) - if m_s is not None: - self.to_screen('%s: Encrypted signatures detected.' % video_id) - video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] - m_s = re_signature.search(args.get('adaptive_fmts', '')) - if m_s is not None: - if 'adaptive_fmts' in video_info: - video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts'] - else: - video_info['adaptive_fmts'] = [args['adaptive_fmts']] - except ValueError: - pass - def _map_to_format_list(urlmap): formats = [] for itag, video_real_url in urlmap.items(): @@ -950,7 +925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): parts_sizes = self._signature_cache_id(encrypted_sig) self.to_screen('{%s} signature length %s, %s' % - (format_id, parts_sizes, player_desc)) + (format_id, parts_sizes, player_desc)) signature = self._decrypt_signature( encrypted_sig, video_id, player_url, age_gate) @@ -974,10 +949,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage. # Luckily, it seems, this case uses some kind of default signature (len == 86), so the # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here. - if age_gate: - dash_manifest_url = video_info.get('dashmpd')[0] - else: - dash_manifest_url = ytplayer_config['args']['dashmpd'] + dash_manifest_url = video_info.get('dashmpd')[0] def decrypt_sig(mobj): s = mobj.group(1) @@ -1002,6 +974,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), 'asr': int_or_none(r.attrib.get('audioSamplingRate')), 'filesize': filesize, + 'fps': int_or_none(r.attrib.get('frameRate')), } try: existing_format = next( @@ -1214,7 +1187,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): class YoutubeTopListIE(YoutubePlaylistIE): IE_NAME = 'youtube:toplist' IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' - ' (Example: "yttoplist:music:Top Tracks")') + ' (Example: "yttoplist:music:Top Tracks")') _VALID_URL = r'yttoplist:(?P.*?):(?P.*?)$' _TESTS = [{ 'url': 'yttoplist:music:Trending',