3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
36 get_element_by_attribute,
# NOTE(review): this excerpt is a line-sampled/garbled copy of youtube-dl's
# YouTube extractor.  The original file's line numbers are fused into each
# line and many intermediate lines are missing (the embedded numbering
# jumps), so this block is not runnable as-is.  Comments below describe only
# what the visible lines show; gaps are flagged, not reconstructed.
58 class YoutubeBaseInfoExtractor(InfoExtractor):
59 """Provide base functions for Youtube extractors"""
# Google-account sign-in endpoints driving the _login() flow below.
60 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
61 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
63 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
64 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# {0} is filled with the "TL" token extracted from the challenge response.
65 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
67 _NETRC_MACHINE = 'youtube'
68 # If True it will raise an error if no login info is provided
69 _LOGIN_REQUIRED = False
# Matches the known playlist-id prefixes followed by >=10 id characters.
71 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
# Pins the interface language via the PREF cookie.  The call these arguments
# belong to (presumably self._set_cookie) is one of the missing lines -- TODO
# confirm against the original source.
73 def _set_language(self):
75 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
76 # YouTube sets the expire time to about two months
77 expire_time=time.time() + 2 * 30 * 24 * 3600)
# Maps raw video ids to url_result() entries; the enclosing list/return
# statement is missing from this excerpt.
79 def _ids_to_results(self, ids):
81 self.url_result(vid_id, 'Youtube', video_id=vid_id)
# _login() docstring fragment (the def line and opening quotes are missing):
86 Attempt to log in to YouTube.
87 True is returned if successful or skipped.
88 False is returned if login failed.
90 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
92 username, password = self._get_login_info()
93 # No authentication to be performed
# Without credentials: error out only when login is mandatory and no cookie
# jar was supplied (the early-return for the optional case is missing here).
95 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
96 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
99 login_page = self._download_webpage(
100 self._LOGIN_URL, None,
101 note='Downloading login page',
102 errnote='unable to fetch login page', fatal=False)
# fatal=False makes _download_webpage return False on failure; the body of
# this guard (presumably `return False`) is missing from the excerpt.
103 if login_page is False:
106 login_form = self._hidden_inputs(login_page)
# Helper that POSTs one step of the sign-in flow as form data and parses the
# JSON reply.  The dict literal's opening line is missing below.
108 def req(url, f_req, note, errnote):
109 data = login_form.copy()
112 'checkConnection': 'youtube',
113 'checkedDomains': 'youtube',
115 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
116 'f.req': json.dumps(f_req),
117 'flowName': 'GlifWebSignIn',
118 'flowEntry': 'ServiceLogin',
119 # TODO: reverse actual botguard identifier generation algo
120 'bgRequest': '["identifier",""]',
# The response starts with a non-JSON prefix; transform_source strips
# everything before the first '[' so _download_json can parse it.
122 return self._download_json(
123 url, None, note=note, errnote=errnote,
124 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
126 data=urlencode_postdata(data), headers={
127 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
128 'Google-Accounts-XSRF': 1,
# Body of a local warn() helper (its def line is missing).
132 self._downloader.report_warning(message)
# Fragment of the lookup_req payload (the list literal's start is missing).
136 None, [], None, 'US', None, None, 2, False, True,
140 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
142 1, [None, None, []], None, None, None, True
# Step 1: resolve the account identifier to a user hash.
147 lookup_results = req(
148 self._LOOKUP_URL, lookup_req,
149 'Looking up account info', 'Unable to look up account info')
151 if lookup_results is False:
154 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
156 warn('Unable to extract user hash')
# Step 2: submit the password (challenge_req fragment; list start missing).
161 None, 1, None, [1, None, None, None, [password, None, True]],
163 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
164 1, [None, None, []], None, None, None, True
167 challenge_results = req(
168 self._CHALLENGE_URL, challenge_req,
169 'Logging in', 'Unable to log in')
171 if challenge_results is False:
# Inspect the challenge response for an explicit login failure message.
174 login_res = try_get(challenge_results, lambda x: x[0][5], list)
176 login_msg = try_get(login_res, lambda x: x[5], compat_str)
# INCORRECT_ANSWER_ENTERED is Google's wrong-password marker; translate it
# to a friendlier message (the surrounding warn(...) call is missing).
178 'Unable to login: %s' % 'Invalid password'
179 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
182 res = try_get(challenge_results, lambda x: x[0][-1], list)
184 warn('Unable to extract result entry')
187 login_challenge = try_get(res, lambda x: x[0][0], list)
189 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
# Two-factor branch: request the TOTP code and replay it to the TFA URL.
190 if challenge_str == 'TWO_STEP_VERIFICATION':
191 # SEND_SUCCESS - TFA code has been successfully sent to phone
192 # QUOTA_EXCEEDED - reached the limit of TFA codes
193 status = try_get(login_challenge, lambda x: x[5], compat_str)
194 if status == 'QUOTA_EXCEEDED':
195 warn('Exceeded the limit of TFA codes, try later')
198 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
200 warn('Unable to extract TL')
203 tfa_code = self._get_tfa_info('2-step verification code')
207 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
208 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
# Users often paste the code with the "G-" prefix from the SMS; strip it.
211 tfa_code = remove_start(tfa_code, 'G-')
# tfa_req payload fragment (list start missing).
214 user_hash, None, 2, None,
216 9, None, None, None, None, None, None, None,
217 [None, tfa_code, True, 2]
221 self._TFA_URL.format(tl), tfa_req,
222 'Submitting TFA code', 'Unable to submit TFA code')
224 if tfa_results is False:
227 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
229 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
231 'Unable to finish TFA: %s' % 'Invalid TFA code'
232 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
235 check_cookie_url = try_get(
236 tfa_results, lambda x: x[0][-1][2], compat_str)
# Known non-TFA challenge codes mapped to human-readable explanations
# (the `CHALLENGES = {` opening line is missing).
239 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
240 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
241 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
243 challenge = CHALLENGES.get(
245 '%s returned error %s.' % (self.IE_NAME, challenge_str))
246 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
# Non-challenge path: the CheckCookie redirect URL lives in the result entry.
249 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
251 if not check_cookie_url:
252 warn('Unable to extract CheckCookie URL')
# Final step: fetch CheckCookie and verify the session actually stuck.
255 check_cookie_results = self._download_webpage(
256 check_cookie_url, None, 'Checking cookie', fatal=False)
258 if check_cookie_results is False:
# A logged-in session's CheckCookie page links to myaccount.google.com.
261 if 'https://myaccount.google.com/' not in check_cookie_results:
262 warn('Unable to log in')
# Force the legacy (non-Polymer) YouTube layout on every page download, since
# the HTML scraping below depends on the old markup.
267 def _download_webpage_handle(self, *args, **kwargs):
268 query = kwargs.get('query', {}).copy()
269 query['disable_polymer'] = 'true'
270 kwargs['query'] = query
271 return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
272 *args, **compat_kwargs(kwargs))
# Runs once before extraction: skip setup without a downloader, then attempt
# login (the bodies of both guards are among the missing lines).
274 def _real_initialize(self):
275 if self._downloader is None:
278 if not self._login():
# NOTE(review): line-sampled excerpt -- original line numbers are fused into
# each line and several lines of _entries (the retry-counter setup, the
# `try:`, and the break/continue statements) are missing.  Comments describe
# only the visible lines.
282 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
283 # Extract entries from page with "Load more" button
284 def _entries(self, page, playlist_id):
# The first page serves as both the content and the "load more" widget HTML.
285 more_widget_html = content_html = page
# Paginate indefinitely; loop exit lines (break on no more-button) are
# among the missing lines.
286 for page_num in itertools.count(1):
287 for entry in self._process_page(content_html):
# The continuation link lives in a data-uix-load-more-href attribute.
290 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
# Retry loop; `count`/`retries` initialization is missing from the excerpt.
296 while count <= retries:
298 # Downloading page may result in intermittent 5xx HTTP error
299 # that is usually worked around with a retry
300 more = self._download_json(
301 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
302 'Downloading page #%s%s'
303 % (page_num, ' (retry #%d)' % count if count else ''),
304 transform_source=uppercase_escape)
# Only 500/503 responses are treated as transient; the retry bookkeeping
# and re-raise lines are missing here.
306 except ExtractorError as e:
307 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
313 content_html = more['content_html']
314 if not content_html.strip():
315 # Some webpages show a "Load more" button but they don't
318 more_widget_html = more['load_more_widget_html']
# NOTE(review): line-sampled excerpt -- the `continue` statements, the
# `if video_id in ids_in_page:` membership guard, and the empty-list
# initializers in extract_videos_from_page are among the missing lines.
321 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
# Turn each (id, title) pair scraped from the page into a url_result entry.
322 def _process_page(self, content):
323 for video_id, video_title in self.extract_videos_from_page(content):
324 yield self.url_result(video_id, 'Youtube', video_id, video_title)
# Scan `page` with `video_re`, appending ids/titles to the caller-supplied
# accumulator lists (mutated in place).
326 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
327 for mobj in re.finditer(video_re, page):
328 # The link with index 0 is not the first video of the playlist (not sure if still actual)
329 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
331 video_id = mobj.group('id')
# Title group is optional in the regex; unescape HTML entities when present.
332 video_title = unescapeHTML(
333 mobj.group('title')) if 'title' in mobj.groupdict() else None
335 video_title = video_title.strip()
# "► Play all" is the playlist header link, not a real video title.
336 if video_title == '► Play all':
# Duplicate id: keep the first occurrence, but backfill a missing title
# (the `if video_id in ids_in_page:` guard line is missing here).
339 idx = ids_in_page.index(video_id)
340 if video_title and not titles_in_page[idx]:
341 titles_in_page[idx] = video_title
343 ids_in_page.append(video_id)
344 titles_in_page.append(video_title)
# Convenience wrapper using the subclass-provided _VIDEO_RE; the
# `ids_in_page = []` / `titles_in_page = []` initializers are missing.
346 def extract_videos_from_page(self, page):
349 self.extract_videos_from_page_impl(
350 self._VIDEO_RE, page, ids_in_page, titles_in_page)
351 return zip(ids_in_page, titles_in_page)
# NOTE(review): line-sampled excerpt -- the closing argument line of the
# re.findall(...) call in _process_page is missing.
354 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
# Scrape playlist ids out of yt-lockup-title anchors, deduplicated in
# first-seen order via orderedSet, and yield one playlist url_result each.
355 def _process_page(self, content):
356 for playlist_id in orderedSet(re.findall(
357 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
359 yield self.url_result(
360 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
# Download the page and wrap the paginated playlist entries (from the
# inherited _entries) into a single playlist result; title is best-effort.
362 def _real_extract(self, url):
363 playlist_id = self._match_id(url)
364 webpage = self._download_webpage(url, playlist_id)
365 title = self._og_search_title(webpage, fatal=False)
366 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
369 class YoutubeIE(YoutubeBaseInfoExtractor):
370 IE_DESC = 'YouTube.com'
371 _VALID_URL = r"""(?x)^
373 (?:https?://|//) # http(s):// or protocol-independent URL
374 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
375 (?:www\.)?deturl\.com/www\.youtube\.com/|
376 (?:www\.)?pwnyoutube\.com/|
377 (?:www\.)?hooktube\.com/|
378 (?:www\.)?yourepeat\.com/|
379 tube\.majestyc\.net/|
380 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
381 (?:(?:www|dev)\.)?invidio\.us/|
382 (?:(?:www|no)\.)?invidiou\.sh/|
383 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
384 (?:www\.)?invidious\.kabi\.tk/|
385 (?:www\.)?invidious\.13ad\.de/|
386 (?:www\.)?invidious\.mastodon\.host/|
387 (?:www\.)?invidious\.nixnet\.xyz/|
388 (?:www\.)?invidious\.drycat\.fr/|
389 (?:www\.)?tube\.poal\.co/|
390 (?:www\.)?vid\.wxzm\.sx/|
391 (?:www\.)?yewtu\.be/|
392 (?:www\.)?yt\.elukerio\.org/|
393 (?:www\.)?yt\.lelux\.fi/|
394 (?:www\.)?invidious\.ggc-project\.de/|
395 (?:www\.)?yt\.maisputain\.ovh/|
396 (?:www\.)?invidious\.13ad\.de/|
397 (?:www\.)?invidious\.toot\.koeln/|
398 (?:www\.)?invidious\.fdn\.fr/|
399 (?:www\.)?watch\.nettohikari\.com/|
400 (?:www\.)?kgg2m7yk5aybusll\.onion/|
401 (?:www\.)?qklhadlycap4cnod\.onion/|
402 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
403 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
404 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
405 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
406 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
407 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
408 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
409 (?:.*?\#/)? # handle anchor (#/) redirect urls
410 (?: # the various things that can precede the ID:
411 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
412 |(?: # or the v= param in all its forms
413 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
414 (?:\?|\#!?) # the params delimiter ? or # or #!
415 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
420 youtu\.be| # just youtu.be/xxxx
421 vid\.plus| # or vid.plus/xxxx
422 zwearz\.com/watch| # or zwearz.com/watch/xxxx
424 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
426 )? # all until now is optional -> you can pass the naked ID
427 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
430 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
431 WL # WL are handled by the watch later IE
434 (?(1).+)? # if we found the ID, everything can follow
435 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
436 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
438 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
439 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
442 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
443 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
444 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
445 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
446 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
447 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
448 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
449 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
450 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
451 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
452 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
453 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
454 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
455 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
456 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
457 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
458 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
459 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
463 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
464 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
465 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
466 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
467 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
468 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
469 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
471 # Apple HTTP Live Streaming
472 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
473 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
474 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
475 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
476 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
477 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
478 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
479 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
482 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
483 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
484 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
485 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
486 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
487 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
488 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
489 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
490 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
491 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
492 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
493 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
496 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
497 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
498 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
499 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
500 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
501 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
502 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
505 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
506 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
507 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
508 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
509 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
510 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
511 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
512 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
513 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
516 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
517 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
520 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
521 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
523 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
524 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
525 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
529 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
530 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
532 # Dash webm audio with opus inside
533 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
534 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
535 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
538 '_rtmp': {'protocol': 'rtmp'},
540 # av01 video only formats sometimes served with "unknown" codecs
541 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
542 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
543 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
544 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
546 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
553 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
557 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
558 'uploader': 'Philipp Hagemeister',
559 'uploader_id': 'phihag',
560 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
561 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
562 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
563 'upload_date': '20121002',
564 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
565 'categories': ['Science & Technology'],
566 'tags': ['youtube-dl'],
570 'dislike_count': int,
576 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
577 'note': 'Test generic use_cipher_signature video (#897)',
581 'upload_date': '20120506',
582 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
583 'alt_title': 'I Love It (feat. Charli XCX)',
584 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
585 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
586 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
587 'iconic ep', 'iconic', 'love', 'it'],
589 'uploader': 'Icona Pop',
590 'uploader_id': 'IconaPop',
591 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
592 'creator': 'Icona Pop',
593 'track': 'I Love It (feat. Charli XCX)',
594 'artist': 'Icona Pop',
598 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
599 'note': 'Test VEVO video with age protection (#956)',
603 'upload_date': '20130703',
604 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
605 'alt_title': 'Tunnel Vision',
606 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
608 'uploader': 'justintimberlakeVEVO',
609 'uploader_id': 'justintimberlakeVEVO',
610 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
611 'creator': 'Justin Timberlake',
612 'track': 'Tunnel Vision',
613 'artist': 'Justin Timberlake',
618 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
619 'note': 'Embed-only video (#1746)',
623 'upload_date': '20120608',
624 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
625 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
626 'uploader': 'SET India',
627 'uploader_id': 'setindia',
628 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
633 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
634 'note': 'Use the first video ID in the URL',
638 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
639 'uploader': 'Philipp Hagemeister',
640 'uploader_id': 'phihag',
641 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
642 'upload_date': '20121002',
643 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
644 'categories': ['Science & Technology'],
645 'tags': ['youtube-dl'],
649 'dislike_count': int,
652 'skip_download': True,
656 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
657 'note': '256k DASH audio (format 141) via DASH manifest',
661 'upload_date': '20121002',
662 'uploader_id': '8KVIDEO',
663 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
665 'uploader': '8KVIDEO',
666 'title': 'UHDTV TEST 8K VIDEO.mp4'
669 'youtube_include_dash_manifest': True,
672 'skip': 'format 141 not served anymore',
674 # DASH manifest with encrypted signature
676 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
680 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
681 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
683 'uploader': 'AfrojackVEVO',
684 'uploader_id': 'AfrojackVEVO',
685 'upload_date': '20131011',
688 'youtube_include_dash_manifest': True,
689 'format': '141/bestaudio[ext=m4a]',
692 # JS player signature function name containing $
694 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
698 'title': 'Taylor Swift - Shake It Off',
699 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
701 'uploader': 'TaylorSwiftVEVO',
702 'uploader_id': 'TaylorSwiftVEVO',
703 'upload_date': '20140818',
706 'youtube_include_dash_manifest': True,
707 'format': '141/bestaudio[ext=m4a]',
712 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
717 'upload_date': '20100909',
718 'uploader': 'Amazing Atheist',
719 'uploader_id': 'TheAmazingAtheist',
720 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
721 'title': 'Burning Everyone\'s Koran',
722 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
725 # Normal age-gate video (No vevo, embed allowed)
727 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
731 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
732 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
734 'uploader': 'The Witcher',
735 'uploader_id': 'WitcherGame',
736 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
737 'upload_date': '20140605',
741 # Age-gate video with encrypted signature
743 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
747 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
748 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
750 'uploader': 'LloydVEVO',
751 'uploader_id': 'LloydVEVO',
752 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
753 'upload_date': '20110629',
757 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
758 # YouTube Red ad is not captured for creator
760 'url': '__2ABJjxzNo',
765 'upload_date': '20100430',
766 'uploader_id': 'deadmau5',
767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
768 'creator': 'Dada Life, deadmau5',
769 'description': 'md5:12c56784b8032162bb936a5f76d55360',
770 'uploader': 'deadmau5',
771 'title': 'Deadmau5 - Some Chords (HD)',
772 'alt_title': 'This Machine Kills Some Chords',
774 'expected_warnings': [
775 'DASH manifest missing',
778 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
780 'url': 'lqQg6PlCWgI',
785 'upload_date': '20150827',
786 'uploader_id': 'olympic',
787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
788 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
789 'uploader': 'Olympic',
790 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
793 'skip_download': 'requires avconv',
798 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
802 'stretched_ratio': 16 / 9.,
804 'upload_date': '20110310',
805 'uploader_id': 'AllenMeow',
806 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
807 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
809 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
812 # url_encoded_fmt_stream_map is empty string
814 'url': 'qEJwOuvDf7I',
818 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
820 'upload_date': '20150404',
821 'uploader_id': 'spbelect',
822 'uploader': 'Наблюдатели Петербурга',
825 'skip_download': 'requires avconv',
827 'skip': 'This live event has ended.',
829 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
831 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
835 'title': 'md5:7b81415841e02ecd4313668cde88737a',
836 'description': 'md5:116377fd2963b81ec4ce64b542173306',
838 'upload_date': '20150625',
839 'uploader_id': 'dorappi2000',
840 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
841 'uploader': 'dorappi2000',
842 'formats': 'mincount:31',
844 'skip': 'not actual anymore',
846 # DASH manifest with segment_list
848 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
849 'md5': '8ce563a1d667b599d21064e982ab9e31',
853 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
854 'uploader': 'Airtek',
855 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
856 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
857 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
860 'youtube_include_dash_manifest': True,
861 'format': '135', # bestvideo
863 'skip': 'This live event has ended.',
866 # Multifeed videos (multiple cameras), URL is for Main Camera
867 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
870 'title': 'teamPGP: Rocket League Noob Stream',
871 'description': 'md5:dc7872fb300e143831327f1bae3af010',
877 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
878 'description': 'md5:dc7872fb300e143831327f1bae3af010',
880 'upload_date': '20150721',
881 'uploader': 'Beer Games Beer',
882 'uploader_id': 'beergamesbeer',
883 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
884 'license': 'Standard YouTube License',
890 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
891 'description': 'md5:dc7872fb300e143831327f1bae3af010',
893 'upload_date': '20150721',
894 'uploader': 'Beer Games Beer',
895 'uploader_id': 'beergamesbeer',
896 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
897 'license': 'Standard YouTube License',
903 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
904 'description': 'md5:dc7872fb300e143831327f1bae3af010',
906 'upload_date': '20150721',
907 'uploader': 'Beer Games Beer',
908 'uploader_id': 'beergamesbeer',
909 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
910 'license': 'Standard YouTube License',
916 'title': 'teamPGP: Rocket League Noob Stream (zim)',
917 'description': 'md5:dc7872fb300e143831327f1bae3af010',
919 'upload_date': '20150721',
920 'uploader': 'Beer Games Beer',
921 'uploader_id': 'beergamesbeer',
922 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
923 'license': 'Standard YouTube License',
927 'skip_download': True,
929 'skip': 'This video is not available.',
932 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
933 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
936 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
939 'skip': 'Not multifeed anymore',
942 'url': 'https://vid.plus/FlRa-iH7PGw',
943 'only_matching': True,
946 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
947 'only_matching': True,
950 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
951 # Also tests cut-off URL expansion in video description (see
952 # https://github.com/ytdl-org/youtube-dl/issues/1892,
953 # https://github.com/ytdl-org/youtube-dl/issues/8164)
954 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
958 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
959 'alt_title': 'Dark Walk - Position Music',
960 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
962 'upload_date': '20151119',
963 'uploader_id': 'IronSoulElf',
964 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
965 'uploader': 'IronSoulElf',
966 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
967 'track': 'Dark Walk - Position Music',
968 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
969 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
972 'skip_download': True,
976 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
977 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
978 'only_matching': True,
981 # Video with yt:stretch=17:0
982 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
986 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
987 'description': 'md5:ee18a25c350637c8faff806845bddee9',
988 'upload_date': '20151107',
989 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
990 'uploader': 'CH GAMER DROID',
993 'skip_download': True,
995 'skip': 'This video does not exist.',
998 # Video licensed under Creative Commons
999 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1001 'id': 'M4gD1WSo5mA',
1003 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1004 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1006 'upload_date': '20150127',
1007 'uploader_id': 'BerkmanCenter',
1008 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1009 'uploader': 'The Berkman Klein Center for Internet & Society',
1010 'license': 'Creative Commons Attribution license (reuse allowed)',
1013 'skip_download': True,
1017 # Channel-like uploader_url
1018 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1020 'id': 'eQcmzGIKrzg',
1022 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1023 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1025 'upload_date': '20151119',
1026 'uploader': 'Bernie Sanders',
1027 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1028 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1029 'license': 'Creative Commons Attribution license (reuse allowed)',
1032 'skip_download': True,
1036 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1037 'only_matching': True,
1040 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1041 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1042 'only_matching': True,
1045 # Rental video preview
1046 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1048 'id': 'uGpuVWrhIzE',
1050 'title': 'Piku - Trailer',
1051 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1052 'upload_date': '20150811',
1053 'uploader': 'FlixMatrix',
1054 'uploader_id': 'FlixMatrixKaravan',
1055 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1056 'license': 'Standard YouTube License',
1059 'skip_download': True,
1061 'skip': 'This video is not available.',
1064 # YouTube Red video with episode data
1065 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1067 'id': 'iqKdEhx-dD4',
1069 'title': 'Isolation - Mind Field (Ep 1)',
1070 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1072 'upload_date': '20170118',
1073 'uploader': 'Vsauce',
1074 'uploader_id': 'Vsauce',
1075 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1076 'series': 'Mind Field',
1078 'episode_number': 1,
1081 'skip_download': True,
1083 'expected_warnings': [
1084 'Skipping DASH manifest',
1088 # The following content has been identified by the YouTube community
1089 # as inappropriate or offensive to some audiences.
1090 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1092 'id': '6SJNVb0GnPI',
1094 'title': 'Race Differences in Intelligence',
1095 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1097 'upload_date': '20140124',
1098 'uploader': 'New Century Foundation',
1099 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1103 'skip_download': True,
1108 'url': '1t24XAntNCY',
1109 'only_matching': True,
1112 # geo restricted to JP
1113 'url': 'sJL6WA-aGkQ',
1114 'only_matching': True,
1117 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1118 'only_matching': True,
1121 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1122 'only_matching': True,
1126 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1127 'only_matching': True,
1130 # Video with unsupported adaptive stream type formats
1131 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1133 'id': 'Z4Vy8R84T1U',
1135 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1136 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1138 'upload_date': '20130923',
1139 'uploader': 'Amelia Putri Harwita',
1140 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1142 'formats': 'maxcount:10',
1145 'skip_download': True,
1146 'youtube_include_dash_manifest': False,
1148 'skip': 'not actual anymore',
1151 # Youtube Music Auto-generated description
1152 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1154 'id': 'MgNrAu2pzNs',
1156 'title': 'Voyeur Girl',
1157 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1158 'upload_date': '20190312',
1159 'uploader': 'Stephen - Topic',
1160 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1161 'artist': 'Stephen',
1162 'track': 'Voyeur Girl',
1163 'album': 'it\'s too much love to know my dear',
1164 'release_date': '20190313',
1165 'release_year': 2019,
1168 'skip_download': True,
1172 # Youtube Music Auto-generated description
1173 # Retrieve 'artist' field from 'Artist:' in video description
1174 # when it is present on youtube music video
1175 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1177 'id': 'k0jLE7tTwjY',
1179 'title': 'Latch Feat. Sam Smith',
1180 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1181 'upload_date': '20150110',
1182 'uploader': 'Various Artists - Topic',
1183 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1184 'artist': 'Disclosure',
1185 'track': 'Latch Feat. Sam Smith',
1186 'album': 'Latch Featuring Sam Smith',
1187 'release_date': '20121008',
1188 'release_year': 2012,
1191 'skip_download': True,
1195 # Youtube Music Auto-generated description
1196 # handle multiple artists on youtube music video
1197 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1199 'id': '74qn0eJSjpA',
1201 'title': 'Eastside',
1202 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1203 'upload_date': '20180710',
1204 'uploader': 'Benny Blanco - Topic',
1205 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1206 'artist': 'benny blanco, Halsey, Khalid',
1207 'track': 'Eastside',
1208 'album': 'Eastside',
1209 'release_date': '20180713',
1210 'release_year': 2018,
1213 'skip_download': True,
1217 # Youtube Music Auto-generated description
1218 # handle youtube music video with release_year and no release_date
1219 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1221 'id': '-hcAI0g-f5M',
1223 'title': 'Put It On Me',
1224 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1225 'upload_date': '20180426',
1226 'uploader': 'Matt Maeson - Topic',
1227 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1228 'artist': 'Matt Maeson',
1229 'track': 'Put It On Me',
1230 'album': 'The Hearse',
1231 'release_date': None,
1232 'release_year': 2018,
1235 'skip_download': True,
1239 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1240 'only_matching': True,
1243 # invalid -> valid video id redirection
1244 'url': 'DJztXj2GPfl',
1246 'id': 'DJztXj2GPfk',
1248 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1249 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1250 'upload_date': '20090125',
1251 'uploader': 'Prochorowka',
1252 'uploader_id': 'Prochorowka',
1253 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1254 'artist': 'Panjabi MC',
1255 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1256 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1259 'skip_download': True,
1264 def __init__(self, *args, **kwargs):
1265 super(YoutubeIE, self).__init__(*args, **kwargs)
1266 self._player_cache = {}
1268 def report_video_info_webpage_download(self, video_id):
1269 """Report attempt to download video info webpage."""
1270 self.to_screen('%s: Downloading video info webpage' % video_id)
1272 def report_information_extraction(self, video_id):
1273 """Report attempt to extract video information."""
1274 self.to_screen('%s: Extracting video information' % video_id)
1276 def report_unavailable_format(self, video_id, format):
1277 """Report extracted video URL."""
1278 self.to_screen('%s: Format %s not available' % (video_id, format))
1280 def report_rtmp_download(self):
1281 """Indicate the download will use the RTMP protocol."""
1282 self.to_screen('RTMP download detected')
1284 def _signature_cache_id(self, example_sig):
1285 """ Return a string representation of a signature """
1286 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
    def _extract_player_info(cls, player_url):
        """Derive (extension, player id) from a player URL by trying the
        class's known _PLAYER_INFO_RE patterns in turn."""
        # NOTE(review): the `cls` parameter implies @classmethod; the
        # decorator is not visible in this span - confirm it precedes this.
        for player_re in cls._PLAYER_INFO_RE:
            id_m = re.search(player_re, player_url)
        # NOTE(review): as shown, no match/break test guards the raise below,
        # so it would always fire; a success branch appears elided - confirm.
        raise ExtractorError('Cannot identify player %r' % player_url)
        return id_m.group('ext'), id_m.group('id')
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Obtain the signature-decipher function for a player, preferring
        the on-disk cache over downloading and re-parsing the player."""
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id doubles as a cache file name, so it must not contain
        # path separators.
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is a permutation: output position i takes
            # input character cache_spec[i].
            return lambda s: ''.join(s[i] for i in cache_spec)

        # NOTE(review): the assignment wrapping the conditional expression
        # below (e.g. download_note = (...)) appears elided - confirm.
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            # NOTE(review): `code` is not assigned from `urlh` in this span;
            # a read() of the response appears elided - confirm.
            res = self._parse_sig_swf(code)
        # NOTE(review): an `else:` guard for the assert below appears elided.
            assert False, 'Invalid player type %r' % player_type

        # Build the cacheable permutation spec by running the function on a
        # string of distinct characters and recording where each one lands.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]
        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    def _print_sig_code(self, func, example_sig):
        """Print a human-readable Python snippet equivalent to the extracted
        signature function, for debugging and upstreaming."""
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step] with Python's shorthand
                # defaults omitted where possible.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Collapse runs of indices with a constant stride into single
            # slice expressions; emit lone indices as s[i].
            # NOTE(review): several statements of this loop (step tracking,
            # continue branches) appear elided from this span - confirm.
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                    yield _genslice(start, prev, step)
                if i - prev in [-1, 1]:
                    yield 's[%d]' % prev
                yield _genslice(start, i, step)

        # Run the decipher function on a string of distinct characters to
        # recover the index permutation it applies.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
    def _parse_sig_js(self, jscode):
        """Locate the signature-decipher function in the player JavaScript
        and wrap it, via JSInterpreter, into a callable taking the raw
        signature string."""
        funcname = self._search_regex(
            # Patterns are tried in order; each captures the decipher
            # function's name in the 'sig' group.
            # NOTE(review): some pattern lines appear elided from this span.
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The extracted function expects its arguments as a list.
        return lambda s: initial_function([s])
1398 def _parse_sig_swf(self, file_contents):
1399 swfi = SWFInterpreter(file_contents)
1400 TARGET_CLASSNAME = 'SignatureDecipher'
1401 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1402 initial_function = swfi.extract_function(searched_class, 'decipher')
1403 return lambda s: initial_function([s])
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""

        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')

        # Normalize scheme-relative and site-relative player URLs to
        # absolute https URLs.
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)
        # NOTE(review): the `try:` matching the `except` below is not visible
        # in this span - confirm it precedes this cache lookup.
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                )
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
        except Exception as e:
            # Include the traceback so users can report extraction failures.
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
    def _get_subtitles(self, video_id, webpage):
        """Fetch the closed-caption track list and build, per language, one
        entry per supported subtitle format."""
        # NOTE(review): a `try:` matching the `except` below appears elided.
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            # Best effort: warn instead of failing the whole extraction.
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))

        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
            # One timedtext URL per supported subtitle format.
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'name': track.attrib['name'].encode('utf-8'),
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
            sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')

        return sub_lang_list
    def _get_ytplayer_config(self, video_id, webpage):
        """Extract and parse the ytplayer.config JSON object embedded in the
        watch page, or None if no pattern matches."""
        # NOTE(review): the tuple assignment (e.g. patterns = (...)) that
        # wraps the regexes below appears elided from this span.
            # User data may contain arbitrary character sequences that may affect
            # JSON extraction with regex, e.g. when '};' is contained the second
            # regex won't capture the whole JSON. Yet working around by trying more
            # concrete regex first keeping in mind proper quoted string handling
            # to be implemented in future that will replace this workaround (see
            # https://github.com/ytdl-org/youtube-dl/issues/7468,
            # https://github.com/ytdl-org/youtube-dl/pull/7599)
            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
            r';ytplayer\.config\s*=\s*({.+?});',
        config = self._search_regex(
            patterns, webpage, 'ytplayer.config', default=None)
        # NOTE(review): a truthiness guard on `config` appears elided before
        # the parse below - confirm.
        return self._parse_json(
            uppercase_escape(config), video_id, fatal=False)
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
        # NOTE(review): a `try:` matching the `except` near the end of this
        # method appears elided around here - confirm.
            args = player_config['args']
            # Legacy path: a 'ttsurl' in the player args points at the
            # timedtext caption service.
            caption_url = args.get('ttsurl')
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')
                # One URL per (target language, subtitle format) pair.
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'kind': caption_kind,
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the base
                # caption URL's query string for each language/format pair.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                for sub_lang in sub_langs:
                    for ext in self._SUBTITLE_FORMATS:
                        'tlang': [sub_lang],
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                    captions[sub_lang] = sub_formats

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _mark_watched(self, video_id, video_info, player_response):
        # Ping YouTube's videostats playback URL (with a generated cpn) so
        # the video is recorded as watched on the account.
        playback_url = url_or_none(try_get(
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0]))
        # NOTE(review): the early-return body of the guard below appears
        # elided from this span - confirm.
        if not playback_url:
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        # Best effort: a failed ping must not abort the extraction.
        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
    def _extract_urls(webpage):
        # Return all embedded YouTube URLs/ids found in *webpage*.
        # NOTE(review): a @staticmethod decorator and the list-comprehension
        # opener building `entries` appear elided from this span - confirm.
        # Embedded YouTube player
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        # lazyYT YouTube embed
        entries.extend(list(map(
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # The video id is the last captured group of each match.
        entries.extend(m[-1] for m in matches)
1643 def _extract_url(webpage):
1644 urls = YoutubeIE._extract_urls(webpage)
1645 return urls[0] if urls else None
    def extract_id(cls, url):
        """Return the video id parsed from *url* via cls._VALID_URL."""
        # NOTE(review): `cls` implies @classmethod; the decorator is not
        # visible in this span - confirm it precedes this line.
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
        # NOTE(review): an `if mobj is None:` guard for the raise below
        # appears elided - confirm.
            raise ExtractorError('Invalid URL: %s' % url)
        video_id = mobj.group(2)
    def _extract_chapters_from_json(self, webpage, video_id, duration):
        """Extract the chapter list from the watch page's related-player
        JSON; returns None-ish when any expected piece is missing."""
        player = self._parse_json(
            # NOTE(review): a self._search_regex( call wrapping the pattern
            # below appears elided from this span - confirm.
                r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
                'player args', default='{}'),
            video_id, fatal=False)
        if not player or not isinstance(player, dict):
        watch_next_response = player.get('watch_next_response')
        if not isinstance(watch_next_response, compat_str):
        response = self._parse_json(watch_next_response, video_id, fatal=False)
        if not response or not isinstance(response, dict):
        # Walk the deeply nested renderer structure down to the chapters.
        chapters_list = try_get(
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['chapteredPlayerBarRenderer']
        if not chapters_list:

        def chapter_time(chapter):
            # Chapter start times come in milliseconds; convert to seconds.
            return float_or_none(
                lambda x: x['chapterRenderer']['timeRangeStartMillis'],
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
            # The end of a chapter is the start of the next one; the last
            # chapter ends at the video duration.
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                'start_time': start_time,
                'end_time': end_time,
    def _extract_chapters_from_description(description, duration):
        """Parse chapter entries ('mm:ss Title' seek links) out of an
        old-style HTML video description."""
        # NOTE(review): the signature suggests @staticmethod; the decorator
        # is not visible in this span - confirm.
        # Each match captures (whole line, time point) for a seekTo link.
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        if not chapter_lines:
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
            if start_time > duration:
            # A chapter ends where the next one starts; the last one ends
            # at the video duration.
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
            if end_time > duration:
            if start_time > end_time:
            # Strip the seek link markup, then collapse whitespace.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
1745 def _extract_chapters(self, webpage, description, video_id, duration):
1746 return (self._extract_chapters_from_json(webpage, video_id, duration)
1747 or self._extract_chapters_from_description(description, duration))
1749 def _real_extract(self, url):
1750 url, smuggled_data = unsmuggle_url(url, {})
1753 'http' if self._downloader.params.get('prefer_insecure', False)
1758 parsed_url = compat_urllib_parse_urlparse(url)
1759 for component in [parsed_url.fragment, parsed_url.query]:
1760 query = compat_parse_qs(component)
1761 if start_time is None and 't' in query:
1762 start_time = parse_duration(query['t'][0])
1763 if start_time is None and 'start' in query:
1764 start_time = parse_duration(query['start'][0])
1765 if end_time is None and 'end' in query:
1766 end_time = parse_duration(query['end'][0])
1768 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1769 mobj = re.search(self._NEXT_URL_RE, url)
1771 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1772 video_id = self.extract_id(url)
1775 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1776 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1778 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1779 video_id = qs.get('v', [None])[0] or video_id
1781 # Attempt to extract SWF player URL
1782 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1783 if mobj is not None:
1784 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1790 def add_dash_mpd(video_info):
1791 dash_mpd = video_info.get('dashmpd')
1792 if dash_mpd and dash_mpd[0] not in dash_mpds:
1793 dash_mpds.append(dash_mpd[0])
1795 def add_dash_mpd_pr(pl_response):
1796 dash_mpd = url_or_none(try_get(
1797 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1799 if dash_mpd and dash_mpd not in dash_mpds:
1800 dash_mpds.append(dash_mpd)
1805 def extract_view_count(v_info):
1806 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1808 def extract_player_response(player_response, video_id):
1809 pl_response = str_or_none(player_response)
1812 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1813 if isinstance(pl_response, dict):
1814 add_dash_mpd_pr(pl_response)
1817 player_response = {}
1821 embed_webpage = None
1822 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1824 # We simulate the access to the video from www.youtube.com/v/{video_id}
1825 # this can be viewed without login into Youtube
1826 url = proto + '://www.youtube.com/embed/%s' % video_id
1827 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1828 data = compat_urllib_parse_urlencode({
1829 'video_id': video_id,
1830 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1831 'sts': self._search_regex(
1832 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1834 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1836 video_info_webpage = self._download_webpage(
1837 video_info_url, video_id,
1838 note='Refetching age-gated info webpage',
1839 errnote='unable to download video info webpage')
1840 except ExtractorError:
1841 video_info_webpage = None
1842 if video_info_webpage:
1843 video_info = compat_parse_qs(video_info_webpage)
1844 pl_response = video_info.get('player_response', [None])[0]
1845 player_response = extract_player_response(pl_response, video_id)
1846 add_dash_mpd(video_info)
1847 view_count = extract_view_count(video_info)
1850 # Try looking directly into the video webpage
1851 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1853 args = ytplayer_config['args']
1854 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1855 # Convert to the same format returned by compat_parse_qs
1856 video_info = dict((k, [v]) for k, v in args.items())
1857 add_dash_mpd(video_info)
1858 # Rental video is not rented but preview is available (e.g.
1859 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1860 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1861 if not video_info and args.get('ypc_vid'):
1862 return self.url_result(
1863 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1864 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1866 if not player_response:
1867 player_response = extract_player_response(args.get('player_response'), video_id)
1868 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1869 add_dash_mpd_pr(player_response)
1871 def extract_unavailable_message():
1873 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1874 msg = self._html_search_regex(
1875 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1876 video_webpage, 'unavailable %s' % kind, default=None)
1878 messages.append(msg)
1880 return '\n'.join(messages)
1882 if not video_info and not player_response:
1883 unavailable_message = extract_unavailable_message()
1884 if not unavailable_message:
1885 unavailable_message = 'Unable to extract video data'
1886 raise ExtractorError(
1887 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1889 if not isinstance(video_info, dict):
1892 video_details = try_get(
1893 player_response, lambda x: x['videoDetails'], dict) or {}
1895 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1897 self._downloader.report_warning('Unable to extract video title')
1900 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1901 if video_description:
1904 redir_url = compat_urlparse.urljoin(url, m.group(1))
1905 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1906 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1907 qs = compat_parse_qs(parsed_redir_url.query)
1913 description_original = video_description = re.sub(r'''(?x)
1915 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1916 (?:title|href)="([^"]+)"\s+
1917 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1921 ''', replace_url, video_description)
1922 video_description = clean_html(video_description)
1924 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1926 if not smuggled_data.get('force_singlefeed', False):
1927 if not self._downloader.params.get('noplaylist'):
1928 multifeed_metadata_list = try_get(
1930 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1931 compat_str) or try_get(
1932 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1933 if multifeed_metadata_list:
1936 for feed in multifeed_metadata_list.split(','):
1937 # Unquote should take place before split on comma (,) since textual
1938 # fields may contain comma as well (see
1939 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1940 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1942 def feed_entry(name):
1943 return try_get(feed_data, lambda x: x[name][0], compat_str)
1945 feed_id = feed_entry('id')
1948 feed_title = feed_entry('title')
1951 title += ' (%s)' % feed_title
1953 '_type': 'url_transparent',
1954 'ie_key': 'Youtube',
1956 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1957 {'force_singlefeed': True}),
1960 feed_ids.append(feed_id)
1962 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1963 % (', '.join(feed_ids), video_id))
1964 return self.playlist_result(entries, video_id, video_title, video_description)
1966 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1968 if view_count is None:
1969 view_count = extract_view_count(video_info)
1970 if view_count is None and video_details:
1971 view_count = int_or_none(video_details.get('viewCount'))
1974 is_live = bool_or_none(video_details.get('isLive'))
1976 # Check for "rental" videos
1977 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1978 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
# Parse the `clen` (content length in bytes) parameter out of a media URL;
# returns None when the URL carries no clen field.
1980 def _extract_filesize(media_url):
1981 return int_or_none(self._search_regex(
1982 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1984 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1985 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1987 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1988 self.report_rtmp_download()
1990 'format_id': '_rtmp',
1992 'url': video_info['conn'][0],
1993 'player_url': player_url,
1995 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1996 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1997 if 'rtmpe%3Dyes' in encoded_url_map:
1998 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2001 fmt_list = video_info.get('fmt_list', [''])[0]
2003 for fmt in fmt_list.split(','):
2004 spec = fmt.split('/')
2006 width_height = spec[1].split('x')
2007 if len(width_height) == 2:
2008 formats_spec[spec[0]] = {
2009 'resolution': spec[1],
2010 'width': int_or_none(width_height[0]),
2011 'height': int_or_none(width_height[1]),
2013 for fmt in streaming_formats:
2014 itag = str_or_none(fmt.get('itag'))
2017 quality = fmt.get('quality')
2018 quality_label = fmt.get('qualityLabel') or quality
2019 formats_spec[itag] = {
2020 'asr': int_or_none(fmt.get('audioSampleRate')),
2021 'filesize': int_or_none(fmt.get('contentLength')),
2022 'format_note': quality_label,
2023 'fps': int_or_none(fmt.get('fps')),
2024 'height': int_or_none(fmt.get('height')),
2025 # bitrate for itag 43 is always 2147483647
2026 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2027 'width': int_or_none(fmt.get('width')),
2030 for fmt in streaming_formats:
2031 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2033 url = url_or_none(fmt.get('url'))
2036 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2039 url_data = compat_parse_qs(cipher)
2040 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2045 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2047 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2048 # Unsupported FORMAT_STREAM_TYPE_OTF
2049 if stream_type == 3:
2052 format_id = fmt.get('itag') or url_data['itag'][0]
2055 format_id = compat_str(format_id)
2058 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2059 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2060 jsplayer_url_json = self._search_regex(
2062 embed_webpage if age_gate else video_webpage,
2063 'JS player URL (1)', default=None)
2064 if not jsplayer_url_json and not age_gate:
2065 # We need the embed website after all
2066 if embed_webpage is None:
2067 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2068 embed_webpage = self._download_webpage(
2069 embed_url, video_id, 'Downloading embed webpage')
2070 jsplayer_url_json = self._search_regex(
2071 ASSETS_RE, embed_webpage, 'JS player URL')
2073 player_url = json.loads(jsplayer_url_json)
2074 if player_url is None:
2075 player_url_json = self._search_regex(
2076 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2077 video_webpage, 'age gate player URL')
2078 player_url = json.loads(player_url_json)
2080 if 'sig' in url_data:
2081 url += '&signature=' + url_data['sig'][0]
2082 elif 's' in url_data:
2083 encrypted_sig = url_data['s'][0]
2085 if self._downloader.params.get('verbose'):
2086 if player_url is None:
2087 player_desc = 'unknown'
2089 player_type, player_version = self._extract_player_info(player_url)
2090 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2091 parts_sizes = self._signature_cache_id(encrypted_sig)
2092 self.to_screen('{%s} signature length %s, %s' %
2093 (format_id, parts_sizes, player_desc))
2095 signature = self._decrypt_signature(
2096 encrypted_sig, video_id, player_url, age_gate)
2097 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2098 url += '&%s=%s' % (sp, signature)
2099 if 'ratebypass' not in url:
2100 url += '&ratebypass=yes'
2103 'format_id': format_id,
2105 'player_url': player_url,
2107 if format_id in self._formats:
2108 dct.update(self._formats[format_id])
2109 if format_id in formats_spec:
2110 dct.update(formats_spec[format_id])
2112 # Some itags are not included in DASH manifest thus corresponding formats will
2113 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2114 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2115 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2116 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2119 width = int_or_none(fmt.get('width'))
2121 height = int_or_none(fmt.get('height'))
2123 filesize = int_or_none(url_data.get(
2124 'clen', [None])[0]) or _extract_filesize(url)
2126 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2127 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2129 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2130 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2131 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2134 'filesize': filesize,
2139 'format_note': quality_label or quality,
2141 for key, value in more_fields.items():
2144 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2146 type_split = type_.split(';')
2147 kind_ext = type_split[0].split('/')
2148 if len(kind_ext) == 2:
2150 dct['ext'] = mimetype2ext(type_split[0])
2151 if kind in ('audio', 'video'):
2153 for mobj in re.finditer(
2154 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2155 if mobj.group('key') == 'codecs':
2156 codecs = mobj.group('val')
2159 dct.update(parse_codecs(codecs))
2160 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2161 dct['downloader_options'] = {
2162 # Youtube throttles chunks >~10M
2163 'http_chunk_size': 10485760,
2168 url_or_none(try_get(
2170 lambda x: x['streamingData']['hlsManifestUrl'],
2172 or url_or_none(try_get(
2173 video_info, lambda x: x['hlsvp'][0], compat_str)))
2176 m3u8_formats = self._extract_m3u8_formats(
2177 manifest_url, video_id, 'mp4', fatal=False)
2178 for a_format in m3u8_formats:
2179 itag = self._search_regex(
2180 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2182 a_format['format_id'] = itag
2183 if itag in self._formats:
2184 dct = self._formats[itag].copy()
2185 dct.update(a_format)
2187 a_format['player_url'] = player_url
2188 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2189 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2190 formats.append(a_format)
2192 error_message = extract_unavailable_message()
2193 if not error_message:
2194 error_message = clean_html(try_get(
2195 player_response, lambda x: x['playabilityStatus']['reason'],
2197 if not error_message:
2198 error_message = clean_html(
2199 try_get(video_info, lambda x: x['reason'][0], compat_str))
2201 raise ExtractorError(error_message, expected=True)
2202 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2205 video_uploader = try_get(
2206 video_info, lambda x: x['author'][0],
2207 compat_str) or str_or_none(video_details.get('author'))
2209 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2211 self._downloader.report_warning('unable to extract uploader name')
2214 video_uploader_id = None
2215 video_uploader_url = None
2217 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2219 if mobj is not None:
2220 video_uploader_id = mobj.group('uploader_id')
2221 video_uploader_url = mobj.group('uploader_url')
2223 self._downloader.report_warning('unable to extract uploader nickname')
2226 str_or_none(video_details.get('channelId'))
2227 or self._html_search_meta(
2228 'channelId', video_webpage, 'channel id', default=None)
2229 or self._search_regex(
2230 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2231 video_webpage, 'channel id', default=None, group='id'))
2232 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2235 # We try first to get a high quality image:
2236 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2237 video_webpage, re.DOTALL)
2238 if m_thumb is not None:
2239 video_thumbnail = m_thumb.group(1)
2240 elif 'thumbnail_url' not in video_info:
2241 self._downloader.report_warning('unable to extract video thumbnail')
2242 video_thumbnail = None
2243 else: # don't panic if we can't find it
2244 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2247 upload_date = self._html_search_meta(
2248 'datePublished', video_webpage, 'upload date', default=None)
2250 upload_date = self._search_regex(
2251 [r'(?s)id="eow-date.*?>(.*?)</span>',
2252 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2253 video_webpage, 'upload date', default=None)
2254 upload_date = unified_strdate(upload_date)
2256 video_license = self._html_search_regex(
2257 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2258 video_webpage, 'license', default=None)
2260 m_music = re.search(
2262 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2270 \bhref=["\']/red[^>]*>| # drop possible
2271 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2278 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2279 video_creator = clean_html(m_music.group('creator'))
2281 video_alt_title = video_creator = None
# Scrape a labelled metadata row (e.g. 'Song', 'Artist', 'Album') from the
# watch-page HTML sidebar; returns None when the section is missing.
2283 def extract_meta(field):
2284 return self._html_search_regex(
2285 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2286 video_webpage, field, default=None)
2288 track = extract_meta('Song')
2289 artist = extract_meta('Artist')
2290 album = extract_meta('Album')
2292 # Youtube Music Auto-generated description
2293 release_date = release_year = None
2294 if video_description:
2295 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2298 track = mobj.group('track').strip()
2300 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2302 album = mobj.group('album'.strip())
2303 release_year = mobj.group('release_year')
2304 release_date = mobj.group('release_date')
2306 release_date = release_date.replace('-', '')
2307 if not release_year:
2308 release_year = int(release_date[:4])
2310 release_year = int(release_year)
2312 m_episode = re.search(
2313 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2316 series = unescapeHTML(m_episode.group('series'))
2317 season_number = int(m_episode.group('season'))
2318 episode_number = int(m_episode.group('episode'))
2320 series = season_number = episode_number = None
2322 m_cat_container = self._search_regex(
2323 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2324 video_webpage, 'categories', default=None)
2326 category = self._html_search_regex(
2327 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2329 video_categories = None if category is None else [category]
2331 video_categories = None
2334 unescapeHTML(m.group('content'))
2335 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
# Scrape the like/dislike counter from the sentiment-bar button markup;
# str_to_int handles the comma-grouped number, None when the button is absent.
2337 def _extract_count(count_name):
2338 return str_to_int(self._search_regex(
2339 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2340 % re.escape(count_name),
2341 video_webpage, count_name, default=None))
2343 like_count = _extract_count('like')
2344 dislike_count = _extract_count('dislike')
2346 if view_count is None:
2347 view_count = str_to_int(self._search_regex(
2348 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2349 'view count', default=None))
2352 float_or_none(video_details.get('averageRating'))
2353 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2356 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2357 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2359 video_duration = try_get(
2360 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2361 if not video_duration:
2362 video_duration = int_or_none(video_details.get('lengthSeconds'))
2363 if not video_duration:
2364 video_duration = parse_duration(self._html_search_meta(
2365 'duration', video_webpage, 'video duration'))
2368 video_annotations = None
2369 if self._downloader.params.get('writeannotations', False):
2370 xsrf_token = self._search_regex(
2371 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2372 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2373 invideo_url = try_get(
2374 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2375 if xsrf_token and invideo_url:
2376 xsrf_field_name = self._search_regex(
2377 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2378 video_webpage, 'xsrf field name',
2379 group='xsrf_field_name', default='session_token')
2380 video_annotations = self._download_webpage(
2381 self._proto_relative_url(invideo_url),
2382 video_id, note='Downloading annotations',
2383 errnote='Unable to download video annotations', fatal=False,
2384 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2386 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2388 # Look for the DASH manifest
2389 if self._downloader.params.get('youtube_include_dash_manifest', True):
2390 dash_mpd_fatal = True
2391 for mpd_url in dash_mpds:
2394 def decrypt_sig(mobj):
2396 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2397 return '/signature/%s' % dec_s
2399 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2401 for df in self._extract_mpd_formats(
2402 mpd_url, video_id, fatal=dash_mpd_fatal,
2403 formats_dict=self._formats):
2404 if not df.get('filesize'):
2405 df['filesize'] = _extract_filesize(df['url'])
2406 # Do not overwrite DASH format found in some previous DASH manifest
2407 if df['format_id'] not in dash_formats:
2408 dash_formats[df['format_id']] = df
2409 # Additional DASH manifests may end up in HTTP Error 403 therefore
2410 # allow them to fail without bug report message if we already have
2411 # some DASH manifest succeeded. This is temporary workaround to reduce
2412 # burst of bug reports until we figure out the reason and whether it
2413 # can be fixed at all.
2414 dash_mpd_fatal = False
2415 except (ExtractorError, KeyError) as e:
2416 self.report_warning(
2417 'Skipping DASH manifest: %r' % e, video_id)
2419 # Remove the formats we found through non-DASH, they
2420 # contain less info and it can be wrong, because we use
2421 # fixed values (for example the resolution). See
2422 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2424 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2425 formats.extend(dash_formats.values())
2427 # Check for malformed aspect ratio
2428 stretched_m = re.search(
2429 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2432 w = float(stretched_m.group('w'))
2433 h = float(stretched_m.group('h'))
2434 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2435 # We will only process correct ratios.
2439 if f.get('vcodec') != 'none':
2440 f['stretched_ratio'] = ratio
2443 if 'reason' in video_info:
2444 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2445 regions_allowed = self._html_search_meta(
2446 'regionsAllowed', video_webpage, default=None)
2447 countries = regions_allowed.split(',') if regions_allowed else None
2448 self.raise_geo_restricted(
2449 msg=video_info['reason'][0], countries=countries)
2450 reason = video_info['reason'][0]
2451 if 'Invalid parameters' in reason:
2452 unavailable_message = extract_unavailable_message()
2453 if unavailable_message:
2454 reason = unavailable_message
2455 raise ExtractorError(
2456 'YouTube said: %s' % reason,
2457 expected=True, video_id=video_id)
2458 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2459 raise ExtractorError('This video is DRM protected.', expected=True)
2461 self._sort_formats(formats)
2463 self.mark_watched(video_id, video_info, player_response)
2467 'uploader': video_uploader,
2468 'uploader_id': video_uploader_id,
2469 'uploader_url': video_uploader_url,
2470 'channel_id': channel_id,
2471 'channel_url': channel_url,
2472 'upload_date': upload_date,
2473 'license': video_license,
2474 'creator': video_creator or artist,
2475 'title': video_title,
2476 'alt_title': video_alt_title or track,
2477 'thumbnail': video_thumbnail,
2478 'description': video_description,
2479 'categories': video_categories,
2481 'subtitles': video_subtitles,
2482 'automatic_captions': automatic_captions,
2483 'duration': video_duration,
2484 'age_limit': 18 if age_gate else 0,
2485 'annotations': video_annotations,
2486 'chapters': chapters,
2487 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2488 'view_count': view_count,
2489 'like_count': like_count,
2490 'dislike_count': dislike_count,
2491 'average_rating': average_rating,
2494 'start_time': start_time,
2495 'end_time': end_time,
2497 'season_number': season_number,
2498 'episode_number': episode_number,
2502 'release_date': release_date,
2503 'release_year': release_year,
2507 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
# Extractor for YouTube playlist URLs: plain playlist pages, watch/embed
# URLs carrying a list= parameter, youtu.be short links, and bare playlist
# IDs. NOTE(review): this listing is sampled -- several lines of the
# _VALID_URL pattern and of the _TESTS fixtures are not visible here.
2508 IE_DESC = 'YouTube.com playlists'
2509 _VALID_URL = r"""(?x)(?:
2514 youtube(?:kids)?\.com|
2519 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2520 \? (?:.*?[&;])*? (?:p|a|list)=
2523 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2526 (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2527 # Top tracks, they can also include dots
2533 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2534 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
# Per-entry regex template: matches watch links inside playlist HTML and
# optionally captures the playlist index and the anchor-text title.
2535 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2536 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2537 IE_NAME = 'youtube:playlist'
# Test fixtures (the opening "_TESTS = [" line is not visible in this
# sampled listing; entries below are partial).
2539 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2541 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2542 'uploader': 'Sergey M.',
2543 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2544 'title': 'youtube-dl public playlist',
2546 'playlist_count': 1,
2548 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2550 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2551 'uploader': 'Sergey M.',
2552 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2553 'title': 'youtube-dl empty playlist',
2555 'playlist_count': 0,
2557 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2558 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2560 'title': '29C3: Not my department',
2561 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2562 'uploader': 'Christiaan008',
2563 'uploader_id': 'ChRiStIaAn008',
2565 'playlist_count': 96,
2567 'note': 'issue #673',
2568 'url': 'PLBB231211A4F62143',
2570 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2571 'id': 'PLBB231211A4F62143',
2572 'uploader': 'Wickydoo',
2573 'uploader_id': 'Wickydoo',
2575 'playlist_mincount': 26,
2577 'note': 'Large playlist',
2578 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2580 'title': 'Uploads from Cauchemar',
2581 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2582 'uploader': 'Cauchemar',
2583 'uploader_id': 'Cauchemar89',
2585 'playlist_mincount': 799,
2587 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2589 'title': 'YDL_safe_search',
2590 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2592 'playlist_count': 2,
2593 'skip': 'This playlist is private',
2596 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2597 'playlist_count': 4,
2600 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2601 'uploader': 'milan',
2602 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2605 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2606 'playlist_mincount': 485,
2608 'title': '2018 Chinese New Singles (11/6 updated)',
2609 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2611 'uploader_id': 'sdragonfang',
2614 'note': 'Embedded SWF player',
2615 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2616 'playlist_count': 4,
2619 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2621 'skip': 'This playlist does not exist',
2623 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2624 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2626 'title': 'Uploads from Interstellar Movie',
2627 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2628 'uploader': 'Interstellar Movie',
2629 'uploader_id': 'InterstellarMovie1',
2631 'playlist_mincount': 21,
2633 # Playlist URL that does not actually serve a playlist
2634 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2636 'id': 'FqZTN594JQw',
2638 'title': "Smiley's People 01 detective, Adventure Series, Action",
2639 'uploader': 'STREEM',
2640 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2641 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2642 'upload_date': '20150526',
2643 'license': 'Standard YouTube License',
2644 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2645 'categories': ['People & Blogs'],
2649 'dislike_count': int,
2652 'skip_download': True,
2654 'skip': 'This video is not available.',
2655 'add_ie': [YoutubeIE.ie_key()],
2657 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2659 'id': 'yeWKywCrFtk',
2661 'title': 'Small Scale Baler and Braiding Rugs',
2662 'uploader': 'Backus-Page House Museum',
2663 'uploader_id': 'backuspagemuseum',
2664 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2665 'upload_date': '20161008',
2666 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2667 'categories': ['Nonprofits & Activism'],
2670 'dislike_count': int,
2674 'skip_download': True,
2677 # https://github.com/ytdl-org/youtube-dl/issues/21844
2678 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2680 'title': 'Data Analysis with Dr Mike Pound',
2681 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2682 'uploader_id': 'Computerphile',
2683 'uploader': 'Computerphile',
2685 'playlist_mincount': 11,
2687 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2688 'only_matching': True,
2690 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2691 'only_matching': True,
2693 # music album playlist
2694 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2695 'only_matching': True,
2697 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2698 'only_matching': True,
2700 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2701 'only_matching': True,
# One-time setup hook run by InfoExtractor before extraction.
# NOTE(review): the method body is not visible in this sampled listing.
2704 def _real_initialize(self):
# Collect (video_id, title) pairs from a playlist page: try the newer
# data-video-id attribute markup first, then fall back to successively
# looser href/data-video-ids regexes via extract_videos_from_page_impl.
# NOTE(review): accumulator initialisation and several guard lines are
# missing from this sampled listing -- verify against the full source.
2707 def extract_videos_from_page(self, page):
2711 for item in re.findall(
2712 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2713 attrs = extract_attributes(item)
2714 video_id = attrs['data-video-id']
2715 video_title = unescapeHTML(attrs.get('data-title'))
2717 video_title = video_title.strip()
2718 ids_in_page.append(video_id)
2719 titles_in_page.append(video_title)
2721 # Fallback with old _VIDEO_RE
2722 self.extract_videos_from_page_impl(
2723 self._VIDEO_RE, page, ids_in_page, titles_in_page)
# Relaxed fallbacks: plain watch hrefs, then data-video-ids attributes.
2726 self.extract_videos_from_page_impl(
2727 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2728 ids_in_page, titles_in_page)
2729 self.extract_videos_from_page_impl(
2730 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2731 ids_in_page, titles_in_page)
2733 return zip(ids_in_page, titles_in_page)
# Extract a YouTube "mix" (auto-generated RD... playlist). Mixes have no
# playlist page, so pages are fetched via watch URLs until no new video
# ids appear.
# NOTE(review): this sampled listing omits the id-accumulator setup and
# the loop-termination lines -- verify against the full source.
2735 def _extract_mix(self, playlist_id):
2736 # The mixes are generated from a single video
2737 # the id of the playlist is just 'RD' + video_id
2739 last_id = playlist_id[-11:]
2740 for n in itertools.count(1):
2741 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2742 webpage = self._download_webpage(
2743 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2744 new_ids = orderedSet(re.findall(
2745 r'''(?xs)data-video-username=".*?".*?
2746 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2748 # Fetch new pages until all the videos are repeated, it seems that
2749 # there are always 51 unique videos.
2750 new_ids = [_id for _id in new_ids if _id not in ids]
2756 url_results = self._ids_to_results(ids)
# Title lives in different elements depending on page variant; try each.
2758 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2760 search_title('playlist-title')
2761 or search_title('title long-title')
2762 or search_title('title'))
2763 title = clean_html(title_span)
2765 return self.playlist_result(url_results, playlist_id, title)
# Download the playlist page, surface YouTube alert messages (missing /
# private playlist, invalid parameters) as errors or warnings, scrape
# title/uploader metadata, and return (has_videos, playlist_result).
# NOTE(review): several guard/assignment lines (mobj checks, try:, the
# uploader mobj search, has_videos handling) are missing from this
# sampled listing -- verify against the full source.
2767 def _extract_playlist(self, playlist_id):
2768 url = self._TEMPLATE_URL % playlist_id
2769 page = self._download_webpage(url, playlist_id)
2771 # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2772 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2773 match = match.strip()
2774 # Check if the playlist exists or is private
2775 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2777 reason = mobj.group('reason')
2778 message = 'This playlist %s' % reason
2779 if 'private' in reason:
2780 message += ', use --username or --netrc to access it'
2782 raise ExtractorError(message, expected=True)
2783 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2784 raise ExtractorError(
2785 'Invalid parameters. Maybe URL is incorrect.',
2787 elif re.match(r'[^<]*Choose your language[^<]*', match):
# Unrecognised alerts are only warned about, not fatal.
2790 self.report_warning('Youtube gives an alert message: ' + match)
2792 playlist_title = self._html_search_regex(
2793 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2794 page, 'title', default=None)
2796 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2797 uploader = self._html_search_regex(
2798 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2799 page, 'uploader', default=None)
2801 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2804 uploader_id = mobj.group('uploader_id')
2805 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2807 uploader_id = uploader_url = None
2811 if not playlist_title:
2813 # Some playlist URLs don't actually serve a playlist (e.g.
2814 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2815 next(self._entries(page, playlist_id))
2816 except StopIteration:
2819 playlist = self.playlist_result(
2820 self._entries(page, playlist_id), playlist_id, playlist_title)
2822 'uploader': uploader,
2823 'uploader_id': uploader_id,
2824 'uploader_url': uploader_url,
2827 return has_videos, playlist
# If `url` also identifies a single video (v= query parameter, youtu.be
# or embed path), honour --no-playlist by returning a direct video
# result; otherwise announce playlist extraction.
# Returns (video_id_or_None, url_result_or_None).
# NOTE(review): the `if video_id:` guard line is missing from this
# sampled listing -- verify against the full source.
2829 def _check_download_just_video(self, url, playlist_id):
2830 # Check if it's a video-specific URL
2831 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2832 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2833 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2834 'video id', default=None)
2836 if self._downloader.params.get('noplaylist'):
2837 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2838 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2840 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2841 return video_id, None
# Entry point: resolve the playlist id from the URL, defer to the single
# video when --no-playlist applies, route RD/UL/PU ids through the mix
# extractor, and otherwise extract the playlist page -- falling back to
# the bare video when the "playlist" URL serves none.
# NOTE(review): several lines (mobj None-check, early return of `video`)
# are missing from this sampled listing.
2844 def _real_extract(self, url):
2845 # Extract playlist id
2846 mobj = re.match(self._VALID_URL, url)
2848 raise ExtractorError('Invalid URL: %s' % url)
2849 playlist_id = mobj.group(1) or mobj.group(2)
2851 video_id, video = self._check_download_just_video(url, playlist_id)
2855 if playlist_id.startswith(('RD', 'UL', 'PU')):
2856 # Mixes require a custom extraction process
2857 return self._extract_mix(playlist_id)
2859 has_videos, playlist = self._extract_playlist(playlist_id)
2860 if has_videos or not video_id:
2863 # Some playlist URLs don't actually serve a playlist (see
2864 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2865 # Fallback to plain video extraction if there is a video id
2866 # along with playlist id.
2867 return self.url_result(video_id, 'Youtube', video_id=video_id)
2870 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
# Extractor for /channel/<id> URLs on youtube.com, youtu.be,
# youtubekids.com, -nocookie and invidio.us mirrors.
# NOTE(review): this listing is sampled; some fixture lines are missing.
2871 IE_DESC = 'YouTube.com channels'
2872 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2873 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2874 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2875 IE_NAME = 'youtube:channel'
2877 'note': 'paginated channel',
2878 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2879 'playlist_mincount': 91,
2881 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2882 'title': 'Uploads from lex will',
2883 'uploader': 'lex will',
2884 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2887 'note': 'Age restricted channel',
2888 # from https://www.youtube.com/user/DeusExOfficial
2889 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2890 'playlist_mincount': 64,
2892 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2893 'title': 'Uploads from Deus Ex',
2894 'uploader': 'Deus Ex',
2895 'uploader_id': 'DeusExOfficial',
2898 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2899 'only_matching': True,
2901 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2902 'only_matching': True,
# Defer to the more specific playlists/live extractors when they also
# match this URL. NOTE(review): the @classmethod decorator line is not
# visible in this sampled listing.
2906 def suitable(cls, url):
2907 return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2908 else super(YoutubeChannelIE, cls).suitable(url))
# Build the channel's /videos page URL. `url` is unused here but kept in
# the signature for subclass overrides (e.g. user/show variants).
2910 def _build_template_url(self, url, channel_id):
2911 return self._TEMPLATE_URL % channel_id
# Entry point: prefer redirecting to the channel's "uploads" playlist
# (UU + channel id) because page-by-page channel listing is capped at
# ~1050 videos (#5778); fall back to paged extraction, handling
# autogenerated channels (single page) and alert messages.
# NOTE(review): several lines (try:, the autogenerated single-page
# branch header, entries list opener) are missing from this sampled
# listing -- verify against the full source.
2913 def _real_extract(self, url):
2914 channel_id = self._match_id(url)
2916 url = self._build_template_url(url, channel_id)
2918 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2919 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2920 # otherwise fallback on channel by page extraction
2921 channel_page = self._download_webpage(
2922 url + '?view=57', channel_id,
2923 'Downloading channel page', fatal=False)
2924 if channel_page is False:
2925 channel_playlist_id = False
2927 channel_playlist_id = self._html_search_meta(
2928 'channelId', channel_page, 'channel id', default=None)
2929 if not channel_playlist_id:
# Fall back to the app-link meta tags when the channelId meta is absent.
2930 channel_url = self._html_search_meta(
2931 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2932 channel_page, 'channel url', default=None)
2934 channel_playlist_id = self._search_regex(
2935 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2936 channel_url, 'channel id', default=None)
2937 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2938 playlist_id = 'UU' + channel_playlist_id[2:]
2939 return self.url_result(
2940 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2942 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2943 autogenerated = re.search(r'''(?x)
2945 channel-header-autogenerated-label|
2946 yt-channel-title-autogenerated
2947 )[^"]*"''', channel_page) is not None
2950 # The videos are contained in a single page
2951 # the ajax pages can't be used, they are empty
2954 video_id, 'Youtube', video_id=video_id,
2955 video_title=video_title)
2956 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2957 return self.playlist_result(entries, channel_id)
2960 next(self._entries(channel_page, channel_id))
2961 except StopIteration:
2962 alert_message = self._html_search_regex(
2963 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2964 channel_page, 'alert', default=None, group='alert')
2966 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2968 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    """Extract a user's/custom-URL channel uploads; also handles ytuser: keyword."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'
    'url': 'https://www.youtube.com/user/TheLinuxFoundation',
    'playlist_mincount': 320,
    'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
    'title': 'Uploads from The Linux Foundation',
    'uploader': 'The Linux Foundation',
    'uploader_id': 'TheLinuxFoundation',
    # Only available via https://www.youtube.com/c/12minuteathlete/videos
    # but not https://www.youtube.com/user/12minuteathlete/videos
    'url': 'https://www.youtube.com/c/12minuteathlete/videos',
    'playlist_mincount': 249,
    'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
    'title': 'Uploads from 12 Minute Athlete',
    'uploader': '12 Minute Athlete',
    'uploader_id': 'the12minuteathlete',
    'url': 'ytuser:phihag',
    'only_matching': True,
    'url': 'https://www.youtube.com/c/gametrailers',
    'only_matching': True,
    'url': 'https://www.youtube.com/gametrailers',
    'only_matching': True,
    # This channel is not available, geo restricted to JP
    'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
    'only_matching': True,

    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would otherwise match
        # URLs that belong to more specific Youtube* extractors.
        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # Fill in both the path kind ('user' or 'c') and the id from the URL.
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a channel's /live URL to the currently live video, if any."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'
    'url': 'https://www.youtube.com/user/TheYoungTurks/live',
    'id': 'a48o2S1cPoo',
    'title': 'The Young Turks - Live Main Show',
    'uploader': 'The Young Turks',
    'uploader_id': 'TheYoungTurks',
    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
    'upload_date': '20150715',
    'license': 'Standard YouTube License',
    'description': 'md5:438179573adcdff3c97ebb1ee632b891',
    'categories': ['News & Politics'],
    'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
    'dislike_count': int,
    'skip_download': True,
    'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
    'only_matching': True,
    'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
    'only_matching': True,
    'url': 'https://www.youtube.com/TheYoungTurks/live',
    'only_matching': True,

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        if page_type.startswith('video') and video_id and re.match(
                r'^[0-9A-Za-z_-]{11}$', video_id):
            # The /live page embeds a concrete 11-char video id: extract it.
            return self.url_result(video_id, YoutubeIE.ie_key())
        # No live video found: fall back to the channel/user page itself.
        return self.url_result(base_url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract all playlists on a user's or channel's /playlists page."""
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'
    'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
    'playlist_mincount': 4,
    'id': 'ThirstForScience',
    'title': 'ThirstForScience',
    # with "Load more" button
    'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
    'playlist_mincount': 70,
    'title': 'Игорь Клейнер',
    'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
    'playlist_mincount': 17,
    'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
    'title': 'Chem Player',
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Video-id pattern for search result markup; the trailing title group is
    # optional because not every result anchor carries a title attribute.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor backing the "ytsearch" keyword."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Subclasses may inject extra query parameters (e.g. a sort order).
    _EXTRA_QUERY_ARGS = {}

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        'search_query': query.encode('utf-8'),
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
        for pagenum in itertools.count(1):
            # spf=navigate makes the endpoint return JSON whose second element
            # holds the rendered HTML body.
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']
            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)
            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop when a page yields nothing new or we already have enough.
            if not new_videos or len(videos) > limit:
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor ("ytsearchdate") that orders results newest-first."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Ask the results endpoint to sort matches by upload date.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extract results from an already-built /results?search_query= URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
    'playlist_mincount': 5,
    'title': 'youtube-dl test video',
    'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
    'only_matching': True,

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # Decode the query back to human-readable form for the playlist title.
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract a show's seasons by delegating to its /playlists page."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    'url': 'https://www.youtube.com/show/airdisasters',
    'playlist_mincount': 5,
    'id': 'airdisasters',
    'title': 'Air Disasters',

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Each season is a playlist, so reuse the playlists-page extraction.
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    # Feeds are per-account, so credentials are mandatory.
    _LOGIN_REQUIRED = True
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):

    def _entries(self, page):
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
            for entry in self._ids_to_results(new_ids):
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            # Fetch the next "load more" portion via the AJAX endpoint.
            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        # The concrete feed name comes from the subclass (_FEED_NAME).
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extract the authenticated user's Watch Later list (":ytwatchlater")."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
    'url': 'https://www.youtube.com/playlist?list=WL',
    'only_matching': True,
    'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
    'only_matching': True,

    def _real_extract(self, url):
        # 'WL' is the fixed id of the per-account Watch Later playlist.
        _, video = self._check_download_just_video(url, 'WL')
        _, playlist = self._extract_playlist('WL')
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extract the logged-in user's favourites list (":ytfav")."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # The favourites page only exists for an authenticated account.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page references an ordinary playlist; pull its id out
        # of the markup and delegate to the playlist extractor.
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Personalised 'recommended' feed (":ytrec"); login handled by the base class."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Subscriptions feed ("ytsubs"); login handled by the base class."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Watch-history feed (":ythistory"); login handled by the base class."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose v= parameter was lost (typically a shell-quoting
    mistake) and turn them into an actionable error message."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        annotation_id=annotation_[^&]+|
        attribution_link\?a=[^&]+
    'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
    'only_matching': True,
    'url': 'https://www.youtube.com/watch?',
    'only_matching': True,
    'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
    'only_matching': True,
    'url': 'https://www.youtube.com/watch?feature=foo',
    'only_matching': True,
    'url': 'https://www.youtube.com/watch?hl=en-GB',
    'only_matching': True,
    'url': 'https://www.youtube.com/watch?t=2372',
    'only_matching': True,

    def _real_extract(self, url):
        # Unconditionally raise: matching this extractor means the URL has no
        # usable video id, which is almost always an unquoted '&' in a shell.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
3368 class YoutubeTruncatedIDIE(InfoExtractor):
3369 IE_NAME = 'youtube:truncated_id'
3370 IE_DESC = False # Do not list
3371 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3374 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3375 'only_matching': True,
3378 def _real_extract(self, url):
3379 video_id = self._match_id(url)
3380 raise ExtractorError(
3381 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),