3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
36 get_element_by_attribute,
# Base class shared by all YouTube extractors: login/2FA handling,
# language forcing, and common playlist-id matching.
# NOTE(review): original lines 62, 66 and 70 are elided in this view
# (presumably blank separators) — confirm against the full file.
58 class YoutubeBaseInfoExtractor(InfoExtractor):
59 """Provide base functions for Youtube extractors"""
# Google account sign-in endpoints used by _login() below.
60 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
61 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
63 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
64 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# {0} is filled with the "TL" token extracted from the challenge response.
65 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
67 _NETRC_MACHINE = 'youtube'
68 # If True it will raise an error if no login info is provided
69 _LOGIN_REQUIRED = False
# Matches all known playlist-id prefixes (regular, liked, uploads, mixes, albums...).
71 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
# Force English UI and maximum search results via the PREF cookie.
# NOTE(review): original line 74 is elided in this view — it presumably
# contains the `self._set_cookie(` call these arguments belong to; confirm.
73 def _set_language(self):
75 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
76 # YouTube sets the expire time to about two months
77 expire_time=time.time() + 2 * 30 * 24 * 3600)
# Map raw video ids to url_result() dicts handled by the Youtube IE.
# NOTE(review): original line 80 (and the list-comprehension closing lines)
# are elided in this view — likely `return [` ... `for vid_id in ids]`; confirm.
79 def _ids_to_results(self, ids):
81 self.url_result(vid_id, 'Youtube', video_id=vid_id)
# _login(): drives the Google accounts sign-in flow (lookup -> challenge ->
# optional TFA -> CheckCookie). NOTE(review): the `def _login(self):` line
# (orig 85) and many body lines are elided in this view; the comments below
# describe only what the visible lines establish.
86 Attempt to log in to YouTube.
87 True is returned if successful or skipped.
88 False is returned if login failed.
90 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
92 username, password = self._get_login_info()
93 # No authentication to be performed
# Without credentials, logging in is mandatory only when _LOGIN_REQUIRED
# is set and no cookie jar was supplied on the command line.
95 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
96 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
99 login_page = self._download_webpage(
100 self._LOGIN_URL, None,
101 note='Downloading login page',
102 errnote='unable to fetch login page', fatal=False)
103 if login_page is False:
# Hidden <input> fields of the login form are carried through every request.
106 login_form = self._hidden_inputs(login_page)
# Helper: POST a JSON "f.req" payload to one of the sign-in endpoints and
# parse the response (Google prefixes it with junk stripped by transform_source).
108 def req(url, f_req, note, errnote):
109 data = login_form.copy()
112 'checkConnection': 'youtube',
113 'checkedDomains': 'youtube',
115 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
116 'f.req': json.dumps(f_req),
117 'flowName': 'GlifWebSignIn',
118 'flowEntry': 'ServiceLogin',
119 # TODO: reverse actual botguard identifier generation algo
120 'bgRequest': '["identifier",""]',
122 return self._download_json(
123 url, None, note=note, errnote=errnote,
124 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
126 data=urlencode_postdata(data), headers={
127 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
128 'Google-Accounts-XSRF': 1,
# warn() helper body — forwards to the downloader's warning reporter.
132 self._downloader.report_warning(message)
# lookup_req payload (positional protobuf-like list; structure is opaque).
136 None, [], None, 'US', None, None, 2, False, True,
140 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
142 1, [None, None, []], None, None, None, True
# Step 1: account lookup — resolves the username into an opaque user hash.
147 lookup_results = req(
148 self._LOOKUP_URL, lookup_req,
149 'Looking up account info', 'Unable to look up account info')
151 if lookup_results is False:
154 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
156 warn('Unable to extract user hash')
# Step 2: password challenge payload, keyed by the user hash above.
161 None, 1, None, [1, None, None, None, [password, None, True]],
163 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
164 1, [None, None, []], None, None, None, True
167 challenge_results = req(
168 self._CHALLENGE_URL, challenge_req,
169 'Logging in', 'Unable to log in')
171 if challenge_results is False:
174 login_res = try_get(challenge_results, lambda x: x[0][5], list)
176 login_msg = try_get(login_res, lambda x: x[5], compat_str)
# Translate Google's INCORRECT_ANSWER_ENTERED code into a readable message.
178 'Unable to login: %s' % 'Invalid password'
179 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
182 res = try_get(challenge_results, lambda x: x[0][-1], list)
184 warn('Unable to extract result entry')
187 login_challenge = try_get(res, lambda x: x[0][0], list)
189 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
# Step 3 (optional): two-factor authentication flow.
190 if challenge_str == 'TWO_STEP_VERIFICATION':
191 # SEND_SUCCESS - TFA code has been successfully sent to phone
192 # QUOTA_EXCEEDED - reached the limit of TFA codes
193 status = try_get(login_challenge, lambda x: x[5], compat_str)
194 if status == 'QUOTA_EXCEEDED':
195 warn('Exceeded the limit of TFA codes, try later')
# "TL" token, needed to build the TFA submission URL (_TFA_URL).
198 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
200 warn('Unable to extract TL')
203 tfa_code = self._get_tfa_info('2-step verification code')
207 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
208 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
# Codes are sometimes entered with the SMS "G-" prefix; strip it.
211 tfa_code = remove_start(tfa_code, 'G-')
214 user_hash, None, 2, None,
216 9, None, None, None, None, None, None, None,
217 [None, tfa_code, True, 2]
221 self._TFA_URL.format(tl), tfa_req,
222 'Submitting TFA code', 'Unable to submit TFA code')
224 if tfa_results is False:
227 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
229 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
231 'Unable to finish TFA: %s' % 'Invalid TFA code'
232 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
235 check_cookie_url = try_get(
236 tfa_results, lambda x: x[0][-1][2], compat_str)
# Human-readable explanations for challenges that cannot be solved here.
239 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
240 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
241 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
243 challenge = CHALLENGES.get(
245 '%s returned error %s.' % (self.IE_NAME, challenge_str))
246 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
249 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
251 if not check_cookie_url:
252 warn('Unable to extract CheckCookie URL')
# Final step: fetching the CheckCookie URL commits the session cookies;
# success is detected by a redirect into myaccount.google.com.
255 check_cookie_results = self._download_webpage(
256 check_cookie_url, None, 'Checking cookie', fatal=False)
258 if check_cookie_results is False:
261 if 'https://myaccount.google.com/' not in check_cookie_results:
262 warn('Unable to log in')
def _download_webpage_handle(self, *args, **kwargs):
    """Fetch a webpage, forcing YouTube's legacy (non-polymer) layout.

    Merges ``disable_polymer=true`` into any caller-supplied ``query``
    without mutating the caller's dict, then delegates to the base
    implementation.
    """
    merged_query = kwargs.get('query', {}).copy()
    merged_query['disable_polymer'] = 'true'
    kwargs['query'] = merged_query
    parent = super(YoutubeBaseInfoExtractor, self)
    return parent._download_webpage_handle(*args, **compat_kwargs(kwargs))
# Initialization hook: set up language cookie / perform login before any
# extraction. NOTE(review): original lines 276-277 and 279+ are elided in
# this view (presumably an early return, _set_language() call and the
# failure branch of _login()); confirm against the full file.
274 def _real_initialize(self):
275 if self._downloader is None:
278 if not self._login():
282 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
283 # Extract entries from page with "Load more" button
# _entries(): generator that yields entries from the initial page, then
# follows the AJAX "Load more" widget page by page, retrying transient
# 5xx errors. NOTE(review): several body lines (orig 288-289, 291-295,
# 297, 305, 308-317, 319-320) are elided in this view — the yield, the
# retry-counter setup and the loop-exit branches are among them; confirm.
284 def _entries(self, page, playlist_id):
285 more_widget_html = content_html = page
286 for page_num in itertools.count(1):
287 for entry in self._process_page(content_html):
# The "Load more" widget embeds the continuation URL in this attribute.
290 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
296 while count <= retries:
298 # Downloading page may result in intermittent 5xx HTTP error
299 # that is usually worked around with a retry
300 more = self._download_json(
301 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
302 'Downloading page #%s%s'
303 % (page_num, ' (retry #%d)' % count if count else ''),
304 transform_source=uppercase_escape)
306 except ExtractorError as e:
# Only retry on server-side errors; anything else propagates.
307 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
313 content_html = more['content_html']
314 if not content_html.strip():
315 # Some webpages show a "Load more" button but they don't
318 more_widget_html = more['load_more_widget_html']
# Specialization that turns (video_id, title) pairs scraped from a page
# into url_result entries. NOTE(review): orig lines 325, 330, 334,
# 337-338, 342, 345, 347-348 are elided in this view (continue branches
# and the ids_in_page/titles_in_page initializers among them); confirm.
321 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
322 def _process_page(self, content):
323 for video_id, video_title in self.extract_videos_from_page(content):
324 yield self.url_result(video_id, 'Youtube', video_id, video_title)
# Scrape (id, title) pairs matching video_re into the two parallel lists,
# de-duplicating ids and back-filling titles discovered later.
326 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
327 for mobj in re.finditer(video_re, page):
328 # The link with index 0 is not the first video of the playlist (not sure if still actual)
329 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
331 video_id = mobj.group('id')
332 video_title = unescapeHTML(
333 mobj.group('title')) if 'title' in mobj.groupdict() else None
335 video_title = video_title.strip()
# "► Play all" is the playlist's own link, not a real video title.
336 if video_title == '► Play all':
# Known id: keep first occurrence, but adopt a title if missing so far.
339 idx = ids_in_page.index(video_id)
340 if video_title and not titles_in_page[idx]:
341 titles_in_page[idx] = video_title
343 ids_in_page.append(video_id)
344 titles_in_page.append(video_title)
# Convenience wrapper using the subclass-provided _VIDEO_RE pattern.
346 def extract_videos_from_page(self, page):
349 self.extract_videos_from_page_impl(
350 self._VIDEO_RE, page, ids_in_page, titles_in_page)
351 return zip(ids_in_page, titles_in_page)
# Base for pages that list multiple playlists (e.g. a channel's
# "Playlists" tab): each lockup-title link becomes a playlist entry.
# NOTE(review): orig line 358 is elided in this view — presumably the
# closing `content)):` of the re.findall call; confirm.
354 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
355 def _process_page(self, content):
# orderedSet preserves first-seen order while dropping duplicate ids.
356 for playlist_id in orderedSet(re.findall(
357 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
359 yield self.url_result(
360 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
def _real_extract(self, url):
    """Resolve the id from *url*, fetch the page and wrap the entries
    scraped from it (via ``_entries``) into a playlist result, using the
    page's OpenGraph title (if any) as the playlist title."""
    list_id = self._match_id(url)
    page = self._download_webpage(url, list_id)
    page_title = self._og_search_title(page, fatal=False)
    entries = self._entries(page, list_id)
    return self.playlist_result(entries, list_id, page_title)
369 class YoutubeIE(YoutubeBaseInfoExtractor):
370 IE_DESC = 'YouTube.com'
371 _VALID_URL = r"""(?x)^
373 (?:https?://|//) # http(s):// or protocol-independent URL
374 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
375 (?:www\.)?deturl\.com/www\.youtube\.com/|
376 (?:www\.)?pwnyoutube\.com/|
377 (?:www\.)?hooktube\.com/|
378 (?:www\.)?yourepeat\.com/|
379 tube\.majestyc\.net/|
380 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
381 (?:(?:www|dev)\.)?invidio\.us/|
382 (?:(?:www|no)\.)?invidiou\.sh/|
383 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
384 (?:www\.)?invidious\.kabi\.tk/|
385 (?:www\.)?invidious\.13ad\.de/|
386 (?:www\.)?invidious\.mastodon\.host/|
387 (?:www\.)?invidious\.nixnet\.xyz/|
388 (?:www\.)?invidious\.drycat\.fr/|
389 (?:www\.)?tube\.poal\.co/|
390 (?:www\.)?vid\.wxzm\.sx/|
391 (?:www\.)?yt\.elukerio\.org/|
392 (?:www\.)?yt\.lelux\.fi/|
393 (?:www\.)?kgg2m7yk5aybusll\.onion/|
394 (?:www\.)?qklhadlycap4cnod\.onion/|
395 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
396 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
397 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
398 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
399 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
400 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
401 (?:.*?\#/)? # handle anchor (#/) redirect urls
402 (?: # the various things that can precede the ID:
403 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
404 |(?: # or the v= param in all its forms
405 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
406 (?:\?|\#!?) # the params delimiter ? or # or #!
407 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
412 youtu\.be| # just youtu.be/xxxx
413 vid\.plus| # or vid.plus/xxxx
414 zwearz\.com/watch| # or zwearz.com/watch/xxxx
416 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
418 )? # all until now is optional -> you can pass the naked ID
419 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
422 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
423 WL # WL are handled by the watch later IE
426 (?(1).+)? # if we found the ID, everything can follow
427 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
428 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
430 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
431 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
432 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
433 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
434 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
435 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
436 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
437 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
438 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
439 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
440 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
441 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
442 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
443 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
444 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
445 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
446 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
447 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
451 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
452 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
453 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
454 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
455 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
456 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
457 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
459 # Apple HTTP Live Streaming
460 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
461 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
462 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
463 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
464 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
465 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
466 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
467 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
470 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
471 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
472 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
476 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
477 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
480 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
481 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
484 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
485 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
486 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
487 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
488 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
489 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
490 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
493 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
494 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
495 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
500 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
501 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
509 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
511 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
512 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
517 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
518 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
520 # Dash webm audio with opus inside
521 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
522 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
523 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
526 '_rtmp': {'protocol': 'rtmp'},
528 # av01 video only formats sometimes served with "unknown" codecs
529 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
530 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
531 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
534 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
541 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
545 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
546 'uploader': 'Philipp Hagemeister',
547 'uploader_id': 'phihag',
548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
549 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
550 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
551 'upload_date': '20121002',
552 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
553 'categories': ['Science & Technology'],
554 'tags': ['youtube-dl'],
558 'dislike_count': int,
564 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
565 'note': 'Test generic use_cipher_signature video (#897)',
569 'upload_date': '20120506',
570 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
571 'alt_title': 'I Love It (feat. Charli XCX)',
572 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
573 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
574 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
575 'iconic ep', 'iconic', 'love', 'it'],
577 'uploader': 'Icona Pop',
578 'uploader_id': 'IconaPop',
579 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
580 'creator': 'Icona Pop',
581 'track': 'I Love It (feat. Charli XCX)',
582 'artist': 'Icona Pop',
586 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
587 'note': 'Test VEVO video with age protection (#956)',
591 'upload_date': '20130703',
592 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
593 'alt_title': 'Tunnel Vision',
594 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
596 'uploader': 'justintimberlakeVEVO',
597 'uploader_id': 'justintimberlakeVEVO',
598 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
599 'creator': 'Justin Timberlake',
600 'track': 'Tunnel Vision',
601 'artist': 'Justin Timberlake',
606 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
607 'note': 'Embed-only video (#1746)',
611 'upload_date': '20120608',
612 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
613 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
614 'uploader': 'SET India',
615 'uploader_id': 'setindia',
616 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
621 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
622 'note': 'Use the first video ID in the URL',
626 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
627 'uploader': 'Philipp Hagemeister',
628 'uploader_id': 'phihag',
629 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
630 'upload_date': '20121002',
631 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
632 'categories': ['Science & Technology'],
633 'tags': ['youtube-dl'],
637 'dislike_count': int,
640 'skip_download': True,
644 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
645 'note': '256k DASH audio (format 141) via DASH manifest',
649 'upload_date': '20121002',
650 'uploader_id': '8KVIDEO',
651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
653 'uploader': '8KVIDEO',
654 'title': 'UHDTV TEST 8K VIDEO.mp4'
657 'youtube_include_dash_manifest': True,
660 'skip': 'format 141 not served anymore',
662 # DASH manifest with encrypted signature
664 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
668 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
669 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
671 'uploader': 'AfrojackVEVO',
672 'uploader_id': 'AfrojackVEVO',
673 'upload_date': '20131011',
676 'youtube_include_dash_manifest': True,
677 'format': '141/bestaudio[ext=m4a]',
680 # JS player signature function name containing $
682 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
686 'title': 'Taylor Swift - Shake It Off',
687 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
689 'uploader': 'TaylorSwiftVEVO',
690 'uploader_id': 'TaylorSwiftVEVO',
691 'upload_date': '20140818',
694 'youtube_include_dash_manifest': True,
695 'format': '141/bestaudio[ext=m4a]',
700 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
705 'upload_date': '20100909',
706 'uploader': 'Amazing Atheist',
707 'uploader_id': 'TheAmazingAtheist',
708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
709 'title': 'Burning Everyone\'s Koran',
710 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
713 # Normal age-gate video (No vevo, embed allowed)
715 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
719 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
720 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
722 'uploader': 'The Witcher',
723 'uploader_id': 'WitcherGame',
724 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
725 'upload_date': '20140605',
729 # Age-gate video with encrypted signature
731 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
735 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
736 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
738 'uploader': 'LloydVEVO',
739 'uploader_id': 'LloydVEVO',
740 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
741 'upload_date': '20110629',
745 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
746 # YouTube Red ad is not captured for creator
748 'url': '__2ABJjxzNo',
753 'upload_date': '20100430',
754 'uploader_id': 'deadmau5',
755 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
756 'creator': 'Dada Life, deadmau5',
757 'description': 'md5:12c56784b8032162bb936a5f76d55360',
758 'uploader': 'deadmau5',
759 'title': 'Deadmau5 - Some Chords (HD)',
760 'alt_title': 'This Machine Kills Some Chords',
762 'expected_warnings': [
763 'DASH manifest missing',
766 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
768 'url': 'lqQg6PlCWgI',
773 'upload_date': '20150827',
774 'uploader_id': 'olympic',
775 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
776 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
777 'uploader': 'Olympic',
778 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
781 'skip_download': 'requires avconv',
786 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
790 'stretched_ratio': 16 / 9.,
792 'upload_date': '20110310',
793 'uploader_id': 'AllenMeow',
794 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
795 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
797 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
800 # url_encoded_fmt_stream_map is empty string
802 'url': 'qEJwOuvDf7I',
806 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
808 'upload_date': '20150404',
809 'uploader_id': 'spbelect',
810 'uploader': 'Наблюдатели Петербурга',
813 'skip_download': 'requires avconv',
815 'skip': 'This live event has ended.',
817 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
819 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
823 'title': 'md5:7b81415841e02ecd4313668cde88737a',
824 'description': 'md5:116377fd2963b81ec4ce64b542173306',
826 'upload_date': '20150625',
827 'uploader_id': 'dorappi2000',
828 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
829 'uploader': 'dorappi2000',
830 'formats': 'mincount:31',
832 'skip': 'not actual anymore',
834 # DASH manifest with segment_list
836 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
837 'md5': '8ce563a1d667b599d21064e982ab9e31',
841 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
842 'uploader': 'Airtek',
843 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
844 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
845 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
848 'youtube_include_dash_manifest': True,
849 'format': '135', # bestvideo
851 'skip': 'This live event has ended.',
854 # Multifeed videos (multiple cameras), URL is for Main Camera
855 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
858 'title': 'teamPGP: Rocket League Noob Stream',
859 'description': 'md5:dc7872fb300e143831327f1bae3af010',
865 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
866 'description': 'md5:dc7872fb300e143831327f1bae3af010',
868 'upload_date': '20150721',
869 'uploader': 'Beer Games Beer',
870 'uploader_id': 'beergamesbeer',
871 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
872 'license': 'Standard YouTube License',
878 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
879 'description': 'md5:dc7872fb300e143831327f1bae3af010',
881 'upload_date': '20150721',
882 'uploader': 'Beer Games Beer',
883 'uploader_id': 'beergamesbeer',
884 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
885 'license': 'Standard YouTube License',
891 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
892 'description': 'md5:dc7872fb300e143831327f1bae3af010',
894 'upload_date': '20150721',
895 'uploader': 'Beer Games Beer',
896 'uploader_id': 'beergamesbeer',
897 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
898 'license': 'Standard YouTube License',
904 'title': 'teamPGP: Rocket League Noob Stream (zim)',
905 'description': 'md5:dc7872fb300e143831327f1bae3af010',
907 'upload_date': '20150721',
908 'uploader': 'Beer Games Beer',
909 'uploader_id': 'beergamesbeer',
910 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
911 'license': 'Standard YouTube License',
915 'skip_download': True,
917 'skip': 'This video is not available.',
920 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
921 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
924 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
927 'skip': 'Not multifeed anymore',
930 'url': 'https://vid.plus/FlRa-iH7PGw',
931 'only_matching': True,
934 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
935 'only_matching': True,
938 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
939 # Also tests cut-off URL expansion in video description (see
940 # https://github.com/ytdl-org/youtube-dl/issues/1892,
941 # https://github.com/ytdl-org/youtube-dl/issues/8164)
942 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
946 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
947 'alt_title': 'Dark Walk - Position Music',
948 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
950 'upload_date': '20151119',
951 'uploader_id': 'IronSoulElf',
952 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
953 'uploader': 'IronSoulElf',
954 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
955 'track': 'Dark Walk - Position Music',
956 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
957 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
960 'skip_download': True,
964 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
965 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
966 'only_matching': True,
969 # Video with yt:stretch=17:0
970 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
974 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
975 'description': 'md5:ee18a25c350637c8faff806845bddee9',
976 'upload_date': '20151107',
977 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
978 'uploader': 'CH GAMER DROID',
981 'skip_download': True,
983 'skip': 'This video does not exist.',
986 # Video licensed under Creative Commons
987 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
991 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
992 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
994 'upload_date': '20150127',
995 'uploader_id': 'BerkmanCenter',
996 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
997 'uploader': 'The Berkman Klein Center for Internet & Society',
998 'license': 'Creative Commons Attribution license (reuse allowed)',
1001 'skip_download': True,
1005 # Channel-like uploader_url
1006 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1008 'id': 'eQcmzGIKrzg',
1010 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1011 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1013 'upload_date': '20151119',
1014 'uploader': 'Bernie Sanders',
1015 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1016 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1017 'license': 'Creative Commons Attribution license (reuse allowed)',
1020 'skip_download': True,
1024 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1025 'only_matching': True,
1028 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1029 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1030 'only_matching': True,
1033 # Rental video preview
1034 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1036 'id': 'uGpuVWrhIzE',
1038 'title': 'Piku - Trailer',
1039 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1040 'upload_date': '20150811',
1041 'uploader': 'FlixMatrix',
1042 'uploader_id': 'FlixMatrixKaravan',
1043 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1044 'license': 'Standard YouTube License',
1047 'skip_download': True,
1049 'skip': 'This video is not available.',
1052 # YouTube Red video with episode data
1053 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1055 'id': 'iqKdEhx-dD4',
1057 'title': 'Isolation - Mind Field (Ep 1)',
1058 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1060 'upload_date': '20170118',
1061 'uploader': 'Vsauce',
1062 'uploader_id': 'Vsauce',
1063 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1064 'series': 'Mind Field',
1066 'episode_number': 1,
1069 'skip_download': True,
1071 'expected_warnings': [
1072 'Skipping DASH manifest',
1076 # The following content has been identified by the YouTube community
1077 # as inappropriate or offensive to some audiences.
1078 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1080 'id': '6SJNVb0GnPI',
1082 'title': 'Race Differences in Intelligence',
1083 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1085 'upload_date': '20140124',
1086 'uploader': 'New Century Foundation',
1087 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1088 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1091 'skip_download': True,
1096 'url': '1t24XAntNCY',
1097 'only_matching': True,
1100 # geo restricted to JP
1101 'url': 'sJL6WA-aGkQ',
1102 'only_matching': True,
1105 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1106 'only_matching': True,
1109 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1110 'only_matching': True,
1114 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1115 'only_matching': True,
1118 # Video with unsupported adaptive stream type formats
1119 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1121 'id': 'Z4Vy8R84T1U',
1123 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1124 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1126 'upload_date': '20130923',
1127 'uploader': 'Amelia Putri Harwita',
1128 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1129 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1130 'formats': 'maxcount:10',
1133 'skip_download': True,
1134 'youtube_include_dash_manifest': False,
1136 'skip': 'not actual anymore',
1139 # Youtube Music Auto-generated description
1140 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1142 'id': 'MgNrAu2pzNs',
1144 'title': 'Voyeur Girl',
1145 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1146 'upload_date': '20190312',
1147 'uploader': 'Stephen - Topic',
1148 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1149 'artist': 'Stephen',
1150 'track': 'Voyeur Girl',
1151 'album': 'it\'s too much love to know my dear',
1152 'release_date': '20190313',
1153 'release_year': 2019,
1156 'skip_download': True,
1160 # Youtube Music Auto-generated description
1161 # Retrieve 'artist' field from 'Artist:' in video description
1162 # when it is present on youtube music video
1163 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1165 'id': 'k0jLE7tTwjY',
1167 'title': 'Latch Feat. Sam Smith',
1168 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1169 'upload_date': '20150110',
1170 'uploader': 'Various Artists - Topic',
1171 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1172 'artist': 'Disclosure',
1173 'track': 'Latch Feat. Sam Smith',
1174 'album': 'Latch Featuring Sam Smith',
1175 'release_date': '20121008',
1176 'release_year': 2012,
1179 'skip_download': True,
1183 # Youtube Music Auto-generated description
1184 # handle multiple artists on youtube music video
1185 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1187 'id': '74qn0eJSjpA',
1189 'title': 'Eastside',
1190 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1191 'upload_date': '20180710',
1192 'uploader': 'Benny Blanco - Topic',
1193 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1194 'artist': 'benny blanco, Halsey, Khalid',
1195 'track': 'Eastside',
1196 'album': 'Eastside',
1197 'release_date': '20180713',
1198 'release_year': 2018,
1201 'skip_download': True,
1205 # Youtube Music Auto-generated description
1206 # handle youtube music video with release_year and no release_date
1207 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1209 'id': '-hcAI0g-f5M',
1211 'title': 'Put It On Me',
1212 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1213 'upload_date': '20180426',
1214 'uploader': 'Matt Maeson - Topic',
1215 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1216 'artist': 'Matt Maeson',
1217 'track': 'Put It On Me',
1218 'album': 'The Hearse',
1219 'release_date': None,
1220 'release_year': 2018,
1223 'skip_download': True,
1227 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1228 'only_matching': True,
def __init__(self, *args, **kwargs):
    """Set up the extractor and an empty per-instance player-function cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Caches extracted signature-decryption functions, keyed by
    # (player_url, signature cache id) — populated in _decrypt_signature.
    self._player_cache = dict()
def report_video_info_webpage_download(self, video_id):
    """Report attempt to download video info webpage."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Report attempt to extract video information."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Report that the requested format is not available for this video.

    (The previous docstring, "Report extracted video URL", was a
    copy-paste error and described a different method.)
    """
    self.to_screen('%s: Format %s not available' % (video_id, format))
def report_rtmp_download(self):
    """Indicate the download will use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
def _signature_cache_id(self, example_sig):
    """Return a dotted string of the lengths of the dot-separated parts
    of example_sig, used as a cache-key component for signature functions."""
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
# Extract and cache the signature-decryption function for the given player.
# Supports both JS ('js') and Flash ('swf') players; the resulting function
# is run once on a synthetic test string and the resulting index permutation
# is stored in the 'youtube-sigfuncs' filesystem cache so later runs can
# reconstruct the function without re-downloading the player.
1256 def _extract_signature_function(self, video_id, player_url, example_sig):
# Identify the player type (file extension) and player id from the URL.
1258 r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1261 raise ExtractorError('Cannot identify player %r' % player_url)
1262 player_type = id_m.group('ext')
1263 player_id = id_m.group('id')
1265 # Read from filesystem cache
1266 func_id = '%s_%s_%s' % (
1267 player_type, player_id, self._signature_cache_id(example_sig))
# func_id must be a safe filename component (no path separators).
1268 assert os.path.basename(func_id) == func_id
1270 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1271 if cache_spec is not None:
# Cache hit: cache_spec is a list of source indices, so the decryption is a
# plain reordering of the input characters.
1272 return lambda s: ''.join(s[i] for i in cache_spec)
1275 'Downloading player %s' % player_url
1276 if self._downloader.params.get('verbose') else
1277 'Downloading %s player %s' % (player_type, player_id)
1279 if player_type == 'js':
1280 code = self._download_webpage(
1281 player_url, video_id,
1283 errnote='Download of %s failed' % player_url)
1284 res = self._parse_sig_js(code)
1285 elif player_type == 'swf':
1286 urlh = self._request_webpage(
1287 player_url, video_id,
1289 errnote='Download of %s failed' % player_url)
1291 res = self._parse_sig_swf(code)
1293 assert False, 'Invalid player type %r' % player_type
# Probe the extracted function with a string of distinct known characters to
# record which input index ends up at each output position.
1295 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1296 cache_res = res(test_string)
1297 cache_spec = [ord(c) for c in cache_res]
1299 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
# Debug helper (enabled via --youtube-print-sig-code): print Python source
# equivalent to the extracted signature function, compressing consecutive
# runs of indices into slice expressions like s[3:17:2].
1302 def _print_sig_code(self, func, example_sig):
1303 def gen_sig_code(idxs):
# Render one slice expression; empty start/step parts are omitted so the
# output stays as compact as hand-written slicing.
1304 def _genslice(start, end, step):
1305 starts = '' if start == 0 else str(start)
1306 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1307 steps = '' if step == 1 else (':%d' % step)
1308 return 's[%s%s%s]' % (starts, ends, steps)
1311 # Quelch pyflakes warnings - start will be set when step is set
1312 start = '(Never used)'
# Walk consecutive index pairs, emitting a slice when a constant stride is
# detected and a single-element access otherwise.
1313 for i, prev in zip(idxs[1:], idxs[:-1]):
1314 if step is not None:
1315 if i - prev == step:
1317 yield _genslice(start, prev, step)
1320 if i - prev in [-1, 1]:
1325 yield 's[%d]' % prev
1329 yield _genslice(start, i, step)
# Probe func on a synthetic string to recover the index permutation it
# applies, then join the generated pieces into a printable function body.
1331 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1332 cache_res = func(test_string)
1333 cache_spec = [ord(c) for c in cache_res]
1334 expr_code = ' + '.join(gen_sig_code(cache_spec))
1335 signature_id_tuple = '(%s)' % (
1336 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1337 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1338 ' return %s\n') % (signature_id_tuple, expr_code)
1339 self.to_screen('Extracted signature function:\n' + code)
# Locate the name of the signature-scrambling function inside the player JS
# by trying a series of known patterns (most specific first), then compile it
# with JSInterpreter. The returned callable maps an encrypted signature
# string to its decrypted form.
1341 def _parse_sig_js(self, jscode):
1342 funcname = self._search_regex(
1343 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1344 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1345 r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1346 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1348 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1349 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1350 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1351 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1352 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1353 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1354 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1355 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1356 jscode, 'Initial JS player signature function name', group='sig')
1358 jsi = JSInterpreter(jscode)
1359 initial_function = jsi.extract_function(funcname)
# The interpreted JS function takes its arguments as a list.
1360 return lambda s: initial_function([s])
def _parse_sig_swf(self, file_contents):
    """Extract the signature 'decipher' function from a Flash (SWF) player."""
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')
    # The interpreted SWF function takes its arguments as a list.
    return lambda s: decipher([s])
1369 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1370 """Turn the encrypted s field into a working signature"""
1372 if player_url is None:
1373 raise ExtractorError('Cannot decrypt signature without player_url')
# Normalize protocol-relative and site-relative player URLs to absolute
# https URLs before using them as cache keys and download targets.
1375 if player_url.startswith('//'):
1376 player_url = 'https:' + player_url
1377 elif not re.match(r'https?://', player_url):
1378 player_url = compat_urlparse.urljoin(
1379 'https://www.youtube.com', player_url)
# Reuse a previously extracted function for the same player and signature
# layout; extraction involves downloading and interpreting the player code.
1381 player_id = (player_url, self._signature_cache_id(s))
1382 if player_id not in self._player_cache:
1383 func = self._extract_signature_function(
1384 video_id, player_url, s
1386 self._player_cache[player_id] = func
1387 func = self._player_cache[player_id]
1388 if self._downloader.params.get('youtube_print_sig_code'):
1389 self._print_sig_code(func, s)
# Any failure during extraction is surfaced with the full traceback to aid
# debugging of player changes.
1391 except Exception as e:
1392 tb = traceback.format_exc()
1393 raise ExtractorError(
1394 'Signature extraction failed: ' + tb, cause=e)
# Fetch the list of human-made subtitle tracks via the timedtext API and
# build a {language_code: [format dicts]} mapping. A failed list download
# only emits a warning (subtitles are optional).
1396 def _get_subtitles(self, video_id, webpage):
1398 subs_doc = self._download_xml(
1399 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1400 video_id, note=False)
1401 except ExtractorError as err:
1402 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1406 for track in subs_doc.findall('track'):
1407 lang = track.attrib['lang_code']
# Skip languages already seen; one URL is generated per supported
# subtitle format (self._SUBTITLE_FORMATS).
1408 if lang in sub_lang_list:
1411 for ext in self._SUBTITLE_FORMATS:
1412 params = compat_urllib_parse_urlencode({
1416 'name': track.attrib['name'].encode('utf-8'),
1418 sub_formats.append({
1419 'url': 'https://www.youtube.com/api/timedtext?' + params,
1422 sub_lang_list[lang] = sub_formats
1423 if not sub_lang_list:
1424 self._downloader.report_warning('video doesn\'t have subtitles')
1426 return sub_lang_list
# Extract and parse the ytplayer.config JSON object embedded in the watch
# page. Returns the parsed dict, or None when the config is absent or
# cannot be parsed (both _search_regex and _parse_json are non-fatal here).
1428 def _get_ytplayer_config(self, video_id, webpage):
1430 # User data may contain arbitrary character sequences that may affect
1431 # JSON extraction with regex, e.g. when '};' is contained the second
1432 # regex won't capture the whole JSON. Yet working around by trying more
1433 # concrete regex first keeping in mind proper quoted string handling
1434 # to be implemented in future that will replace this workaround (see
1435 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1436 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1437 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1438 r';ytplayer\.config\s*=\s*({.+?});',
1440 config = self._search_regex(
1441 patterns, webpage, 'ytplayer.config', default=None)
# uppercase_escape undoes \UXXXX-style escaping before JSON parsing.
1443 return self._parse_json(
1444 uppercase_escape(config), video_id, fatal=False)
# Extract automatically generated captions (and their available
# translations), returning a {language_code: [format dicts]} mapping.
# Missing captions only produce warnings, never hard failures.
1446 def _get_automatic_captions(self, video_id, webpage):
1447 """We need the webpage for getting the captions url, pass it as an
1448 argument to speed up the process."""
1449 self.to_screen('%s: Looking for automatic captions' % video_id)
1450 player_config = self._get_ytplayer_config(video_id, webpage)
1451 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1452 if not player_config:
1453 self._downloader.report_warning(err_msg)
1456 args = player_config['args']
# Legacy path: 'ttsurl' points at the timedtext service.
1457 caption_url = args.get('ttsurl')
1459 timestamp = args['timestamp']
1460 # We get the available subtitles
1461 list_params = compat_urllib_parse_urlencode({
1466 list_url = caption_url + '&' + list_params
1467 caption_list = self._download_xml(list_url, video_id)
1468 original_lang_node = caption_list.find('track')
1469 if original_lang_node is None:
1470 self._downloader.report_warning('Video doesn\'t have automatic captions')
1472 original_lang = original_lang_node.attrib['lang_code']
1473 caption_kind = original_lang_node.attrib.get('kind', '')
# One URL per (target language, subtitle format) combination.
1476 for lang_node in caption_list.findall('target'):
1477 sub_lang = lang_node.attrib['lang_code']
1479 for ext in self._SUBTITLE_FORMATS:
1480 params = compat_urllib_parse_urlencode({
1481 'lang': original_lang,
1485 'kind': caption_kind,
1487 sub_formats.append({
1488 'url': caption_url + '&' + params,
1491 sub_lang_list[sub_lang] = sub_formats
1492 return sub_lang_list
# Build the captions mapping from a base caption URL by rewriting its query
# string for each requested target language and subtitle format.
1494 def make_captions(sub_url, sub_langs):
1495 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1496 caption_qs = compat_parse_qs(parsed_sub_url.query)
1498 for sub_lang in sub_langs:
1500 for ext in self._SUBTITLE_FORMATS:
1502 'tlang': [sub_lang],
1505 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1506 query=compat_urllib_parse_urlencode(caption_qs, True)))
1507 sub_formats.append({
1511 captions[sub_lang] = sub_formats
1514 # New captions format as of 22.06.2017
1515 player_response = args.get('player_response')
1516 if player_response and isinstance(player_response, compat_str):
1517 player_response = self._parse_json(
1518 player_response, video_id, fatal=False)
1520 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1521 base_url = renderer['captionTracks'][0]['baseUrl']
1523 for lang in renderer['translationLanguages']:
1524 lang_code = lang.get('languageCode')
1526 sub_lang_list.append(lang_code)
1527 return make_captions(base_url, sub_lang_list)
1529 # Some videos don't provide ttsurl but rather caption_tracks and
1530 # caption_translation_languages (e.g. 20LmZk1hakA)
1531 # Not used anymore as of 22.06.2017
1532 caption_tracks = args['caption_tracks']
1533 caption_translation_languages = args['caption_translation_languages']
1534 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1536 for lang in caption_translation_languages.split(','):
1537 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1538 sub_lang = lang_qs.get('lc', [None])[0]
1540 sub_lang_list.append(sub_lang)
1541 return make_captions(caption_url, sub_lang_list)
1542 # An extractor error can be raised by the download process if there are
1543 # no automatic captions but there are subtitles
1544 except (KeyError, IndexError, ExtractorError):
1545 self._downloader.report_warning(err_msg)
# Ping YouTube's playback-tracking URL so the video shows up as watched in
# the account's history. Best-effort: a missing URL or failed request is
# silently tolerated (fatal=False).
1548 def _mark_watched(self, video_id, video_info, player_response):
# The tracking URL lives either in the player response (new layout) or in
# the legacy video_info dict.
1549 playback_url = url_or_none(try_get(
1551 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1552 video_info, lambda x: x['videostats_playback_base_url'][0]))
1553 if not playback_url:
1555 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1556 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1558 # cpn generation algorithm is reverse engineered from base.js.
1559 # In fact it works even with dummy cpn.
1560 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
# 16 random characters from the 64-symbol alphabet (randint result is
# masked down to 6 bits).
1561 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1567 playback_url = compat_urlparse.urlunparse(
1568 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1570 self._download_webpage(
1571 playback_url, video_id, 'Marking watched',
1572 'Unable to mark watched', fatal=False)
# Collect all YouTube embeds found in an arbitrary webpage: standard
# iframe/object player embeds, lazyYT embeds, and the Wordpress
# "YouTube Video Importer" plugin markup.
1575 def _extract_urls(webpage):
1576 # Embedded YouTube player
1578 unescapeHTML(mobj.group('url'))
1579 for mobj in re.finditer(r'''(?x)
1589 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1590 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1593 # lazyYT YouTube embed
1594 entries.extend(list(map(
1596 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1598 # Wordpress "YouTube Video Importer" plugin
1599 matches = re.findall(r'''(?x)<div[^>]+
1600 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1601 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
# The video id is the last capture group of each match.
1602 entries.extend(m[-1] for m in matches)
def _extract_url(webpage):
    """Return the first embedded YouTube URL found in webpage, or None."""
    for embed_url in YoutubeIE._extract_urls(webpage):
        return embed_url
    return None
# Return the video id from url — capture group 2 of cls._VALID_URL — and
# raise ExtractorError for URLs that do not match the pattern.
1612 def extract_id(cls, url):
1613 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1615 raise ExtractorError('Invalid URL: %s' % url)
1616 video_id = mobj.group(2)
# Parse chapter markers out of the video description: lines containing a
# seekTo anchor with a timestamp. Returns a list of dicts with start_time,
# end_time and a cleaned title; entries whose timestamps are unparsable or
# inconsistent with the video duration are skipped.
1620 def _extract_chapters(description, duration):
1623 chapter_lines = re.findall(
1624 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1626 if not chapter_lines:
1629 for next_num, (chapter_line, time_point) in enumerate(
1630 chapter_lines, start=1):
1631 start_time = parse_duration(time_point)
1632 if start_time is None:
1634 if start_time > duration:
# A chapter ends where the next one starts; the last one ends at the
# video duration.
1636 end_time = (duration if next_num == len(chapter_lines)
1637 else parse_duration(chapter_lines[next_num][1]))
1638 if end_time is None:
1640 if end_time > duration:
1642 if start_time > end_time:
# Strip the timestamp anchor, surrounding dashes/whitespace, and collapse
# internal whitespace to get the chapter title.
1644 chapter_title = re.sub(
1645 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1646 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1648 'start_time': start_time,
1649 'end_time': end_time,
1650 'title': chapter_title,
1654 def _real_extract(self, url):
1655 url, smuggled_data = unsmuggle_url(url, {})
1658 'http' if self._downloader.params.get('prefer_insecure', False)
1663 parsed_url = compat_urllib_parse_urlparse(url)
1664 for component in [parsed_url.fragment, parsed_url.query]:
1665 query = compat_parse_qs(component)
1666 if start_time is None and 't' in query:
1667 start_time = parse_duration(query['t'][0])
1668 if start_time is None and 'start' in query:
1669 start_time = parse_duration(query['start'][0])
1670 if end_time is None and 'end' in query:
1671 end_time = parse_duration(query['end'][0])
1673 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1674 mobj = re.search(self._NEXT_URL_RE, url)
1676 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1677 video_id = self.extract_id(url)
1680 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1681 video_webpage = self._download_webpage(url, video_id)
1683 # Attempt to extract SWF player URL
1684 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1685 if mobj is not None:
1686 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1692 def add_dash_mpd(video_info):
1693 dash_mpd = video_info.get('dashmpd')
1694 if dash_mpd and dash_mpd[0] not in dash_mpds:
1695 dash_mpds.append(dash_mpd[0])
1697 def add_dash_mpd_pr(pl_response):
1698 dash_mpd = url_or_none(try_get(
1699 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1701 if dash_mpd and dash_mpd not in dash_mpds:
1702 dash_mpds.append(dash_mpd)
1707 def extract_view_count(v_info):
1708 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1710 def extract_player_response(player_response, video_id):
1711 pl_response = str_or_none(player_response)
1714 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1715 if isinstance(pl_response, dict):
1716 add_dash_mpd_pr(pl_response)
1719 player_response = {}
1723 embed_webpage = None
1724 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1726 # We simulate the access to the video from www.youtube.com/v/{video_id}
1727 # this can be viewed without login into Youtube
1728 url = proto + '://www.youtube.com/embed/%s' % video_id
1729 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1730 data = compat_urllib_parse_urlencode({
1731 'video_id': video_id,
1732 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1733 'sts': self._search_regex(
1734 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1736 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1738 video_info_webpage = self._download_webpage(
1739 video_info_url, video_id,
1740 note='Refetching age-gated info webpage',
1741 errnote='unable to download video info webpage')
1742 except ExtractorError:
1743 video_info_webpage = None
1744 if video_info_webpage:
1745 video_info = compat_parse_qs(video_info_webpage)
1746 pl_response = video_info.get('player_response', [None])[0]
1747 player_response = extract_player_response(pl_response, video_id)
1748 add_dash_mpd(video_info)
1749 view_count = extract_view_count(video_info)
1752 # Try looking directly into the video webpage
1753 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1755 args = ytplayer_config['args']
1756 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1757 # Convert to the same format returned by compat_parse_qs
1758 video_info = dict((k, [v]) for k, v in args.items())
1759 add_dash_mpd(video_info)
1760 # Rental video is not rented but preview is available (e.g.
1761 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1762 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1763 if not video_info and args.get('ypc_vid'):
1764 return self.url_result(
1765 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1766 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1768 if not player_response:
1769 player_response = extract_player_response(args.get('player_response'), video_id)
1770 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1771 add_dash_mpd_pr(player_response)
1773 def extract_unavailable_message():
1775 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1776 msg = self._html_search_regex(
1777 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1778 video_webpage, 'unavailable %s' % kind, default=None)
1780 messages.append(msg)
1782 return '\n'.join(messages)
1784 if not video_info and not player_response:
1785 unavailable_message = extract_unavailable_message()
1786 if not unavailable_message:
1787 unavailable_message = 'Unable to extract video data'
1788 raise ExtractorError(
1789 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1791 if not isinstance(video_info, dict):
1794 video_details = try_get(
1795 player_response, lambda x: x['videoDetails'], dict) or {}
1797 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1799 self._downloader.report_warning('Unable to extract video title')
1802 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1803 if video_description:
1806 redir_url = compat_urlparse.urljoin(url, m.group(1))
1807 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1808 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1809 qs = compat_parse_qs(parsed_redir_url.query)
1815 description_original = video_description = re.sub(r'''(?x)
1817 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1818 (?:title|href)="([^"]+)"\s+
1819 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1823 ''', replace_url, video_description)
1824 video_description = clean_html(video_description)
1826 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1828 if not smuggled_data.get('force_singlefeed', False):
1829 if not self._downloader.params.get('noplaylist'):
1830 multifeed_metadata_list = try_get(
1832 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1833 compat_str) or try_get(
1834 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1835 if multifeed_metadata_list:
1838 for feed in multifeed_metadata_list.split(','):
1839 # Unquote should take place before split on comma (,) since textual
1840 # fields may contain comma as well (see
1841 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1842 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1844 def feed_entry(name):
1845 return try_get(feed_data, lambda x: x[name][0], compat_str)
1847 feed_id = feed_entry('id')
1850 feed_title = feed_entry('title')
1853 title += ' (%s)' % feed_title
1855 '_type': 'url_transparent',
1856 'ie_key': 'Youtube',
1858 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1859 {'force_singlefeed': True}),
1862 feed_ids.append(feed_id)
1864 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1865 % (', '.join(feed_ids), video_id))
1866 return self.playlist_result(entries, video_id, video_title, video_description)
1868 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1870 if view_count is None:
1871 view_count = extract_view_count(video_info)
1872 if view_count is None and video_details:
1873 view_count = int_or_none(video_details.get('viewCount'))
1876 is_live = bool_or_none(video_details.get('isLive'))
1878 # Check for "rental" videos
1879 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1880 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1882 def _extract_filesize(media_url):
1883 return int_or_none(self._search_regex(
1884 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1886 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1887 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1889 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1890 self.report_rtmp_download()
1892 'format_id': '_rtmp',
1894 'url': video_info['conn'][0],
1895 'player_url': player_url,
1897 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1898 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1899 if 'rtmpe%3Dyes' in encoded_url_map:
1900 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1903 fmt_list = video_info.get('fmt_list', [''])[0]
1905 for fmt in fmt_list.split(','):
1906 spec = fmt.split('/')
1908 width_height = spec[1].split('x')
1909 if len(width_height) == 2:
1910 formats_spec[spec[0]] = {
1911 'resolution': spec[1],
1912 'width': int_or_none(width_height[0]),
1913 'height': int_or_none(width_height[1]),
1915 for fmt in streaming_formats:
1916 itag = str_or_none(fmt.get('itag'))
1919 quality = fmt.get('quality')
1920 quality_label = fmt.get('qualityLabel') or quality
1921 formats_spec[itag] = {
1922 'asr': int_or_none(fmt.get('audioSampleRate')),
1923 'filesize': int_or_none(fmt.get('contentLength')),
1924 'format_note': quality_label,
1925 'fps': int_or_none(fmt.get('fps')),
1926 'height': int_or_none(fmt.get('height')),
1927 # bitrate for itag 43 is always 2147483647
1928 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1929 'width': int_or_none(fmt.get('width')),
1932 for fmt in streaming_formats:
1933 if fmt.get('drm_families'):
1935 url = url_or_none(fmt.get('url'))
1938 cipher = fmt.get('cipher')
1941 url_data = compat_parse_qs(cipher)
1942 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1947 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1949 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1950 # Unsupported FORMAT_STREAM_TYPE_OTF
1951 if stream_type == 3:
1954 format_id = fmt.get('itag') or url_data['itag'][0]
1957 format_id = compat_str(format_id)
1960 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1961 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1962 jsplayer_url_json = self._search_regex(
1964 embed_webpage if age_gate else video_webpage,
1965 'JS player URL (1)', default=None)
1966 if not jsplayer_url_json and not age_gate:
1967 # We need the embed website after all
1968 if embed_webpage is None:
1969 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1970 embed_webpage = self._download_webpage(
1971 embed_url, video_id, 'Downloading embed webpage')
1972 jsplayer_url_json = self._search_regex(
1973 ASSETS_RE, embed_webpage, 'JS player URL')
1975 player_url = json.loads(jsplayer_url_json)
1976 if player_url is None:
1977 player_url_json = self._search_regex(
1978 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1979 video_webpage, 'age gate player URL')
1980 player_url = json.loads(player_url_json)
1982 if 'sig' in url_data:
1983 url += '&signature=' + url_data['sig'][0]
1984 elif 's' in url_data:
1985 encrypted_sig = url_data['s'][0]
1987 if self._downloader.params.get('verbose'):
1988 if player_url is None:
1989 player_version = 'unknown'
1990 player_desc = 'unknown'
1992 if player_url.endswith('swf'):
1993 player_version = self._search_regex(
1994 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1995 'flash player', fatal=False)
1996 player_desc = 'flash player %s' % player_version
1998 player_version = self._search_regex(
1999 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2000 r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2002 'html5 player', fatal=False)
2003 player_desc = 'html5 player %s' % player_version
2005 parts_sizes = self._signature_cache_id(encrypted_sig)
2006 self.to_screen('{%s} signature length %s, %s' %
2007 (format_id, parts_sizes, player_desc))
2009 signature = self._decrypt_signature(
2010 encrypted_sig, video_id, player_url, age_gate)
2011 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2012 url += '&%s=%s' % (sp, signature)
2013 if 'ratebypass' not in url:
2014 url += '&ratebypass=yes'
2017 'format_id': format_id,
2019 'player_url': player_url,
2021 if format_id in self._formats:
2022 dct.update(self._formats[format_id])
2023 if format_id in formats_spec:
2024 dct.update(formats_spec[format_id])
2026 # Some itags are not included in DASH manifest thus corresponding formats will
2027 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2028 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2029 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2030 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2033 width = int_or_none(fmt.get('width'))
2035 height = int_or_none(fmt.get('height'))
2037 filesize = int_or_none(url_data.get(
2038 'clen', [None])[0]) or _extract_filesize(url)
2040 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2041 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2043 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2044 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2045 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2048 'filesize': filesize,
2053 'format_note': quality_label or quality,
2055 for key, value in more_fields.items():
2058 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2060 type_split = type_.split(';')
2061 kind_ext = type_split[0].split('/')
2062 if len(kind_ext) == 2:
2064 dct['ext'] = mimetype2ext(type_split[0])
2065 if kind in ('audio', 'video'):
2067 for mobj in re.finditer(
2068 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2069 if mobj.group('key') == 'codecs':
2070 codecs = mobj.group('val')
2073 dct.update(parse_codecs(codecs))
2074 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2075 dct['downloader_options'] = {
2076 # Youtube throttles chunks >~10M
2077 'http_chunk_size': 10485760,
2082 url_or_none(try_get(
2084 lambda x: x['streamingData']['hlsManifestUrl'],
2086 or url_or_none(try_get(
2087 video_info, lambda x: x['hlsvp'][0], compat_str)))
2090 m3u8_formats = self._extract_m3u8_formats(
2091 manifest_url, video_id, 'mp4', fatal=False)
2092 for a_format in m3u8_formats:
2093 itag = self._search_regex(
2094 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2096 a_format['format_id'] = itag
2097 if itag in self._formats:
2098 dct = self._formats[itag].copy()
2099 dct.update(a_format)
2101 a_format['player_url'] = player_url
2102 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2103 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2104 formats.append(a_format)
2106 error_message = extract_unavailable_message()
2107 if not error_message:
2108 error_message = clean_html(try_get(
2109 player_response, lambda x: x['playabilityStatus']['reason'],
2111 if not error_message:
2112 error_message = clean_html(
2113 try_get(video_info, lambda x: x['reason'][0], compat_str))
2115 raise ExtractorError(error_message, expected=True)
2116 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2119 video_uploader = try_get(
2120 video_info, lambda x: x['author'][0],
2121 compat_str) or str_or_none(video_details.get('author'))
2123 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2125 self._downloader.report_warning('unable to extract uploader name')
2128 video_uploader_id = None
2129 video_uploader_url = None
2131 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2133 if mobj is not None:
2134 video_uploader_id = mobj.group('uploader_id')
2135 video_uploader_url = mobj.group('uploader_url')
2137 self._downloader.report_warning('unable to extract uploader nickname')
2140 str_or_none(video_details.get('channelId'))
2141 or self._html_search_meta(
2142 'channelId', video_webpage, 'channel id', default=None)
2143 or self._search_regex(
2144 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2145 video_webpage, 'channel id', default=None, group='id'))
2146 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2149 # We try first to get a high quality image:
2150 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2151 video_webpage, re.DOTALL)
2152 if m_thumb is not None:
2153 video_thumbnail = m_thumb.group(1)
2154 elif 'thumbnail_url' not in video_info:
2155 self._downloader.report_warning('unable to extract video thumbnail')
2156 video_thumbnail = None
2157 else: # don't panic if we can't find it
2158 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2161 upload_date = self._html_search_meta(
2162 'datePublished', video_webpage, 'upload date', default=None)
2164 upload_date = self._search_regex(
2165 [r'(?s)id="eow-date.*?>(.*?)</span>',
2166 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2167 video_webpage, 'upload date', default=None)
2168 upload_date = unified_strdate(upload_date)
2170 video_license = self._html_search_regex(
2171 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2172 video_webpage, 'license', default=None)
2174 m_music = re.search(
2176 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2184 \bhref=["\']/red[^>]*>| # drop possible
2185 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2192 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2193 video_creator = clean_html(m_music.group('creator'))
2195 video_alt_title = video_creator = None
def extract_meta(field):
    """Read one named metadata row (e.g. 'Song', 'Artist', 'Album')
    from the watch page's <h4 class="title">…</h4><ul><li>…</li> markup.

    Returns the row's inner HTML, or None when the row is absent.
    Closes over ``self`` and ``video_webpage`` from the enclosing scope.
    """
    row_re = (
        r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*'
        r'<ul[^>]*>\s*<li>(.+?)</li>\s*' % field
    )
    return self._html_search_regex(row_re, video_webpage, field, default=None)
2202 track = extract_meta('Song')
2203 artist = extract_meta('Artist')
2204 album = extract_meta('Album')
2206 # Youtube Music Auto-generated description
2207 release_date = release_year = None
2208 if video_description:
2209 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2212 track = mobj.group('track').strip()
2214 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2216 album = mobj.group('album'.strip())
2217 release_year = mobj.group('release_year')
2218 release_date = mobj.group('release_date')
2220 release_date = release_date.replace('-', '')
2221 if not release_year:
2222 release_year = int(release_date[:4])
2224 release_year = int(release_year)
2226 m_episode = re.search(
2227 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2230 series = unescapeHTML(m_episode.group('series'))
2231 season_number = int(m_episode.group('season'))
2232 episode_number = int(m_episode.group('episode'))
2234 series = season_number = episode_number = None
2236 m_cat_container = self._search_regex(
2237 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2238 video_webpage, 'categories', default=None)
2240 category = self._html_search_regex(
2241 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2243 video_categories = None if category is None else [category]
2245 video_categories = None
2248 unescapeHTML(m.group('content'))
2249 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
def _extract_count(count_name):
    """Scrape the numeric counter rendered next to the given sentiment
    button (``count_name`` is 'like' or 'dislike' at the call sites).

    Returns the count as an int, or None when the markup is missing.
    Closes over ``self`` and ``video_webpage`` from the enclosing scope.
    """
    raw_count = self._search_regex(
        r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
        % re.escape(count_name),
        video_webpage, count_name, default=None)
    # str_to_int strips the thousands separators ("1,234" -> 1234).
    return str_to_int(raw_count)
2257 like_count = _extract_count('like')
2258 dislike_count = _extract_count('dislike')
2260 if view_count is None:
2261 view_count = str_to_int(self._search_regex(
2262 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2263 'view count', default=None))
2266 float_or_none(video_details.get('averageRating'))
2267 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2270 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2271 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2273 video_duration = try_get(
2274 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2275 if not video_duration:
2276 video_duration = int_or_none(video_details.get('lengthSeconds'))
2277 if not video_duration:
2278 video_duration = parse_duration(self._html_search_meta(
2279 'duration', video_webpage, 'video duration'))
2282 video_annotations = None
2283 if self._downloader.params.get('writeannotations', False):
2284 xsrf_token = self._search_regex(
2285 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2286 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2287 invideo_url = try_get(
2288 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2289 if xsrf_token and invideo_url:
2290 xsrf_field_name = self._search_regex(
2291 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2292 video_webpage, 'xsrf field name',
2293 group='xsrf_field_name', default='session_token')
2294 video_annotations = self._download_webpage(
2295 self._proto_relative_url(invideo_url),
2296 video_id, note='Downloading annotations',
2297 errnote='Unable to download video annotations', fatal=False,
2298 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2300 chapters = self._extract_chapters(description_original, video_duration)
2302 # Look for the DASH manifest
2303 if self._downloader.params.get('youtube_include_dash_manifest', True):
2304 dash_mpd_fatal = True
2305 for mpd_url in dash_mpds:
2308 def decrypt_sig(mobj):
2310 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2311 return '/signature/%s' % dec_s
2313 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2315 for df in self._extract_mpd_formats(
2316 mpd_url, video_id, fatal=dash_mpd_fatal,
2317 formats_dict=self._formats):
2318 if not df.get('filesize'):
2319 df['filesize'] = _extract_filesize(df['url'])
2320 # Do not overwrite DASH format found in some previous DASH manifest
2321 if df['format_id'] not in dash_formats:
2322 dash_formats[df['format_id']] = df
2323 # Additional DASH manifests may end up in HTTP Error 403 therefore
2324 # allow them to fail without bug report message if we already have
2325 # some DASH manifest succeeded. This is temporary workaround to reduce
2326 # burst of bug reports until we figure out the reason and whether it
2327 # can be fixed at all.
2328 dash_mpd_fatal = False
2329 except (ExtractorError, KeyError) as e:
2330 self.report_warning(
2331 'Skipping DASH manifest: %r' % e, video_id)
2333 # Remove the formats we found through non-DASH, they
2334 # contain less info and it can be wrong, because we use
2335 # fixed values (for example the resolution). See
2336 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2338 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2339 formats.extend(dash_formats.values())
2341 # Check for malformed aspect ratio
2342 stretched_m = re.search(
2343 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2346 w = float(stretched_m.group('w'))
2347 h = float(stretched_m.group('h'))
2348 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2349 # We will only process correct ratios.
2353 if f.get('vcodec') != 'none':
2354 f['stretched_ratio'] = ratio
2357 if 'reason' in video_info:
2358 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2359 regions_allowed = self._html_search_meta(
2360 'regionsAllowed', video_webpage, default=None)
2361 countries = regions_allowed.split(',') if regions_allowed else None
2362 self.raise_geo_restricted(
2363 msg=video_info['reason'][0], countries=countries)
2364 reason = video_info['reason'][0]
2365 if 'Invalid parameters' in reason:
2366 unavailable_message = extract_unavailable_message()
2367 if unavailable_message:
2368 reason = unavailable_message
2369 raise ExtractorError(
2370 'YouTube said: %s' % reason,
2371 expected=True, video_id=video_id)
2372 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2373 raise ExtractorError('This video is DRM protected.', expected=True)
2375 self._sort_formats(formats)
2377 self.mark_watched(video_id, video_info, player_response)
2381 'uploader': video_uploader,
2382 'uploader_id': video_uploader_id,
2383 'uploader_url': video_uploader_url,
2384 'channel_id': channel_id,
2385 'channel_url': channel_url,
2386 'upload_date': upload_date,
2387 'license': video_license,
2388 'creator': video_creator or artist,
2389 'title': video_title,
2390 'alt_title': video_alt_title or track,
2391 'thumbnail': video_thumbnail,
2392 'description': video_description,
2393 'categories': video_categories,
2395 'subtitles': video_subtitles,
2396 'automatic_captions': automatic_captions,
2397 'duration': video_duration,
2398 'age_limit': 18 if age_gate else 0,
2399 'annotations': video_annotations,
2400 'chapters': chapters,
2401 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2402 'view_count': view_count,
2403 'like_count': like_count,
2404 'dislike_count': dislike_count,
2405 'average_rating': average_rating,
2408 'start_time': start_time,
2409 'end_time': end_time,
2411 'season_number': season_number,
2412 'episode_number': episode_number,
2416 'release_date': release_date,
2417 'release_year': release_year,
2421 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2422 IE_DESC = 'YouTube.com playlists'
2423 _VALID_URL = r"""(?x)(?:
2428 youtube(?:kids)?\.com|
2433 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2434 \? (?:.*?[&;])*? (?:p|a|list)=
2437 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2440 (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2441 # Top tracks, they can also include dots
2447 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2448 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2449 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2450 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2451 IE_NAME = 'youtube:playlist'
2453 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2455 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2456 'uploader': 'Sergey M.',
2457 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2458 'title': 'youtube-dl public playlist',
2460 'playlist_count': 1,
2462 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2464 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2465 'uploader': 'Sergey M.',
2466 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2467 'title': 'youtube-dl empty playlist',
2469 'playlist_count': 0,
2471 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2472 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2474 'title': '29C3: Not my department',
2475 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2476 'uploader': 'Christiaan008',
2477 'uploader_id': 'ChRiStIaAn008',
2479 'playlist_count': 96,
2481 'note': 'issue #673',
2482 'url': 'PLBB231211A4F62143',
2484 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2485 'id': 'PLBB231211A4F62143',
2486 'uploader': 'Wickydoo',
2487 'uploader_id': 'Wickydoo',
2489 'playlist_mincount': 26,
2491 'note': 'Large playlist',
2492 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2494 'title': 'Uploads from Cauchemar',
2495 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2496 'uploader': 'Cauchemar',
2497 'uploader_id': 'Cauchemar89',
2499 'playlist_mincount': 799,
2501 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2503 'title': 'YDL_safe_search',
2504 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2506 'playlist_count': 2,
2507 'skip': 'This playlist is private',
2510 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2511 'playlist_count': 4,
2514 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2515 'uploader': 'milan',
2516 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2519 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2520 'playlist_mincount': 485,
2522 'title': '2018 Chinese New Singles (11/6 updated)',
2523 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2525 'uploader_id': 'sdragonfang',
2528 'note': 'Embedded SWF player',
2529 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2530 'playlist_count': 4,
2533 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2535 'skip': 'This playlist does not exist',
2537 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2538 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2540 'title': 'Uploads from Interstellar Movie',
2541 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2542 'uploader': 'Interstellar Movie',
2543 'uploader_id': 'InterstellarMovie1',
2545 'playlist_mincount': 21,
2547 # Playlist URL that does not actually serve a playlist
2548 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2550 'id': 'FqZTN594JQw',
2552 'title': "Smiley's People 01 detective, Adventure Series, Action",
2553 'uploader': 'STREEM',
2554 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2555 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2556 'upload_date': '20150526',
2557 'license': 'Standard YouTube License',
2558 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2559 'categories': ['People & Blogs'],
2563 'dislike_count': int,
2566 'skip_download': True,
2568 'skip': 'This video is not available.',
2569 'add_ie': [YoutubeIE.ie_key()],
2571 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2573 'id': 'yeWKywCrFtk',
2575 'title': 'Small Scale Baler and Braiding Rugs',
2576 'uploader': 'Backus-Page House Museum',
2577 'uploader_id': 'backuspagemuseum',
2578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2579 'upload_date': '20161008',
2580 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2581 'categories': ['Nonprofits & Activism'],
2584 'dislike_count': int,
2588 'skip_download': True,
2591 # https://github.com/ytdl-org/youtube-dl/issues/21844
2592 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2594 'title': 'Data Analysis with Dr Mike Pound',
2595 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2596 'uploader_id': 'Computerphile',
2597 'uploader': 'Computerphile',
2599 'playlist_mincount': 11,
2601 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2602 'only_matching': True,
2604 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2605 'only_matching': True,
2607 # music album playlist
2608 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2609 'only_matching': True,
2611 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2612 'only_matching': True,
2614 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2615 'only_matching': True,
2618 def _real_initialize(self):
2621 def extract_videos_from_page(self, page):
2625 for item in re.findall(
2626 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2627 attrs = extract_attributes(item)
2628 video_id = attrs['data-video-id']
2629 video_title = unescapeHTML(attrs.get('data-title'))
2631 video_title = video_title.strip()
2632 ids_in_page.append(video_id)
2633 titles_in_page.append(video_title)
2635 # Fallback with old _VIDEO_RE
2636 self.extract_videos_from_page_impl(
2637 self._VIDEO_RE, page, ids_in_page, titles_in_page)
2640 self.extract_videos_from_page_impl(
2641 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2642 ids_in_page, titles_in_page)
2643 self.extract_videos_from_page_impl(
2644 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2645 ids_in_page, titles_in_page)
2647 return zip(ids_in_page, titles_in_page)
2649 def _extract_mix(self, playlist_id):
2650 # The mixes are generated from a single video
2651 # the id of the playlist is just 'RD' + video_id
2653 last_id = playlist_id[-11:]
2654 for n in itertools.count(1):
2655 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2656 webpage = self._download_webpage(
2657 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2658 new_ids = orderedSet(re.findall(
2659 r'''(?xs)data-video-username=".*?".*?
2660 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2662 # Fetch new pages until all the videos are repeated, it seems that
2663 # there are always 51 unique videos.
2664 new_ids = [_id for _id in new_ids if _id not in ids]
2670 url_results = self._ids_to_results(ids)
2672 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2674 search_title('playlist-title')
2675 or search_title('title long-title')
2676 or search_title('title'))
2677 title = clean_html(title_span)
2679 return self.playlist_result(url_results, playlist_id, title)
2681 def _extract_playlist(self, playlist_id):
2682 url = self._TEMPLATE_URL % playlist_id
2683 page = self._download_webpage(url, playlist_id)
2685 # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2686 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2687 match = match.strip()
2688 # Check if the playlist exists or is private
2689 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2691 reason = mobj.group('reason')
2692 message = 'This playlist %s' % reason
2693 if 'private' in reason:
2694 message += ', use --username or --netrc to access it'
2696 raise ExtractorError(message, expected=True)
2697 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2698 raise ExtractorError(
2699 'Invalid parameters. Maybe URL is incorrect.',
2701 elif re.match(r'[^<]*Choose your language[^<]*', match):
2704 self.report_warning('Youtube gives an alert message: ' + match)
2706 playlist_title = self._html_search_regex(
2707 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2708 page, 'title', default=None)
2710 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2711 uploader = self._html_search_regex(
2712 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2713 page, 'uploader', default=None)
2715 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2718 uploader_id = mobj.group('uploader_id')
2719 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2721 uploader_id = uploader_url = None
2725 if not playlist_title:
2727 # Some playlist URLs don't actually serve a playlist (e.g.
2728 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2729 next(self._entries(page, playlist_id))
2730 except StopIteration:
2733 playlist = self.playlist_result(
2734 self._entries(page, playlist_id), playlist_id, playlist_title)
2736 'uploader': uploader,
2737 'uploader_id': uploader_id,
2738 'uploader_url': uploader_url,
2741 return has_videos, playlist
2743 def _check_download_just_video(self, url, playlist_id):
2744 # Check if it's a video-specific URL
2745 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2746 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2747 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2748 'video id', default=None)
2750 if self._downloader.params.get('noplaylist'):
2751 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2752 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2754 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2755 return video_id, None
2758 def _real_extract(self, url):
2759 # Extract playlist id
2760 mobj = re.match(self._VALID_URL, url)
2762 raise ExtractorError('Invalid URL: %s' % url)
2763 playlist_id = mobj.group(1) or mobj.group(2)
2765 video_id, video = self._check_download_just_video(url, playlist_id)
2769 if playlist_id.startswith(('RD', 'UL', 'PU')):
2770 # Mixes require a custom extraction process
2771 return self._extract_mix(playlist_id)
2773 has_videos, playlist = self._extract_playlist(playlist_id)
2774 if has_videos or not video_id:
2777 # Some playlist URLs don't actually serve a playlist (see
2778 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2779 # Fallback to plain video extraction if there is a video id
2780 # along with playlist id.
2781 return self.url_result(video_id, 'Youtube', video_id=video_id)
2784 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2785 IE_DESC = 'YouTube.com channels'
2786 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2787 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2788 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2789 IE_NAME = 'youtube:channel'
2791 'note': 'paginated channel',
2792 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2793 'playlist_mincount': 91,
2795 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2796 'title': 'Uploads from lex will',
2797 'uploader': 'lex will',
2798 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2801 'note': 'Age restricted channel',
2802 # from https://www.youtube.com/user/DeusExOfficial
2803 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2804 'playlist_mincount': 64,
2806 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2807 'title': 'Uploads from Deus Ex',
2808 'uploader': 'Deus Ex',
2809 'uploader_id': 'DeusExOfficial',
2812 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2813 'only_matching': True,
2815 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2816 'only_matching': True,
def suitable(cls, url):
    """Claim the URL only when neither the playlists-tab nor the live
    extractor matches it; otherwise fall back to the regular
    _VALID_URL check of the parent class."""
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
def _build_template_url(self, url, channel_id):
    """Return the canonical channel uploads listing URL for *channel_id*.

    *url* is accepted but unused here; the parameter exists so that
    subclasses (e.g. YoutubeUserIE) can derive the template from the
    original URL instead.
    """
    template = self._TEMPLATE_URL
    return template % channel_id
2827 def _real_extract(self, url):
2828 channel_id = self._match_id(url)
2830 url = self._build_template_url(url, channel_id)
2832 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2833 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2834 # otherwise fallback on channel by page extraction
2835 channel_page = self._download_webpage(
2836 url + '?view=57', channel_id,
2837 'Downloading channel page', fatal=False)
2838 if channel_page is False:
2839 channel_playlist_id = False
2841 channel_playlist_id = self._html_search_meta(
2842 'channelId', channel_page, 'channel id', default=None)
2843 if not channel_playlist_id:
2844 channel_url = self._html_search_meta(
2845 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2846 channel_page, 'channel url', default=None)
2848 channel_playlist_id = self._search_regex(
2849 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2850 channel_url, 'channel id', default=None)
2851 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2852 playlist_id = 'UU' + channel_playlist_id[2:]
2853 return self.url_result(
2854 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2856 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2857 autogenerated = re.search(r'''(?x)
2859 channel-header-autogenerated-label|
2860 yt-channel-title-autogenerated
2861 )[^"]*"''', channel_page) is not None
2864 # The videos are contained in a single page
2865 # the ajax pages can't be used, they are empty
2868 video_id, 'Youtube', video_id=video_id,
2869 video_title=video_title)
2870 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2871 return self.playlist_result(entries, channel_id)
2874 next(self._entries(channel_page, channel_id))
2875 except StopIteration:
2876 alert_message = self._html_search_regex(
2877 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2878 channel_page, 'alert', default=None, group='alert')
2880 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2882 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    """Extract a user's (or /c/ channel's) uploaded videos.

    Reuses YoutubeChannelIE's extraction machinery; only the template URL
    construction differs (user/c pages instead of channel pages).
    """
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: our regex is too permissive and would match their URLs
        # as well.  (Fixed redundant iter() around the generator expression
        # and the garbled wording of this comment.)
        other_yt_ies = (
            klass for (name, klass) in globals().items()
            if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # channel_id is ignored here; both template slots come from the URL
        # itself ('user' vs 'c' path segment, then the page name).
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a /live URL either to the current live video or to the
    channel/user page it belongs to."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        # Page download is best-effort; on failure we fall back to the
        # channel URL below.
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only hand off to YoutubeIE when the page really is a video
            # and carries a well-formed 11-character video id.
            is_video = (
                page_type.startswith('video')
                and video_id
                and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
            if is_video:
                return self.url_result(video_id, YoutubeIE.ie_key())
        # Not currently live (or page unavailable): delegate to the
        # channel/user extractor via the base URL.
        return self.url_result(base_url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """List all playlists of a user or channel; extraction itself lives in
    the playlists base class."""
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Video-link pattern used by _process_page on search-result HTML:
    # captures the 11-character video id; the 'title' group is optional
    # (taken from the anchor's title attribute when present).
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        collected = []
        limit = n

        params = {
            'search_query': query.encode('utf-8'),
        }
        params.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(params)

        # Follow the "Next" links until we have enough videos or a page
        # yields nothing new.
        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            page_videos = list(self._process_page(html_content))
            collected += page_videos
            if not page_videos or len(collected) > limit:
                break

            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # We may have overshot the requested count on the last page.
        if len(collected) > n:
            collected = collected[:n]
        return self.playlist_result(collected, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search machinery as YoutubeSearchIE; the extra 'search_sort'
    # query argument asks YouTube to order results by upload date.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extract all videos from a youtube.com/results search URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The decoded query doubles as both the page's display id and the
        # resulting playlist title.
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(
            self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract a show by delegating to its /playlists page."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # A show is just its collection of season playlists, so reuse the
        # playlists extractor on the show's /playlists page.
        playlist_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % playlist_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-account pages, so credentials are mandatory.
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            found = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            fresh_ids = [vid for vid in orderedSet(found) if vid not in seen_ids]
            if not fresh_ids:
                break

            seen_ids.extend(fresh_ids)

            for entry in self._ids_to_results(fresh_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extract the authenticated user's Watch Later ('WL') playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A watch URL with list=WL may resolve to the single video itself;
        # otherwise fall back to extracting the whole WL playlist.
        video = self._check_download_just_video(url, 'WL')[1]
        if video:
            return video
        return self._extract_playlist('WL')[1]
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the favourites page to its underlying playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # Favourites are account-specific, so login is required.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The page embeds the favourites playlist id in a list= parameter;
        # delegate actual extraction to YoutubePlaylist.
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # _FEED_NAME selects the /feed/<name> page downloaded by the base
    # class; _PLAYLIST_TITLE names the resulting playlist.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # _FEED_NAME selects the /feed/<name> page downloaded by the base
    # class; _PLAYLIST_TITLE names the resulting playlist.
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # _FEED_NAME selects the /feed/<name> page downloaded by the base
    # class; _PLAYLIST_TITLE names the resulting playlist.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs whose v= parameter was lost (typically
    an unquoted '&' in the shell) and tell the user how to fix the call."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing to download: this match always signals a user error.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc .',
            expected=True)
3282 class YoutubeTruncatedIDIE(InfoExtractor):
3283 IE_NAME = 'youtube:truncated_id'
3284 IE_DESC = False # Do not list
3285 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3288 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3289 'only_matching': True,
3292 def _real_extract(self, url):
3293 video_id = self._match_id(url)
3294 raise ExtractorError(
3295 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),