3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
37 get_element_by_attribute,
59 class YoutubeBaseInfoExtractor(InfoExtractor):
60 """Provide base functions for Youtube extractors"""
61 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
62 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
64 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
65 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
66 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
68 _NETRC_MACHINE = 'youtube'
69 # If True it will raise an error if no login info is provided
70 _LOGIN_REQUIRED = False
72 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
74 def _set_language(self):
76 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
77 # YouTube sets the expire time to about two months
78 expire_time=time.time() + 2 * 30 * 24 * 3600)
80 def _ids_to_results(self, ids):
82 self.url_result(vid_id, 'Youtube', video_id=vid_id)
87 Attempt to log in to YouTube.
88 True is returned if successful or skipped.
89 False is returned if login failed.
91 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
93 username, password = self._get_login_info()
94 # No authentication to be performed
96 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
97 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
100 login_page = self._download_webpage(
101 self._LOGIN_URL, None,
102 note='Downloading login page',
103 errnote='unable to fetch login page', fatal=False)
104 if login_page is False:
107 login_form = self._hidden_inputs(login_page)
109 def req(url, f_req, note, errnote):
110 data = login_form.copy()
113 'checkConnection': 'youtube',
114 'checkedDomains': 'youtube',
116 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
117 'f.req': json.dumps(f_req),
118 'flowName': 'GlifWebSignIn',
119 'flowEntry': 'ServiceLogin',
120 # TODO: reverse actual botguard identifier generation algo
121 'bgRequest': '["identifier",""]',
123 return self._download_json(
124 url, None, note=note, errnote=errnote,
125 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
127 data=urlencode_postdata(data), headers={
128 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
129 'Google-Accounts-XSRF': 1,
133 self._downloader.report_warning(message)
137 None, [], None, 'US', None, None, 2, False, True,
141 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
143 1, [None, None, []], None, None, None, True
148 lookup_results = req(
149 self._LOOKUP_URL, lookup_req,
150 'Looking up account info', 'Unable to look up account info')
152 if lookup_results is False:
155 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
157 warn('Unable to extract user hash')
162 None, 1, None, [1, None, None, None, [password, None, True]],
164 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
165 1, [None, None, []], None, None, None, True
168 challenge_results = req(
169 self._CHALLENGE_URL, challenge_req,
170 'Logging in', 'Unable to log in')
172 if challenge_results is False:
175 login_res = try_get(challenge_results, lambda x: x[0][5], list)
177 login_msg = try_get(login_res, lambda x: x[5], compat_str)
179 'Unable to login: %s' % 'Invalid password'
180 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
183 res = try_get(challenge_results, lambda x: x[0][-1], list)
185 warn('Unable to extract result entry')
188 login_challenge = try_get(res, lambda x: x[0][0], list)
190 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
191 if challenge_str == 'TWO_STEP_VERIFICATION':
192 # SEND_SUCCESS - TFA code has been successfully sent to phone
193 # QUOTA_EXCEEDED - reached the limit of TFA codes
194 status = try_get(login_challenge, lambda x: x[5], compat_str)
195 if status == 'QUOTA_EXCEEDED':
196 warn('Exceeded the limit of TFA codes, try later')
199 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
201 warn('Unable to extract TL')
204 tfa_code = self._get_tfa_info('2-step verification code')
208 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
209 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
212 tfa_code = remove_start(tfa_code, 'G-')
215 user_hash, None, 2, None,
217 9, None, None, None, None, None, None, None,
218 [None, tfa_code, True, 2]
222 self._TFA_URL.format(tl), tfa_req,
223 'Submitting TFA code', 'Unable to submit TFA code')
225 if tfa_results is False:
228 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
230 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
232 'Unable to finish TFA: %s' % 'Invalid TFA code'
233 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
236 check_cookie_url = try_get(
237 tfa_results, lambda x: x[0][-1][2], compat_str)
240 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
241 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
242 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
244 challenge = CHALLENGES.get(
246 '%s returned error %s.' % (self.IE_NAME, challenge_str))
247 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
250 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
252 if not check_cookie_url:
253 warn('Unable to extract CheckCookie URL')
256 check_cookie_results = self._download_webpage(
257 check_cookie_url, None, 'Checking cookie', fatal=False)
259 if check_cookie_results is False:
262 if 'https://myaccount.google.com/' not in check_cookie_results:
263 warn('Unable to log in')
268 def _download_webpage_handle(self, *args, **kwargs):
269 query = kwargs.get('query', {}).copy()
270 query['disable_polymer'] = 'true'
271 kwargs['query'] = query
272 return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
273 *args, **compat_kwargs(kwargs))
275 def _real_initialize(self):
276 if self._downloader is None:
279 if not self._login():
283 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
284 # Extract entries from page with "Load more" button
285 def _entries(self, page, playlist_id):
286 more_widget_html = content_html = page
287 for page_num in itertools.count(1):
288 for entry in self._process_page(content_html):
291 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
297 while count <= retries:
299 # Downloading page may result in intermittent 5xx HTTP error
300 # that is usually worked around with a retry
301 more = self._download_json(
302 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
303 'Downloading page #%s%s'
304 % (page_num, ' (retry #%d)' % count if count else ''),
305 transform_source=uppercase_escape)
307 except ExtractorError as e:
308 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
314 content_html = more['content_html']
315 if not content_html.strip():
316 # Some webpages show a "Load more" button but they don't
319 more_widget_html = more['load_more_widget_html']
322 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
323 def _process_page(self, content):
324 for video_id, video_title in self.extract_videos_from_page(content):
325 yield self.url_result(video_id, 'Youtube', video_id, video_title)
327 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
328 for mobj in re.finditer(video_re, page):
329 # The link with index 0 is not the first video of the playlist (not sure if still actual)
330 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
332 video_id = mobj.group('id')
333 video_title = unescapeHTML(
334 mobj.group('title')) if 'title' in mobj.groupdict() else None
336 video_title = video_title.strip()
337 if video_title == '► Play all':
340 idx = ids_in_page.index(video_id)
341 if video_title and not titles_in_page[idx]:
342 titles_in_page[idx] = video_title
344 ids_in_page.append(video_id)
345 titles_in_page.append(video_title)
347 def extract_videos_from_page(self, page):
350 self.extract_videos_from_page_impl(
351 self._VIDEO_RE, page, ids_in_page, titles_in_page)
352 return zip(ids_in_page, titles_in_page)
355 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
356 def _process_page(self, content):
357 for playlist_id in orderedSet(re.findall(
358 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
360 yield self.url_result(
361 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
363 def _real_extract(self, url):
364 playlist_id = self._match_id(url)
365 webpage = self._download_webpage(url, playlist_id)
366 title = self._og_search_title(webpage, fatal=False)
367 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
370 class YoutubeIE(YoutubeBaseInfoExtractor):
371 IE_DESC = 'YouTube.com'
372 _VALID_URL = r"""(?x)^
374 (?:https?://|//) # http(s):// or protocol-independent URL
375 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
376 (?:www\.)?deturl\.com/www\.youtube\.com/|
377 (?:www\.)?pwnyoutube\.com/|
378 (?:www\.)?hooktube\.com/|
379 (?:www\.)?yourepeat\.com/|
380 tube\.majestyc\.net/|
381 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
382 (?:(?:www|dev)\.)?invidio\.us/|
383 (?:(?:www|no)\.)?invidiou\.sh/|
384 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
385 (?:www\.)?invidious\.kabi\.tk/|
386 (?:www\.)?invidious\.enkirton\.net/|
387 (?:www\.)?invidious\.13ad\.de/|
388 (?:www\.)?invidious\.mastodon\.host/|
389 (?:www\.)?invidious\.nixnet\.xyz/|
390 (?:www\.)?invidious\.drycat\.fr/|
391 (?:www\.)?tube\.poal\.co/|
392 (?:www\.)?vid\.wxzm\.sx/|
393 (?:www\.)?yt\.elukerio\.org/|
394 (?:www\.)?yt\.lelux\.fi/|
395 (?:www\.)?kgg2m7yk5aybusll\.onion/|
396 (?:www\.)?qklhadlycap4cnod\.onion/|
397 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
398 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
399 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
400 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
401 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
402 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
403 (?:.*?\#/)? # handle anchor (#/) redirect urls
404 (?: # the various things that can precede the ID:
405 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
406 |(?: # or the v= param in all its forms
407 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
408 (?:\?|\#!?) # the params delimiter ? or # or #!
409 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
414 youtu\.be| # just youtu.be/xxxx
415 vid\.plus| # or vid.plus/xxxx
416 zwearz\.com/watch| # or zwearz.com/watch/xxxx
418 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
420 )? # all until now is optional -> you can pass the naked ID
421 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
424 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
425 WL # WL are handled by the watch later IE
428 (?(1).+)? # if we found the ID, everything can follow
429 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
430 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
432 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
433 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
434 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
435 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
436 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
437 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
438 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
439 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
440 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
441 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
442 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
443 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
444 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
445 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
446 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
447 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
448 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
449 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
453 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
454 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
455 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
456 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
457 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
458 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
459 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
461 # Apple HTTP Live Streaming
462 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
463 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
464 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
465 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
466 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
467 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
468 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
469 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
472 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
476 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
477 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
478 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
481 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
482 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
483 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
486 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
487 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
488 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
489 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
490 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
491 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
492 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
495 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
501 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
502 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
511 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
512 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
514 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
515 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
516 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
519 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
520 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
522 # Dash webm audio with opus inside
523 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
524 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
525 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
528 '_rtmp': {'protocol': 'rtmp'},
530 # av01 video only formats sometimes served with "unknown" codecs
531 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
533 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
534 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
536 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
543 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
547 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
548 'uploader': 'Philipp Hagemeister',
549 'uploader_id': 'phihag',
550 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
551 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
552 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
553 'upload_date': '20121002',
554 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
555 'categories': ['Science & Technology'],
556 'tags': ['youtube-dl'],
560 'dislike_count': int,
566 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
567 'note': 'Test generic use_cipher_signature video (#897)',
571 'upload_date': '20120506',
572 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
573 'alt_title': 'I Love It (feat. Charli XCX)',
574 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
575 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
576 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
577 'iconic ep', 'iconic', 'love', 'it'],
579 'uploader': 'Icona Pop',
580 'uploader_id': 'IconaPop',
581 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
582 'creator': 'Icona Pop',
583 'track': 'I Love It (feat. Charli XCX)',
584 'artist': 'Icona Pop',
588 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
589 'note': 'Test VEVO video with age protection (#956)',
593 'upload_date': '20130703',
594 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
595 'alt_title': 'Tunnel Vision',
596 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
598 'uploader': 'justintimberlakeVEVO',
599 'uploader_id': 'justintimberlakeVEVO',
600 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
601 'creator': 'Justin Timberlake',
602 'track': 'Tunnel Vision',
603 'artist': 'Justin Timberlake',
608 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
609 'note': 'Embed-only video (#1746)',
613 'upload_date': '20120608',
614 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
615 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
616 'uploader': 'SET India',
617 'uploader_id': 'setindia',
618 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
623 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
624 'note': 'Use the first video ID in the URL',
628 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
629 'uploader': 'Philipp Hagemeister',
630 'uploader_id': 'phihag',
631 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
632 'upload_date': '20121002',
633 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
634 'categories': ['Science & Technology'],
635 'tags': ['youtube-dl'],
639 'dislike_count': int,
642 'skip_download': True,
646 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
647 'note': '256k DASH audio (format 141) via DASH manifest',
651 'upload_date': '20121002',
652 'uploader_id': '8KVIDEO',
653 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
655 'uploader': '8KVIDEO',
656 'title': 'UHDTV TEST 8K VIDEO.mp4'
659 'youtube_include_dash_manifest': True,
662 'skip': 'format 141 not served anymore',
664 # DASH manifest with encrypted signature
666 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
670 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
671 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
673 'uploader': 'AfrojackVEVO',
674 'uploader_id': 'AfrojackVEVO',
675 'upload_date': '20131011',
678 'youtube_include_dash_manifest': True,
679 'format': '141/bestaudio[ext=m4a]',
682 # JS player signature function name containing $
684 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
688 'title': 'Taylor Swift - Shake It Off',
689 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
691 'uploader': 'TaylorSwiftVEVO',
692 'uploader_id': 'TaylorSwiftVEVO',
693 'upload_date': '20140818',
694 'creator': 'Taylor Swift',
697 'youtube_include_dash_manifest': True,
698 'format': '141/bestaudio[ext=m4a]',
703 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
708 'upload_date': '20100909',
709 'uploader': 'Amazing Atheist',
710 'uploader_id': 'TheAmazingAtheist',
711 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
712 'title': 'Burning Everyone\'s Koran',
713 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
716 # Normal age-gate video (No vevo, embed allowed)
718 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
722 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
723 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
725 'uploader': 'The Witcher',
726 'uploader_id': 'WitcherGame',
727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
728 'upload_date': '20140605',
732 # Age-gate video with encrypted signature
734 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
738 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
739 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
741 'uploader': 'LloydVEVO',
742 'uploader_id': 'LloydVEVO',
743 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
744 'upload_date': '20110629',
748 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
749 # YouTube Red ad is not captured for creator
751 'url': '__2ABJjxzNo',
756 'upload_date': '20100430',
757 'uploader_id': 'deadmau5',
758 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
759 'creator': 'deadmau5',
760 'description': 'md5:12c56784b8032162bb936a5f76d55360',
761 'uploader': 'deadmau5',
762 'title': 'Deadmau5 - Some Chords (HD)',
763 'alt_title': 'Some Chords',
765 'expected_warnings': [
766 'DASH manifest missing',
769 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
771 'url': 'lqQg6PlCWgI',
776 'upload_date': '20150827',
777 'uploader_id': 'olympic',
778 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
779 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
780 'uploader': 'Olympic',
781 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
784 'skip_download': 'requires avconv',
789 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
793 'stretched_ratio': 16 / 9.,
795 'upload_date': '20110310',
796 'uploader_id': 'AllenMeow',
797 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
798 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
800 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
803 # url_encoded_fmt_stream_map is empty string
805 'url': 'qEJwOuvDf7I',
809 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
811 'upload_date': '20150404',
812 'uploader_id': 'spbelect',
813 'uploader': 'Наблюдатели Петербурга',
816 'skip_download': 'requires avconv',
818 'skip': 'This live event has ended.',
820 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
822 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
826 'title': 'md5:7b81415841e02ecd4313668cde88737a',
827 'description': 'md5:116377fd2963b81ec4ce64b542173306',
829 'upload_date': '20150625',
830 'uploader_id': 'dorappi2000',
831 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
832 'uploader': 'dorappi2000',
833 'formats': 'mincount:31',
835 'skip': 'not actual anymore',
837 # DASH manifest with segment_list
839 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
840 'md5': '8ce563a1d667b599d21064e982ab9e31',
844 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
845 'uploader': 'Airtek',
846 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
847 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
848 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
851 'youtube_include_dash_manifest': True,
852 'format': '135', # bestvideo
854 'skip': 'This live event has ended.',
857 # Multifeed videos (multiple cameras), URL is for Main Camera
858 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
861 'title': 'teamPGP: Rocket League Noob Stream',
862 'description': 'md5:dc7872fb300e143831327f1bae3af010',
868 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
869 'description': 'md5:dc7872fb300e143831327f1bae3af010',
871 'upload_date': '20150721',
872 'uploader': 'Beer Games Beer',
873 'uploader_id': 'beergamesbeer',
874 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
875 'license': 'Standard YouTube License',
881 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
882 'description': 'md5:dc7872fb300e143831327f1bae3af010',
884 'upload_date': '20150721',
885 'uploader': 'Beer Games Beer',
886 'uploader_id': 'beergamesbeer',
887 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
888 'license': 'Standard YouTube License',
894 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
895 'description': 'md5:dc7872fb300e143831327f1bae3af010',
897 'upload_date': '20150721',
898 'uploader': 'Beer Games Beer',
899 'uploader_id': 'beergamesbeer',
900 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
901 'license': 'Standard YouTube License',
907 'title': 'teamPGP: Rocket League Noob Stream (zim)',
908 'description': 'md5:dc7872fb300e143831327f1bae3af010',
910 'upload_date': '20150721',
911 'uploader': 'Beer Games Beer',
912 'uploader_id': 'beergamesbeer',
913 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
914 'license': 'Standard YouTube License',
918 'skip_download': True,
920 'skip': 'This video is not available.',
923 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
924 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
927 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
930 'skip': 'Not multifeed anymore',
933 'url': 'https://vid.plus/FlRa-iH7PGw',
934 'only_matching': True,
937 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
938 'only_matching': True,
941 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
942 # Also tests cut-off URL expansion in video description (see
943 # https://github.com/ytdl-org/youtube-dl/issues/1892,
944 # https://github.com/ytdl-org/youtube-dl/issues/8164)
945 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
949 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
950 'alt_title': 'Dark Walk - Position Music',
951 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
953 'upload_date': '20151119',
954 'uploader_id': 'IronSoulElf',
955 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
956 'uploader': 'IronSoulElf',
957 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
958 'track': 'Dark Walk - Position Music',
959 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
960 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
963 'skip_download': True,
967 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
968 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
969 'only_matching': True,
972 # Video with yt:stretch=17:0
973 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
977 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
978 'description': 'md5:ee18a25c350637c8faff806845bddee9',
979 'upload_date': '20151107',
980 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
981 'uploader': 'CH GAMER DROID',
984 'skip_download': True,
986 'skip': 'This video does not exist.',
989 # Video licensed under Creative Commons
990 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
994 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
995 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
997 'upload_date': '20150127',
998 'uploader_id': 'BerkmanCenter',
999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1000 'uploader': 'The Berkman Klein Center for Internet & Society',
1001 'license': 'Creative Commons Attribution license (reuse allowed)',
1004 'skip_download': True,
1008 # Channel-like uploader_url
1009 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1011 'id': 'eQcmzGIKrzg',
1013 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1014 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1016 'upload_date': '20151119',
1017 'uploader': 'Bernie Sanders',
1018 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1019 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1020 'license': 'Creative Commons Attribution license (reuse allowed)',
1023 'skip_download': True,
1027 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1028 'only_matching': True,
1031 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1032 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1033 'only_matching': True,
1036 # Rental video preview
1037 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1039 'id': 'uGpuVWrhIzE',
1041 'title': 'Piku - Trailer',
1042 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1043 'upload_date': '20150811',
1044 'uploader': 'FlixMatrix',
1045 'uploader_id': 'FlixMatrixKaravan',
1046 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1047 'license': 'Standard YouTube License',
1050 'skip_download': True,
1052 'skip': 'This video is not available.',
1055 # YouTube Red video with episode data
1056 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1058 'id': 'iqKdEhx-dD4',
1060 'title': 'Isolation - Mind Field (Ep 1)',
1061 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1063 'upload_date': '20170118',
1064 'uploader': 'Vsauce',
1065 'uploader_id': 'Vsauce',
1066 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1067 'series': 'Mind Field',
1069 'episode_number': 1,
1072 'skip_download': True,
1074 'expected_warnings': [
1075 'Skipping DASH manifest',
1079 # The following content has been identified by the YouTube community
1080 # as inappropriate or offensive to some audiences.
1081 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1083 'id': '6SJNVb0GnPI',
1085 'title': 'Race Differences in Intelligence',
1086 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1088 'upload_date': '20140124',
1089 'uploader': 'New Century Foundation',
1090 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1091 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1094 'skip_download': True,
1099 'url': '1t24XAntNCY',
1100 'only_matching': True,
1103 # geo restricted to JP
1104 'url': 'sJL6WA-aGkQ',
1105 'only_matching': True,
1108 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1109 'only_matching': True,
1112 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1113 'only_matching': True,
1117 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1118 'only_matching': True,
1121 # Video with unsupported adaptive stream type formats
1122 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1124 'id': 'Z4Vy8R84T1U',
1126 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1127 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1129 'upload_date': '20130923',
1130 'uploader': 'Amelia Putri Harwita',
1131 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1132 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1133 'formats': 'maxcount:10',
1136 'skip_download': True,
1137 'youtube_include_dash_manifest': False,
1141 # Youtube Music Auto-generated description
1142 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1144 'id': 'MgNrAu2pzNs',
1146 'title': 'Voyeur Girl',
1147 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1148 'upload_date': '20190312',
1149 'uploader': 'Various Artists - Topic',
1150 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1151 'artist': 'Stephen',
1152 'track': 'Voyeur Girl',
1153 'album': 'it\'s too much love to know my dear',
1154 'release_date': '20190313',
1155 'release_year': 2019,
1158 'skip_download': True,
1162 # Youtube Music Auto-generated description
1163 # Retrieve 'artist' field from 'Artist:' in video description
1164 # when it is present on youtube music video
1165 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1167 'id': 'k0jLE7tTwjY',
1169 'title': 'Latch Feat. Sam Smith',
1170 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1171 'upload_date': '20150110',
1172 'uploader': 'Various Artists - Topic',
1173 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1174 'artist': 'Disclosure',
1175 'track': 'Latch Feat. Sam Smith',
1176 'album': 'Latch Featuring Sam Smith',
1177 'release_date': '20121008',
1178 'release_year': 2012,
1181 'skip_download': True,
1185 # Youtube Music Auto-generated description
1186 # handle multiple artists on youtube music video
1187 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1189 'id': '74qn0eJSjpA',
1191 'title': 'Eastside',
1192 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1193 'upload_date': '20180710',
1194 'uploader': 'Benny Blanco - Topic',
1195 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1196 'artist': 'benny blanco, Halsey, Khalid',
1197 'track': 'Eastside',
1198 'album': 'Eastside',
1199 'release_date': '20180713',
1200 'release_year': 2018,
1203 'skip_download': True,
1207 # Youtube Music Auto-generated description
1208 # handle youtube music video with release_year and no release_date
1209 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1211 'id': '-hcAI0g-f5M',
1213 'title': 'Put It On Me',
1214 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1215 'upload_date': '20180426',
1216 'uploader': 'Matt Maeson - Topic',
1217 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1218 'artist': 'Matt Maeson',
1219 'track': 'Put It On Me',
1220 'album': 'The Hearse',
1221 'release_date': None,
1222 'release_year': 2018,
1225 'skip_download': True,
def __init__(self, *args, **kwargs):
    """Initialize the extractor and its per-instance player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature cache id) -> decipher function;
    # populated lazily by _decrypt_signature().
    self._player_cache = {}
def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Announce that metadata extraction for video_id has started."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Report that the requested format is not available for this video.

    (The previous docstring — "Report extracted video URL." — was a
    copy-paste error and described a different action.)
    """
    # NOTE: 'format' shadows the built-in of the same name, but the
    # parameter name is kept so keyword callers remain compatible.
    self.to_screen('%s: Format %s not available' % (video_id, format))
def report_rtmp_download(self):
    """Indicate that the download will go over the RTMP protocol."""
    notice = 'RTMP download detected'
    self.to_screen(notice)
def _signature_cache_id(self, example_sig):
    """Return a compact string id for a signature layout.

    The id is the dot-joined lengths of the signature's dot-separated
    parts, e.g. 'abc.defg' -> '3.4'.
    """
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
def _extract_signature_function(self, video_id, player_url, example_sig):
    # Build (and disk-cache) a callable that deciphers scrambled
    # signatures for the player found at player_url.
    # NOTE(review): several statements appear to be missing from this copy
    # (the re.match() call that produces id_m, the download_note
    # assignment, the 'else:' before the invalid-player assert and the
    # final 'return res') — restore them from upstream before running.
        # Matches e.g. '.../html5player-<id>.js' or '.../<id>/base.js';
        # 'id' names the player build, 'ext' its type (js or swf).
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # func_id doubles as a cache filename component, so it must not
    # contain path separators.
    assert os.path.basename(func_id) == func_id

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # A cached spec is a list of source indices: deciphering is just
        # a reordering of the input characters.
        return lambda s: ''.join(s[i] for i in cache_spec)

        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_swf(code)
        assert False, 'Invalid player type %r' % player_type

    # Probe the decipher function with a known string to record which
    # input position each output character comes from.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
def _print_sig_code(self, func, example_sig):
    # Pretty-print the transformation performed by func as equivalent
    # Python slice expressions (used with --youtube-print-sig-code).
    # NOTE(review): parts of gen_sig_code (the 'step' initialisation,
    # 'continue' statements and some branches) appear to be missing from
    # this copy.
    def gen_sig_code(idxs):
        # Turn a list of source indices into 's[...]' expressions,
        # compressing arithmetic runs into slices.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                yield _genslice(start, prev, step)
            if i - prev in [-1, 1]:
            yield 's[%d]' % prev
                yield _genslice(start, i, step)

    # Probe func once with a known string so each output character's
    # source index can be recovered via ord().
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Build a signature decipher callable from the player's JS code.

    Locates the name of the obfuscated signature function inside
    jscode, extracts it via JSInterpreter and returns a function that
    maps a scrambled signature string to its deciphered form.
    """
    # Known shapes of the call site referencing the signature function,
    # tried in order from most to least specific.
    sig_fn_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_fn_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(funcname)

    def apply_sig(s):
        return decipher([s])
    return apply_sig
def _parse_sig_swf(self, file_contents):
    """Extract the signature decipher routine from a Flash (SWF) player.

    Returns a callable mapping a scrambled signature string to its
    deciphered form.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def apply_sig(s):
        return decipher([s])
    return apply_sig
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature"""

    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Normalise scheme-relative and site-relative player URLs to
    # absolute https URLs.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
        # NOTE(review): a 'try:' opener, the closing ')' of the
        # _extract_signature_function() call and the final
        # 'return func(s)' appear to be missing from this copy — the
        # bare 'except' below confirms it.
        # Decipher functions are cached per (player, signature layout).
        player_id = (player_url, self._signature_cache_id(s))
        if player_id not in self._player_cache:
            func = self._extract_signature_function(
                video_id, player_url, s
            self._player_cache[player_id] = func
        func = self._player_cache[player_id]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(func, s)
    except Exception as e:
        # Surface the full traceback — signature breakage is a common
        # cause of extraction failures and the details help debugging.
        tb = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + tb, cause=e)
def _get_subtitles(self, video_id, webpage):
    """Fetch the manually-created subtitles available for video_id.

    Returns a dict mapping language code -> list of subtitle format
    dicts (one per ext in self._SUBTITLE_FORMATS).
    """
    # NOTE(review): the 'try:' opener, the sub_lang_list initialisation,
    # a 'continue' and several dict entries appear to be missing from
    # this copy.
        subs_doc = self._download_xml(
            'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
    except ExtractorError as err:
        # Best effort: a failed subtitle listing is reported, not fatal.
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))

    # Each <track> element describes one subtitle language.
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang in sub_lang_list:
        for ext in self._SUBTITLE_FORMATS:
            params = compat_urllib_parse_urlencode({
                'name': track.attrib['name'].encode('utf-8'),
            sub_formats.append({
                'url': 'https://www.youtube.com/api/timedtext?' + params,
            sub_lang_list[lang] = sub_formats
    if not sub_lang_list:
        self._downloader.report_warning('video doesn\'t have subtitles')
    return sub_lang_list
def _get_ytplayer_config(self, video_id, webpage):
    """Locate and parse the ytplayer.config JSON embedded in webpage.

    Returns the parsed dict, or None when the config cannot be found
    or parsed.
    """
    # NOTE(review): the 'patterns = (' opener, its closing ')' and an
    # 'if config:' guard before the return appear to be missing from
    # this copy.
        # User data may contain arbitrary character sequences that may affect
        # JSON extraction with regex, e.g. when '};' is contained the second
        # regex won't capture the whole JSON. Yet working around by trying more
        # concrete regex first keeping in mind proper quoted string handling
        # to be implemented in future that will replace this workaround (see
        # https://github.com/ytdl-org/youtube-dl/issues/7468,
        # https://github.com/ytdl-org/youtube-dl/pull/7599)
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    config = self._search_regex(
        patterns, webpage, 'ytplayer.config', default=None)
        return self._parse_json(
            uppercase_escape(config), video_id, fatal=False)
def _get_automatic_captions(self, video_id, webpage):
    """We need the webpage for getting the captions url, pass it as an
    argument to speed up the process."""
    # NOTE(review): many statements appear to be missing from this copy
    # ('return {}' after warnings, the 'try:' opener, 'if caption_url:',
    # several dict entries and list initialisations) — restore from
    # upstream before running.
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        args = player_config['args']
        caption_url = args.get('ttsurl')
            # Legacy ttsurl-based flow: list available caption targets
            # from the timedtext service.
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse_urlencode({
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')
            # One subtitle entry per translation target language.
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                for ext in self._SUBTITLE_FORMATS:
                    params = compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'kind': caption_kind,
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list

        def make_captions(sub_url, sub_langs):
            # Build per-language format lists by rewriting the query
            # string of a base caption URL.
            parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
            caption_qs = compat_parse_qs(parsed_sub_url.query)
            for sub_lang in sub_langs:
                for ext in self._SUBTITLE_FORMATS:
                        'tlang': [sub_lang],
                    sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                        query=compat_urllib_parse_urlencode(caption_qs, True)))
                    sub_formats.append({
                captions[sub_lang] = sub_formats

        # New captions format as of 22.06.2017
        player_response = args.get('player_response')
        if player_response and isinstance(player_response, compat_str):
            player_response = self._parse_json(
                player_response, video_id, fatal=False)
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)
        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        # Not used anymore as of 22.06.2017
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        for lang in caption_translation_languages.split(','):
            lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
            sub_lang = lang_qs.get('lc', [None])[0]
                sub_lang_list.append(sub_lang)
        return make_captions(caption_url, sub_lang_list)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(err_msg)
def _mark_watched(self, video_id, video_info, player_response):
    # Ping YouTube's playback-stats URL so the video shows up as watched
    # in the user's history (used with --mark-watched).
    # NOTE(review): the first try_get() appears to have lost its first
    # argument (presumably player_response), and the 'return' under
    # 'if not playback_url:' plus the qs update that inserts the cpn
    # are missing from this copy.
    playback_url = url_or_none(try_get(
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
        video_info, lambda x: x['videostats_playback_base_url'][0]))
    if not playback_url:
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # NOTE(review): randint(0, 256) spans 257 values, so after '& 63'
    # index 0 is slightly over-represented; random.randrange(64) would
    # be uniform (harmless here, as any cpn value is accepted).
    cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    # Fire-and-forget: failures only produce a warning.
    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
def _extract_urls(webpage):
    # Collect all YouTube embed URLs/ids found in an arbitrary webpage.
    # NOTE(review): the 'entries = [' opener, the middle of the first
    # regex, the map() callable for the lazyYT case and the final
    # 'return entries' are missing from this copy.
    # Embedded YouTube player
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

    # lazyYT YouTube embed
    entries.extend(list(map(
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # The video id is the last capture group of each match.
    entries.extend(m[-1] for m in matches)
def _extract_url(webpage):
    """Return the first embedded YouTube URL in webpage, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
def extract_id(cls, url):
    """Extract the video id from url.

    Raises ExtractorError when url does not match cls._VALID_URL.
    """
    # NOTE(review): the 'if mobj is None:' guard before the raise and the
    # final 'return video_id' appear to be missing from this copy; as
    # written the raise would be unconditional.
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
        raise ExtractorError('Invalid URL: %s' % url)
    video_id = mobj.group(2)
def _extract_chapters(description, duration):
    # Parse chapter markers (timestamp links) out of a video description.
    # Builds a list of {start_time, end_time, title} dicts; entries with
    # missing or inconsistent timestamps are skipped.
    # NOTE(review): several statements are missing from this copy (the
    # closing argument of re.findall, the chapters list initialisation,
    # the bodies of the guard clauses and the final return).
        # Each chapter line ends with an
        # <a onclick="yt.www.watch.player.seekTo...> link whose text is
        # the mm:ss or hh:mm:ss start time.
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
    if not chapter_lines:
    for next_num, (chapter_line, time_point) in enumerate(
            chapter_lines, start=1):
        start_time = parse_duration(time_point)
        if start_time is None:
        if start_time > duration:
        # A chapter ends where the next one starts (or at video end).
        end_time = (duration if next_num == len(chapter_lines)
                    else parse_duration(chapter_lines[next_num][1]))
        if end_time is None:
        if end_time > duration:
        if start_time > end_time:
        # The chapter title is the line with the timestamp link removed
        # and whitespace/dash separators normalised.
        chapter_title = re.sub(
            r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
        chapter_title = re.sub(r'\s+', ' ', chapter_title)
            'start_time': start_time,
            'end_time': end_time,
            'title': chapter_title,
1651 def _real_extract(self, url):
1652 url, smuggled_data = unsmuggle_url(url, {})
1655 'http' if self._downloader.params.get('prefer_insecure', False)
1660 parsed_url = compat_urllib_parse_urlparse(url)
1661 for component in [parsed_url.fragment, parsed_url.query]:
1662 query = compat_parse_qs(component)
1663 if start_time is None and 't' in query:
1664 start_time = parse_duration(query['t'][0])
1665 if start_time is None and 'start' in query:
1666 start_time = parse_duration(query['start'][0])
1667 if end_time is None and 'end' in query:
1668 end_time = parse_duration(query['end'][0])
1670 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1671 mobj = re.search(self._NEXT_URL_RE, url)
1673 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1674 video_id = self.extract_id(url)
1677 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1678 video_webpage = self._download_webpage(url, video_id)
1680 # Attempt to extract SWF player URL
1681 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1682 if mobj is not None:
1683 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1689 def add_dash_mpd(video_info):
1690 dash_mpd = video_info.get('dashmpd')
1691 if dash_mpd and dash_mpd[0] not in dash_mpds:
1692 dash_mpds.append(dash_mpd[0])
1694 def add_dash_mpd_pr(pl_response):
1695 dash_mpd = url_or_none(try_get(
1696 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1698 if dash_mpd and dash_mpd not in dash_mpds:
1699 dash_mpds.append(dash_mpd)
1704 def extract_view_count(v_info):
1705 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1707 def extract_token(v_info):
1708 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1710 def extract_player_response(player_response, video_id):
1711 pl_response = str_or_none(player_response)
1714 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1715 if isinstance(pl_response, dict):
1716 add_dash_mpd_pr(pl_response)
1719 player_response = {}
1722 embed_webpage = None
1723 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1725 # We simulate the access to the video from www.youtube.com/v/{video_id}
1726 # this can be viewed without login into Youtube
1727 url = proto + '://www.youtube.com/embed/%s' % video_id
1728 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1729 data = compat_urllib_parse_urlencode({
1730 'video_id': video_id,
1731 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1732 'sts': self._search_regex(
1733 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1735 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1736 video_info_webpage = self._download_webpage(
1737 video_info_url, video_id,
1738 note='Refetching age-gated info webpage',
1739 errnote='unable to download video info webpage')
1740 video_info = compat_parse_qs(video_info_webpage)
1741 pl_response = video_info.get('player_response', [None])[0]
1742 player_response = extract_player_response(pl_response, video_id)
1743 add_dash_mpd(video_info)
1744 view_count = extract_view_count(video_info)
1749 # Try looking directly into the video webpage
1750 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1752 args = ytplayer_config['args']
1753 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1754 # Convert to the same format returned by compat_parse_qs
1755 video_info = dict((k, [v]) for k, v in args.items())
1756 add_dash_mpd(video_info)
1757 # Rental video is not rented but preview is available (e.g.
1758 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1759 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1760 if not video_info and args.get('ypc_vid'):
1761 return self.url_result(
1762 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1763 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1765 sts = ytplayer_config.get('sts')
1766 if not player_response:
1767 player_response = extract_player_response(args.get('player_response'), video_id)
1768 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1769 add_dash_mpd_pr(player_response)
1770 # We also try looking in get_video_info since it may contain different dashmpd
1771 # URL that points to a DASH manifest with possibly different itag set (some itags
1772 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1773 # manifest pointed by get_video_info's dashmpd).
1774 # The general idea is to take a union of itags of both DASH manifests (for example
1775 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1776 self.report_video_info_webpage_download(video_id)
1777 for el in ('embedded', 'detailpage', 'vevo', ''):
1779 'video_id': video_id,
1789 video_info_webpage = self._download_webpage(
1790 '%s://www.youtube.com/get_video_info' % proto,
1791 video_id, note=False,
1792 errnote='unable to download video info webpage',
1793 fatal=False, query=query)
1794 if not video_info_webpage:
1796 get_video_info = compat_parse_qs(video_info_webpage)
1797 if not player_response:
1798 pl_response = get_video_info.get('player_response', [None])[0]
1799 player_response = extract_player_response(pl_response, video_id)
1800 add_dash_mpd(get_video_info)
1801 if view_count is None:
1802 view_count = extract_view_count(get_video_info)
1804 video_info = get_video_info
1805 get_token = extract_token(get_video_info)
1807 # Different get_video_info requests may report different results, e.g.
1808 # some may report video unavailability, but some may serve it without
1809 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1810 # the original webpage as well as el=info and el=embedded get_video_info
1811 # requests report video unavailability due to geo restriction while
1812 # el=detailpage succeeds and returns valid data). This is probably
1813 # due to YouTube measures against IP ranges of hosting providers.
1814 # Working around by preferring the first succeeded video_info containing
1815 # the token if no such video_info yet was found.
1816 token = extract_token(video_info)
1818 video_info = get_video_info
1821 def extract_unavailable_message():
1823 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1824 msg = self._html_search_regex(
1825 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1826 video_webpage, 'unavailable %s' % kind, default=None)
1828 messages.append(msg)
1830 return '\n'.join(messages)
1833 unavailable_message = extract_unavailable_message()
1834 if not unavailable_message:
1835 unavailable_message = 'Unable to extract video data'
1836 raise ExtractorError(
1837 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1839 video_details = try_get(
1840 player_response, lambda x: x['videoDetails'], dict) or {}
1842 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1844 self._downloader.report_warning('Unable to extract video title')
1847 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1848 if video_description:
1851 redir_url = compat_urlparse.urljoin(url, m.group(1))
1852 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1853 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1854 qs = compat_parse_qs(parsed_redir_url.query)
1860 description_original = video_description = re.sub(r'''(?x)
1862 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1863 (?:title|href)="([^"]+)"\s+
1864 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1868 ''', replace_url, video_description)
1869 video_description = clean_html(video_description)
1871 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1873 if not smuggled_data.get('force_singlefeed', False):
1874 if not self._downloader.params.get('noplaylist'):
1875 multifeed_metadata_list = try_get(
1877 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1878 compat_str) or try_get(
1879 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1880 if multifeed_metadata_list:
1883 for feed in multifeed_metadata_list.split(','):
1884 # Unquote should take place before split on comma (,) since textual
1885 # fields may contain comma as well (see
1886 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1887 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1889 '_type': 'url_transparent',
1890 'ie_key': 'Youtube',
1892 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1893 {'force_singlefeed': True}),
1894 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1896 feed_ids.append(feed_data['id'][0])
1898 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1899 % (', '.join(feed_ids), video_id))
1900 return self.playlist_result(entries, video_id, video_title, video_description)
1902 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1904 if view_count is None:
1905 view_count = extract_view_count(video_info)
1906 if view_count is None and video_details:
1907 view_count = int_or_none(video_details.get('viewCount'))
1910 is_live = bool_or_none(video_details.get('isLive'))
1912 # Check for "rental" videos
1913 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1914 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1916 def _extract_filesize(media_url):
1917 return int_or_none(self._search_regex(
1918 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1920 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1921 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1923 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1924 self.report_rtmp_download()
1926 'format_id': '_rtmp',
1928 'url': video_info['conn'][0],
1929 'player_url': player_url,
1931 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1932 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1933 if 'rtmpe%3Dyes' in encoded_url_map:
1934 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1937 fmt_list = video_info.get('fmt_list', [''])[0]
1939 for fmt in fmt_list.split(','):
1940 spec = fmt.split('/')
1942 width_height = spec[1].split('x')
1943 if len(width_height) == 2:
1944 formats_spec[spec[0]] = {
1945 'resolution': spec[1],
1946 'width': int_or_none(width_height[0]),
1947 'height': int_or_none(width_height[1]),
1949 for fmt in streaming_formats:
1950 itag = str_or_none(fmt.get('itag'))
1953 quality = fmt.get('quality')
1954 quality_label = fmt.get('qualityLabel') or quality
1955 formats_spec[itag] = {
1956 'asr': int_or_none(fmt.get('audioSampleRate')),
1957 'filesize': int_or_none(fmt.get('contentLength')),
1958 'format_note': quality_label,
1959 'fps': int_or_none(fmt.get('fps')),
1960 'height': int_or_none(fmt.get('height')),
1961 # bitrate for itag 43 is always 2147483647
1962 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1963 'width': int_or_none(fmt.get('width')),
1966 for fmt in streaming_formats:
1967 if fmt.get('drm_families'):
1969 url = url_or_none(fmt.get('url'))
1972 cipher = fmt.get('cipher')
1975 url_data = compat_parse_qs(cipher)
1976 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1981 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1983 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1984 # Unsupported FORMAT_STREAM_TYPE_OTF
1985 if stream_type == 3:
1988 format_id = fmt.get('itag') or url_data['itag'][0]
1991 format_id = compat_str(format_id)
1994 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1995 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1996 jsplayer_url_json = self._search_regex(
1998 embed_webpage if age_gate else video_webpage,
1999 'JS player URL (1)', default=None)
2000 if not jsplayer_url_json and not age_gate:
2001 # We need the embed website after all
2002 if embed_webpage is None:
2003 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2004 embed_webpage = self._download_webpage(
2005 embed_url, video_id, 'Downloading embed webpage')
2006 jsplayer_url_json = self._search_regex(
2007 ASSETS_RE, embed_webpage, 'JS player URL')
2009 player_url = json.loads(jsplayer_url_json)
2010 if player_url is None:
2011 player_url_json = self._search_regex(
2012 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2013 video_webpage, 'age gate player URL')
2014 player_url = json.loads(player_url_json)
2016 if 'sig' in url_data:
2017 url += '&signature=' + url_data['sig'][0]
2018 elif 's' in url_data:
2019 encrypted_sig = url_data['s'][0]
2021 if self._downloader.params.get('verbose'):
2022 if player_url is None:
2023 player_version = 'unknown'
2024 player_desc = 'unknown'
2026 if player_url.endswith('swf'):
2027 player_version = self._search_regex(
2028 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2029 'flash player', fatal=False)
2030 player_desc = 'flash player %s' % player_version
2032 player_version = self._search_regex(
2033 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2034 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2036 'html5 player', fatal=False)
2037 player_desc = 'html5 player %s' % player_version
2039 parts_sizes = self._signature_cache_id(encrypted_sig)
2040 self.to_screen('{%s} signature length %s, %s' %
2041 (format_id, parts_sizes, player_desc))
2043 signature = self._decrypt_signature(
2044 encrypted_sig, video_id, player_url, age_gate)
2045 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2046 url += '&%s=%s' % (sp, signature)
2047 if 'ratebypass' not in url:
2048 url += '&ratebypass=yes'
2051 'format_id': format_id,
2053 'player_url': player_url,
2055 if format_id in self._formats:
2056 dct.update(self._formats[format_id])
2057 if format_id in formats_spec:
2058 dct.update(formats_spec[format_id])
2060 # Some itags are not included in DASH manifest thus corresponding formats will
2061 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2062 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2063 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2064 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2067 width = int_or_none(fmt.get('width'))
2069 height = int_or_none(fmt.get('height'))
2071 filesize = int_or_none(url_data.get(
2072 'clen', [None])[0]) or _extract_filesize(url)
2074 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2075 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2077 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2078 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2079 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2082 'filesize': filesize,
2087 'format_note': quality_label or quality,
2089 for key, value in more_fields.items():
2092 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2094 type_split = type_.split(';')
2095 kind_ext = type_split[0].split('/')
2096 if len(kind_ext) == 2:
2098 dct['ext'] = mimetype2ext(type_split[0])
2099 if kind in ('audio', 'video'):
2101 for mobj in re.finditer(
2102 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2103 if mobj.group('key') == 'codecs':
2104 codecs = mobj.group('val')
2107 dct.update(parse_codecs(codecs))
2108 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2109 dct['downloader_options'] = {
2110 # Youtube throttles chunks >~10M
2111 'http_chunk_size': 10485760,
2116 url_or_none(try_get(
2118 lambda x: x['streamingData']['hlsManifestUrl'],
2120 or url_or_none(try_get(
2121 video_info, lambda x: x['hlsvp'][0], compat_str)))
2124 m3u8_formats = self._extract_m3u8_formats(
2125 manifest_url, video_id, 'mp4', fatal=False)
2126 for a_format in m3u8_formats:
2127 itag = self._search_regex(
2128 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2130 a_format['format_id'] = itag
2131 if itag in self._formats:
2132 dct = self._formats[itag].copy()
2133 dct.update(a_format)
2135 a_format['player_url'] = player_url
2136 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2137 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2138 formats.append(a_format)
2140 error_message = extract_unavailable_message()
2141 if not error_message:
2142 error_message = clean_html(try_get(
2143 player_response, lambda x: x['playabilityStatus']['reason'],
2145 if not error_message:
2146 error_message = clean_html(
2147 try_get(video_info, lambda x: x['reason'][0], compat_str))
2149 raise ExtractorError(error_message, expected=True)
2150 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2153 video_uploader = try_get(
2154 video_info, lambda x: x['author'][0],
2155 compat_str) or str_or_none(video_details.get('author'))
2157 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2159 self._downloader.report_warning('unable to extract uploader name')
2162 video_uploader_id = None
2163 video_uploader_url = None
2165 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2167 if mobj is not None:
2168 video_uploader_id = mobj.group('uploader_id')
2169 video_uploader_url = mobj.group('uploader_url')
2171 self._downloader.report_warning('unable to extract uploader nickname')
2174 str_or_none(video_details.get('channelId'))
2175 or self._html_search_meta(
2176 'channelId', video_webpage, 'channel id', default=None)
2177 or self._search_regex(
2178 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2179 video_webpage, 'channel id', default=None, group='id'))
2180 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2183 # We try first to get a high quality image:
2184 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2185 video_webpage, re.DOTALL)
2186 if m_thumb is not None:
2187 video_thumbnail = m_thumb.group(1)
2188 elif 'thumbnail_url' not in video_info:
2189 self._downloader.report_warning('unable to extract video thumbnail')
2190 video_thumbnail = None
2191 else: # don't panic if we can't find it
2192 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2195 upload_date = self._html_search_meta(
2196 'datePublished', video_webpage, 'upload date', default=None)
2198 upload_date = self._search_regex(
2199 [r'(?s)id="eow-date.*?>(.*?)</span>',
2200 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2201 video_webpage, 'upload date', default=None)
2202 upload_date = unified_strdate(upload_date)
2204 video_license = self._html_search_regex(
2205 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2206 video_webpage, 'license', default=None)
2208 m_music = re.search(
2210 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2218 \bhref=["\']/red[^>]*>| # drop possible
2219 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2226 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2227 video_creator = clean_html(m_music.group('creator'))
2229 video_alt_title = video_creator = None
def extract_meta(field):
    # Scrape one metadata row of the watch page, i.e. a
    # <h4 class="title">FIELD</h4><ul><li>value</li> fragment.
    # Returns None when the row is absent.
    pattern = (
        r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*'
        % field)
    return self._html_search_regex(
        pattern, video_webpage, field, default=None)
2236 track = extract_meta('Song')
2237 artist = extract_meta('Artist')
2238 album = extract_meta('Album')
# Youtube Music Auto-generated description
# Such descriptions look like:
#   Provided to YouTube by <label>\n\n<track> · <artist>\n\n<album> ...
# and may additionally carry a release year (after the "℗" sign), a
# "Released on: YYYY-MM-DD" line and a cleaner "Artist:" line.
release_date = release_year = None
if video_description:
    mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
    if mobj:
        if not track:
            track = mobj.group('track').strip()
        if not artist:
            artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
        if not album:
            # Fix: strip() belongs on the extracted value, not on the
            # group-name literal ('album'.strip() was a no-op, leaving
            # surrounding whitespace on the album title).
            album = mobj.group('album').strip()
        release_year = mobj.group('release_year')
        release_date = mobj.group('release_date')
        if release_date:
            release_date = release_date.replace('-', '')
            if not release_year:
                release_year = int(release_date[:4])
        if release_year:
            release_year = int(release_year)
2260 m_episode = re.search(
2261 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2264 series = unescapeHTML(m_episode.group('series'))
2265 season_number = int(m_episode.group('season'))
2266 episode_number = int(m_episode.group('episode'))
2268 series = season_number = episode_number = None
2270 m_cat_container = self._search_regex(
2271 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2272 video_webpage, 'categories', default=None)
2274 category = self._html_search_regex(
2275 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2277 video_categories = None if category is None else [category]
2279 video_categories = None
2282 unescapeHTML(m.group('content'))
2283 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
def _extract_count(count_name):
    # Read the numeric label off the like/dislike sentiment button
    # ("1,234" style); None when the button is missing.
    count = self._search_regex(
        r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
        % re.escape(count_name),
        video_webpage, count_name, default=None)
    return str_to_int(count)
2291 like_count = _extract_count('like')
2292 dislike_count = _extract_count('dislike')
2294 if view_count is None:
2295 view_count = str_to_int(self._search_regex(
2296 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2297 'view count', default=None))
2300 float_or_none(video_details.get('averageRating'))
2301 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2304 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2305 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2307 video_duration = try_get(
2308 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2309 if not video_duration:
2310 video_duration = int_or_none(video_details.get('lengthSeconds'))
2311 if not video_duration:
2312 video_duration = parse_duration(self._html_search_meta(
2313 'duration', video_webpage, 'video duration'))
2316 video_annotations = None
2317 if self._downloader.params.get('writeannotations', False):
2318 xsrf_token = self._search_regex(
2319 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2320 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2321 invideo_url = try_get(
2322 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2323 if xsrf_token and invideo_url:
2324 xsrf_field_name = self._search_regex(
2325 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2326 video_webpage, 'xsrf field name',
2327 group='xsrf_field_name', default='session_token')
2328 video_annotations = self._download_webpage(
2329 self._proto_relative_url(invideo_url),
2330 video_id, note='Downloading annotations',
2331 errnote='Unable to download video annotations', fatal=False,
2332 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2334 chapters = self._extract_chapters(description_original, video_duration)
2336 # Look for the DASH manifest
2337 if self._downloader.params.get('youtube_include_dash_manifest', True):
2338 dash_mpd_fatal = True
2339 for mpd_url in dash_mpds:
2342 def decrypt_sig(mobj):
2344 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2345 return '/signature/%s' % dec_s
2347 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2349 for df in self._extract_mpd_formats(
2350 mpd_url, video_id, fatal=dash_mpd_fatal,
2351 formats_dict=self._formats):
2352 if not df.get('filesize'):
2353 df['filesize'] = _extract_filesize(df['url'])
2354 # Do not overwrite DASH format found in some previous DASH manifest
2355 if df['format_id'] not in dash_formats:
2356 dash_formats[df['format_id']] = df
2357 # Additional DASH manifests may end up in HTTP Error 403 therefore
2358 # allow them to fail without bug report message if we already have
2359 # some DASH manifest succeeded. This is temporary workaround to reduce
2360 # burst of bug reports until we figure out the reason and whether it
2361 # can be fixed at all.
2362 dash_mpd_fatal = False
2363 except (ExtractorError, KeyError) as e:
2364 self.report_warning(
2365 'Skipping DASH manifest: %r' % e, video_id)
2367 # Remove the formats we found through non-DASH, they
2368 # contain less info and it can be wrong, because we use
2369 # fixed values (for example the resolution). See
2370 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2372 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2373 formats.extend(dash_formats.values())
2375 # Check for malformed aspect ratio
2376 stretched_m = re.search(
2377 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2380 w = float(stretched_m.group('w'))
2381 h = float(stretched_m.group('h'))
2382 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2383 # We will only process correct ratios.
2387 if f.get('vcodec') != 'none':
2388 f['stretched_ratio'] = ratio
2391 token = extract_token(video_info)
2393 if 'reason' in video_info:
2394 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2395 regions_allowed = self._html_search_meta(
2396 'regionsAllowed', video_webpage, default=None)
2397 countries = regions_allowed.split(',') if regions_allowed else None
2398 self.raise_geo_restricted(
2399 msg=video_info['reason'][0], countries=countries)
2400 reason = video_info['reason'][0]
2401 if 'Invalid parameters' in reason:
2402 unavailable_message = extract_unavailable_message()
2403 if unavailable_message:
2404 reason = unavailable_message
2405 raise ExtractorError(
2406 'YouTube said: %s' % reason,
2407 expected=True, video_id=video_id)
2409 raise ExtractorError(
2410 '"token" parameter not in video info for unknown reason',
2413 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2414 raise ExtractorError('This video is DRM protected.', expected=True)
2416 self._sort_formats(formats)
2418 self.mark_watched(video_id, video_info, player_response)
2422 'uploader': video_uploader,
2423 'uploader_id': video_uploader_id,
2424 'uploader_url': video_uploader_url,
2425 'channel_id': channel_id,
2426 'channel_url': channel_url,
2427 'upload_date': upload_date,
2428 'license': video_license,
2429 'creator': video_creator or artist,
2430 'title': video_title,
2431 'alt_title': video_alt_title or track,
2432 'thumbnail': video_thumbnail,
2433 'description': video_description,
2434 'categories': video_categories,
2436 'subtitles': video_subtitles,
2437 'automatic_captions': automatic_captions,
2438 'duration': video_duration,
2439 'age_limit': 18 if age_gate else 0,
2440 'annotations': video_annotations,
2441 'chapters': chapters,
2442 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2443 'view_count': view_count,
2444 'like_count': like_count,
2445 'dislike_count': dislike_count,
2446 'average_rating': average_rating,
2449 'start_time': start_time,
2450 'end_time': end_time,
2452 'season_number': season_number,
2453 'episode_number': episode_number,
2457 'release_date': release_date,
2458 'release_year': release_year,
2462 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2463 IE_DESC = 'YouTube.com playlists'
2464 _VALID_URL = r"""(?x)(?:
2474 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2475 \? (?:.*?[&;])*? (?:p|a|list)=
2478 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2481 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2482 # Top tracks, they can also include dots
2488 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2489 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2490 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2491 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2492 IE_NAME = 'youtube:playlist'
2494 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2496 'title': 'ytdl test PL',
2497 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2499 'playlist_count': 3,
2501 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2503 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2504 'title': 'YDL_Empty_List',
2506 'playlist_count': 0,
2507 'skip': 'This playlist is private',
2509 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2510 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2512 'title': '29C3: Not my department',
2513 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2514 'uploader': 'Christiaan008',
2515 'uploader_id': 'ChRiStIaAn008',
2517 'playlist_count': 95,
2519 'note': 'issue #673',
2520 'url': 'PLBB231211A4F62143',
2522 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2523 'id': 'PLBB231211A4F62143',
2524 'uploader': 'Wickydoo',
2525 'uploader_id': 'Wickydoo',
2527 'playlist_mincount': 26,
2529 'note': 'Large playlist',
2530 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2532 'title': 'Uploads from Cauchemar',
2533 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2534 'uploader': 'Cauchemar',
2535 'uploader_id': 'Cauchemar89',
2537 'playlist_mincount': 799,
2539 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2541 'title': 'YDL_safe_search',
2542 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2544 'playlist_count': 2,
2545 'skip': 'This playlist is private',
2548 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2549 'playlist_count': 4,
2552 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2553 'uploader': 'milan',
2554 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2557 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2558 'playlist_mincount': 485,
2560 'title': '2018 Chinese New Singles (11/6 updated)',
2561 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2563 'uploader_id': 'sdragonfang',
2566 'note': 'Embedded SWF player',
2567 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2568 'playlist_count': 4,
2571 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2573 'skip': 'This playlist does not exist',
2575 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2576 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2578 'title': 'Uploads from Interstellar Movie',
2579 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2580 'uploader': 'Interstellar Movie',
2581 'uploader_id': 'InterstellarMovie1',
2583 'playlist_mincount': 21,
2585 # Playlist URL that does not actually serve a playlist
2586 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2588 'id': 'FqZTN594JQw',
2590 'title': "Smiley's People 01 detective, Adventure Series, Action",
2591 'uploader': 'STREEM',
2592 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2594 'upload_date': '20150526',
2595 'license': 'Standard YouTube License',
2596 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2597 'categories': ['People & Blogs'],
2601 'dislike_count': int,
2604 'skip_download': True,
2606 'skip': 'This video is not available.',
2607 'add_ie': [YoutubeIE.ie_key()],
2609 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2611 'id': 'yeWKywCrFtk',
2613 'title': 'Small Scale Baler and Braiding Rugs',
2614 'uploader': 'Backus-Page House Museum',
2615 'uploader_id': 'backuspagemuseum',
2616 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2617 'upload_date': '20161008',
2618 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2619 'categories': ['Nonprofits & Activism'],
2622 'dislike_count': int,
2626 'skip_download': True,
2629 # https://github.com/ytdl-org/youtube-dl/issues/21844
2630 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2632 'title': 'Data Analysis with Dr Mike Pound',
2633 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2634 'uploader_id': 'Computerphile',
2635 'uploader': 'Computerphile',
2637 'playlist_mincount': 11,
2639 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2640 'only_matching': True,
2642 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2643 'only_matching': True,
2645 # music album playlist
2646 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2647 'only_matching': True,
2649 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2650 'only_matching': True,
2653 def _real_initialize(self):
2656 def extract_videos_from_page(self, page):
2660 for item in re.findall(
2661 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2662 attrs = extract_attributes(item)
2663 video_id = attrs['data-video-id']
2664 video_title = unescapeHTML(attrs.get('data-title'))
2666 video_title = video_title.strip()
2667 ids_in_page.append(video_id)
2668 titles_in_page.append(video_title)
2670 # Fallback with old _VIDEO_RE
2671 self.extract_videos_from_page_impl(
2672 self._VIDEO_RE, page, ids_in_page, titles_in_page)
2675 self.extract_videos_from_page_impl(
2676 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2677 ids_in_page, titles_in_page)
2678 self.extract_videos_from_page_impl(
2679 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2680 ids_in_page, titles_in_page)
2682 return zip(ids_in_page, titles_in_page)
2684 def _extract_mix(self, playlist_id):
2685 # The mixes are generated from a single video
2686 # the id of the playlist is just 'RD' + video_id
2688 last_id = playlist_id[-11:]
2689 for n in itertools.count(1):
2690 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2691 webpage = self._download_webpage(
2692 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2693 new_ids = orderedSet(re.findall(
2694 r'''(?xs)data-video-username=".*?".*?
2695 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2697 # Fetch new pages until all the videos are repeated, it seems that
2698 # there are always 51 unique videos.
2699 new_ids = [_id for _id in new_ids if _id not in ids]
2705 url_results = self._ids_to_results(ids)
2707 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2709 search_title('playlist-title')
2710 or search_title('title long-title')
2711 or search_title('title'))
2712 title = clean_html(title_span)
2714 return self.playlist_result(url_results, playlist_id, title)
2716 def _extract_playlist(self, playlist_id):
2717 url = self._TEMPLATE_URL % playlist_id
2718 page = self._download_webpage(url, playlist_id)
2720 # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2721 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2722 match = match.strip()
2723 # Check if the playlist exists or is private
2724 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2726 reason = mobj.group('reason')
2727 message = 'This playlist %s' % reason
2728 if 'private' in reason:
2729 message += ', use --username or --netrc to access it'
2731 raise ExtractorError(message, expected=True)
2732 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2733 raise ExtractorError(
2734 'Invalid parameters. Maybe URL is incorrect.',
2736 elif re.match(r'[^<]*Choose your language[^<]*', match):
2739 self.report_warning('Youtube gives an alert message: ' + match)
2741 playlist_title = self._html_search_regex(
2742 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2743 page, 'title', default=None)
2745 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2746 uploader = self._html_search_regex(
2747 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2748 page, 'uploader', default=None)
2750 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2753 uploader_id = mobj.group('uploader_id')
2754 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2756 uploader_id = uploader_url = None
2760 if not playlist_title:
2762 # Some playlist URLs don't actually serve a playlist (e.g.
2763 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2764 next(self._entries(page, playlist_id))
2765 except StopIteration:
2768 playlist = self.playlist_result(
2769 self._entries(page, playlist_id), playlist_id, playlist_title)
2771 'uploader': uploader,
2772 'uploader_id': uploader_id,
2773 'uploader_url': uploader_url,
2776 return has_videos, playlist
2778 def _check_download_just_video(self, url, playlist_id):
2779 # Check if it's a video-specific URL
2780 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2781 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2782 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2783 'video id', default=None)
2785 if self._downloader.params.get('noplaylist'):
2786 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2787 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2789 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2790 return video_id, None
2793 def _real_extract(self, url):
2794 # Extract playlist id
2795 mobj = re.match(self._VALID_URL, url)
2797 raise ExtractorError('Invalid URL: %s' % url)
2798 playlist_id = mobj.group(1) or mobj.group(2)
2800 video_id, video = self._check_download_just_video(url, playlist_id)
2804 if playlist_id.startswith(('RD', 'UL', 'PU')):
2805 # Mixes require a custom extraction process
2806 return self._extract_mix(playlist_id)
2808 has_videos, playlist = self._extract_playlist(playlist_id)
2809 if has_videos or not video_id:
2812 # Some playlist URLs don't actually serve a playlist (see
2813 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2814 # Fallback to plain video extraction if there is a video id
2815 # along with playlist id.
2816 return self.url_result(video_id, 'Youtube', video_id=video_id)
2819 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2820 IE_DESC = 'YouTube.com channels'
2821 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2822 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2823 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2824 IE_NAME = 'youtube:channel'
2826 'note': 'paginated channel',
2827 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2828 'playlist_mincount': 91,
2830 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2831 'title': 'Uploads from lex will',
2832 'uploader': 'lex will',
2833 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2836 'note': 'Age restricted channel',
2837 # from https://www.youtube.com/user/DeusExOfficial
2838 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2839 'playlist_mincount': 64,
2841 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2842 'title': 'Uploads from Deus Ex',
2843 'uploader': 'Deus Ex',
2844 'uploader_id': 'DeusExOfficial',
2847 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2848 'only_matching': True,
def suitable(cls, url):
    # Yield to the more specific playlists/live extractors when they
    # claim the URL; otherwise fall back to the regular matching.
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
def _build_template_url(self, url, channel_id):
    """Return the channel's /videos listing URL for *channel_id*.

    The *url* argument is unused in this base implementation;
    presumably subclasses make use of it when overriding -- TODO
    confirm against the rest of the file.
    """
    return self._TEMPLATE_URL % channel_id
2859 def _real_extract(self, url):
2860 channel_id = self._match_id(url)
2862 url = self._build_template_url(url, channel_id)
2864 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2865 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2866 # otherwise fallback on channel by page extraction
2867 channel_page = self._download_webpage(
2868 url + '?view=57', channel_id,
2869 'Downloading channel page', fatal=False)
2870 if channel_page is False:
2871 channel_playlist_id = False
2873 channel_playlist_id = self._html_search_meta(
2874 'channelId', channel_page, 'channel id', default=None)
2875 if not channel_playlist_id:
2876 channel_url = self._html_search_meta(
2877 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2878 channel_page, 'channel url', default=None)
2880 channel_playlist_id = self._search_regex(
2881 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2882 channel_url, 'channel id', default=None)
2883 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2884 playlist_id = 'UU' + channel_playlist_id[2:]
2885 return self.url_result(
2886 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2888 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2889 autogenerated = re.search(r'''(?x)
2891 channel-header-autogenerated-label|
2892 yt-channel-title-autogenerated
2893 )[^"]*"''', channel_page) is not None
2896 # The videos are contained in a single page
2897 # the ajax pages can't be used, they are empty
2900 video_id, 'Youtube', video_id=video_id,
2901 video_title=video_title)
2902 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2903 return self.playlist_result(entries, channel_id)
2906 next(self._entries(channel_page, channel_id))
2907 except StopIteration:
2908 alert_message = self._html_search_regex(
2909 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2910 channel_page, 'alert', default=None, group='alert')
2912 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2914 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        },
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        },
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors; this regex is too permissive and it would match.
        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # Keep the 'user' vs 'c' path segment from the original URL; 'ytuser:'
        # style URLs have no such segment and fall back to 'user'.
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to its current live video, else the channel."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only hand off to the video extractor for a well-formed 11-char id.
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Matches /watch?v=<11-char id> anchors in search-result HTML; the optional
    # 'title' group is only filled when the anchor carries a title="..." attribute.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        videos = []
        limit = n

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop on an empty page or once enough results were gathered.
            if not new_videos or len(videos) > limit:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Trim any overshoot from the last page before building the playlist.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Identical search mechanics to YoutubeSearchIE; the extra query argument
    # below asks YouTube to order results by upload date instead of relevance.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract results from a pasted search-results URL."""
        mobj = re.match(self._VALID_URL, url)
        # The query arrives URL-encoded ('+' for spaces) inside the URL itself.
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        },
    }]

    def _real_extract(self, url):
        # A show is just its /playlists listing; delegate to the base class
        # with the rewritten URL (one playlist per season).
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
            if not new_ids:
                break

            ids.extend(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A watch URL with list=WL may really be a single-video download
        # request; prefer that before extracting the whole WL playlist.
        _, video = self._check_download_just_video(url, 'WL')
        if video:
            return video
        _, playlist = self._extract_playlist('WL')
        return playlist
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites page to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds a list=<id> parameter pointing at the
        # actual playlist; hand that id off to the playlist extractor.
        favourites_page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Concrete feed extractor: all extraction logic lives in the base class,
    # which downloads https://www.youtube.com/feed/<_FEED_NAME>.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Concrete feed extractor: all extraction logic lives in the base class,
    # which downloads https://www.youtube.com/feed/<_FEED_NAME>.
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Concrete feed extractor: all extraction logic lives in the base class,
    # which downloads https://www.youtube.com/feed/<_FEED_NAME>.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose v= parameter was eaten by an unquoted shell '&'."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing can be extracted from such a URL; tell the user why.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3314 class YoutubeTruncatedIDIE(InfoExtractor):
3315 IE_NAME = 'youtube:truncated_id'
3316 IE_DESC = False # Do not list
3317 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3320 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3321 'only_matching': True,
3324 def _real_extract(self, url):
3325 video_id = self._match_id(url)
3326 raise ExtractorError(
3327 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),