3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
23 compat_urllib_parse_urlencode,
24 compat_urllib_parse_urlparse,
33 get_element_by_attribute,
54 class YoutubeBaseInfoExtractor(InfoExtractor):
55 """Provide base functions for Youtube extractors"""
56 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
57 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
59 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
60 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
61 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
63 _NETRC_MACHINE = 'youtube'
64 # If True it will raise an error if no login info is provided
65 _LOGIN_REQUIRED = False
67 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
69 def _set_language(self):
71 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
72 # YouTube sets the expire time to about two months
73 expire_time=time.time() + 2 * 30 * 24 * 3600)
75 def _ids_to_results(self, ids):
77 self.url_result(vid_id, 'Youtube', video_id=vid_id)
82 Attempt to log in to YouTube.
83 True is returned if successful or skipped.
84 False is returned if login failed.
86 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
88 username, password = self._get_login_info()
89 # No authentication to be performed
91 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
92 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
95 login_page = self._download_webpage(
96 self._LOGIN_URL, None,
97 note='Downloading login page',
98 errnote='unable to fetch login page', fatal=False)
99 if login_page is False:
102 login_form = self._hidden_inputs(login_page)
104 def req(url, f_req, note, errnote):
105 data = login_form.copy()
108 'checkConnection': 'youtube',
109 'checkedDomains': 'youtube',
111 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
112 'f.req': json.dumps(f_req),
113 'flowName': 'GlifWebSignIn',
114 'flowEntry': 'ServiceLogin',
116 return self._download_json(
117 url, None, note=note, errnote=errnote,
118 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
120 data=urlencode_postdata(data), headers={
121 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
122 'Google-Accounts-XSRF': 1,
126 self._downloader.report_warning(message)
130 None, [], None, 'US', None, None, 2, False, True,
134 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
136 1, [None, None, []], None, None, None, True
141 lookup_results = req(
142 self._LOOKUP_URL, lookup_req,
143 'Looking up account info', 'Unable to look up account info')
145 if lookup_results is False:
148 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
150 warn('Unable to extract user hash')
155 None, 1, None, [1, None, None, None, [password, None, True]],
157 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
158 1, [None, None, []], None, None, None, True
161 challenge_results = req(
162 self._CHALLENGE_URL, challenge_req,
163 'Logging in', 'Unable to log in')
165 if challenge_results is False:
168 login_res = try_get(challenge_results, lambda x: x[0][5], list)
170 login_msg = try_get(login_res, lambda x: x[5], compat_str)
172 'Unable to login: %s' % 'Invalid password'
173 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
176 res = try_get(challenge_results, lambda x: x[0][-1], list)
178 warn('Unable to extract result entry')
181 login_challenge = try_get(res, lambda x: x[0][0], list)
183 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
184 if challenge_str == 'TWO_STEP_VERIFICATION':
185 # SEND_SUCCESS - TFA code has been successfully sent to phone
186 # QUOTA_EXCEEDED - reached the limit of TFA codes
187 status = try_get(login_challenge, lambda x: x[5], compat_str)
188 if status == 'QUOTA_EXCEEDED':
189 warn('Exceeded the limit of TFA codes, try later')
192 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
194 warn('Unable to extract TL')
197 tfa_code = self._get_tfa_info('2-step verification code')
201 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
202 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
205 tfa_code = remove_start(tfa_code, 'G-')
208 user_hash, None, 2, None,
210 9, None, None, None, None, None, None, None,
211 [None, tfa_code, True, 2]
215 self._TFA_URL.format(tl), tfa_req,
216 'Submitting TFA code', 'Unable to submit TFA code')
218 if tfa_results is False:
221 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
223 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
225 'Unable to finish TFA: %s' % 'Invalid TFA code'
226 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
229 check_cookie_url = try_get(
230 tfa_results, lambda x: x[0][-1][2], compat_str)
233 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
234 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
235 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
237 challenge = CHALLENGES.get(
239 '%s returned error %s.' % (self.IE_NAME, challenge_str))
240 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
243 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
245 if not check_cookie_url:
246 warn('Unable to extract CheckCookie URL')
249 check_cookie_results = self._download_webpage(
250 check_cookie_url, None, 'Checking cookie', fatal=False)
252 if check_cookie_results is False:
255 if 'https://myaccount.google.com/' not in check_cookie_results:
256 warn('Unable to log in')
def _download_webpage_handle(self, *args, **kwargs):
    """Fetch a page while forcing YouTube's legacy (non-polymer) layout.

    A copy of the caller's ``query`` mapping is taken first so the
    caller's dict is never mutated; ``disable_polymer=true`` is then
    injected before delegating to the base implementation.
    """
    params = dict(kwargs.get('query', {}))
    params['disable_polymer'] = 'true'
    kwargs['query'] = params
    return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
        *args, **compat_kwargs(kwargs))
268 def _real_initialize(self):
269 if self._downloader is None:
272 if not self._login():
276 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
277 # Extract entries from page with "Load more" button
278 def _entries(self, page, playlist_id):
279 more_widget_html = content_html = page
280 for page_num in itertools.count(1):
281 for entry in self._process_page(content_html):
284 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
288 more = self._download_json(
289 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
290 'Downloading page #%s' % page_num,
291 transform_source=uppercase_escape)
292 content_html = more['content_html']
293 if not content_html.strip():
294 # Some webpages show a "Load more" button but they don't
297 more_widget_html = more['load_more_widget_html']
300 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
def _process_page(self, content):
    """Turn each (video_id, title) pair scraped from *content* into a
    url_result entry pointing at the YoutubeIE extractor."""
    scraped = self.extract_videos_from_page(content)
    for vid_id, vid_title in scraped:
        yield self.url_result(vid_id, 'Youtube', vid_id, vid_title)
305 def extract_videos_from_page(self, page):
308 for mobj in re.finditer(self._VIDEO_RE, page):
309 # The link with index 0 is not the first video of the playlist (not sure if still actual)
310 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
312 video_id = mobj.group('id')
313 video_title = unescapeHTML(mobj.group('title'))
315 video_title = video_title.strip()
317 idx = ids_in_page.index(video_id)
318 if video_title and not titles_in_page[idx]:
319 titles_in_page[idx] = video_title
321 ids_in_page.append(video_id)
322 titles_in_page.append(video_title)
323 return zip(ids_in_page, titles_in_page)
326 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
327 def _process_page(self, content):
328 for playlist_id in orderedSet(re.findall(
329 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
331 yield self.url_result(
332 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
def _real_extract(self, url):
    """Download the playlists page and wrap its entries in a playlist result.

    The page title is taken from the og:title meta tag when present
    (``fatal=False`` keeps extraction going if it is missing).
    """
    playlist_id = self._match_id(url)
    page = self._download_webpage(url, playlist_id)
    playlist_title = self._og_search_title(page, fatal=False)
    entries = self._entries(page, playlist_id)
    return self.playlist_result(entries, playlist_id, playlist_title)
341 class YoutubeIE(YoutubeBaseInfoExtractor):
342 IE_DESC = 'YouTube.com'
343 _VALID_URL = r"""(?x)^
345 (?:https?://|//) # http(s):// or protocol-independent URL
346 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
347 (?:www\.)?deturl\.com/www\.youtube\.com/|
348 (?:www\.)?pwnyoutube\.com/|
349 (?:www\.)?hooktube\.com/|
350 (?:www\.)?yourepeat\.com/|
351 tube\.majestyc\.net/|
352 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
353 (?:.*?\#/)? # handle anchor (#/) redirect urls
354 (?: # the various things that can precede the ID:
355 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
356 |(?: # or the v= param in all its forms
357 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
358 (?:\?|\#!?) # the params delimiter ? or # or #!
359 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
364 youtu\.be| # just youtu.be/xxxx
365 vid\.plus| # or vid.plus/xxxx
366 zwearz\.com/watch| # or zwearz.com/watch/xxxx
368 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
370 )? # all until now is optional -> you can pass the naked ID
371 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
374 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
375 WL # WL are handled by the watch later IE
378 (?(1).+)? # if we found the ID, everything can follow
379 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
380 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
382 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
383 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
384 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
385 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
386 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
387 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
388 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
389 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
390 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
391 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
392 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
393 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
394 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
395 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
396 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
397 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
398 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
399 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
403 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
404 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
405 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
406 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
407 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
408 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
409 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
411 # Apple HTTP Live Streaming
412 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
413 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
414 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
415 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
416 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
417 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
418 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
419 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
422 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
423 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
424 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
425 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
426 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
427 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
428 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
429 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
430 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
431 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
432 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
433 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
436 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
437 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
438 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
439 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
440 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
441 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
442 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
445 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
446 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
447 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
448 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
449 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
450 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
451 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
452 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
453 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
454 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
455 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
456 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
457 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
458 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
459 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
460 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
461 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
462 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
463 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
464 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
465 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
466 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
469 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
470 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
472 # Dash webm audio with opus inside
473 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
474 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
475 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
478 '_rtmp': {'protocol': 'rtmp'},
480 _SUBTITLE_FORMATS = ('ttml', 'vtt')
487 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
491 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
492 'uploader': 'Philipp Hagemeister',
493 'uploader_id': 'phihag',
494 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
495 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
496 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
497 'upload_date': '20121002',
498 'license': 'Standard YouTube License',
499 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
500 'categories': ['Science & Technology'],
501 'tags': ['youtube-dl'],
504 'dislike_count': int,
510 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
511 'note': 'Test generic use_cipher_signature video (#897)',
515 'upload_date': '20120506',
516 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
517 'alt_title': 'I Love It (feat. Charli XCX)',
518 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
519 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
520 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
521 'iconic ep', 'iconic', 'love', 'it'],
523 'uploader': 'Icona Pop',
524 'uploader_id': 'IconaPop',
525 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
526 'license': 'Standard YouTube License',
527 'creator': 'Icona Pop',
528 'track': 'I Love It (feat. Charli XCX)',
529 'artist': 'Icona Pop',
533 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
534 'note': 'Test VEVO video with age protection (#956)',
538 'upload_date': '20130703',
539 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
540 'alt_title': 'Tunnel Vision',
541 'description': 'md5:64249768eec3bc4276236606ea996373',
543 'uploader': 'justintimberlakeVEVO',
544 'uploader_id': 'justintimberlakeVEVO',
545 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
546 'license': 'Standard YouTube License',
547 'creator': 'Justin Timberlake',
548 'track': 'Tunnel Vision',
549 'artist': 'Justin Timberlake',
554 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
555 'note': 'Embed-only video (#1746)',
559 'upload_date': '20120608',
560 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
561 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
562 'uploader': 'SET India',
563 'uploader_id': 'setindia',
564 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
565 'license': 'Standard YouTube License',
570 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
571 'note': 'Use the first video ID in the URL',
575 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
576 'uploader': 'Philipp Hagemeister',
577 'uploader_id': 'phihag',
578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
579 'upload_date': '20121002',
580 'license': 'Standard YouTube License',
581 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
582 'categories': ['Science & Technology'],
583 'tags': ['youtube-dl'],
586 'dislike_count': int,
589 'skip_download': True,
593 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
594 'note': '256k DASH audio (format 141) via DASH manifest',
598 'upload_date': '20121002',
599 'uploader_id': '8KVIDEO',
600 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
602 'uploader': '8KVIDEO',
603 'license': 'Standard YouTube License',
604 'title': 'UHDTV TEST 8K VIDEO.mp4'
607 'youtube_include_dash_manifest': True,
610 'skip': 'format 141 not served anymore',
612 # DASH manifest with encrypted signature
614 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
618 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
619 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
621 'uploader': 'AfrojackVEVO',
622 'uploader_id': 'AfrojackVEVO',
623 'upload_date': '20131011',
624 'license': 'Standard YouTube License',
627 'youtube_include_dash_manifest': True,
628 'format': '141/bestaudio[ext=m4a]',
631 # JS player signature function name containing $
633 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
637 'title': 'Taylor Swift - Shake It Off',
638 'alt_title': 'Shake It Off',
639 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
641 'uploader': 'TaylorSwiftVEVO',
642 'uploader_id': 'TaylorSwiftVEVO',
643 'upload_date': '20140818',
644 'license': 'Standard YouTube License',
645 'creator': 'Taylor Swift',
648 'youtube_include_dash_manifest': True,
649 'format': '141/bestaudio[ext=m4a]',
654 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
659 'upload_date': '20100909',
660 'uploader': 'TJ Kirk',
661 'uploader_id': 'TheAmazingAtheist',
662 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
663 'license': 'Standard YouTube License',
664 'title': 'Burning Everyone\'s Koran',
665 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
668 # Normal age-gate video (No vevo, embed allowed)
670 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
674 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
675 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
677 'uploader': 'The Witcher',
678 'uploader_id': 'WitcherGame',
679 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
680 'upload_date': '20140605',
681 'license': 'Standard YouTube License',
685 # Age-gate video with encrypted signature
687 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
691 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
692 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
694 'uploader': 'LloydVEVO',
695 'uploader_id': 'LloydVEVO',
696 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
697 'upload_date': '20110629',
698 'license': 'Standard YouTube License',
702 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
703 # YouTube Red ad is not captured for creator
705 'url': '__2ABJjxzNo',
710 'upload_date': '20100430',
711 'uploader_id': 'deadmau5',
712 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
713 'creator': 'deadmau5',
714 'description': 'md5:12c56784b8032162bb936a5f76d55360',
715 'uploader': 'deadmau5',
716 'license': 'Standard YouTube License',
717 'title': 'Deadmau5 - Some Chords (HD)',
718 'alt_title': 'Some Chords',
720 'expected_warnings': [
721 'DASH manifest missing',
724 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
726 'url': 'lqQg6PlCWgI',
731 'upload_date': '20150827',
732 'uploader_id': 'olympic',
733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
734 'license': 'Standard YouTube License',
735 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
736 'uploader': 'Olympic',
737 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
740 'skip_download': 'requires avconv',
745 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
749 'stretched_ratio': 16 / 9.,
751 'upload_date': '20110310',
752 'uploader_id': 'AllenMeow',
753 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
754 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
756 'license': 'Standard YouTube License',
757 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
760 # url_encoded_fmt_stream_map is empty string
762 'url': 'qEJwOuvDf7I',
766 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
768 'upload_date': '20150404',
769 'uploader_id': 'spbelect',
770 'uploader': 'Наблюдатели Петербурга',
773 'skip_download': 'requires avconv',
775 'skip': 'This live event has ended.',
777 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
779 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
783 'title': 'md5:7b81415841e02ecd4313668cde88737a',
784 'description': 'md5:116377fd2963b81ec4ce64b542173306',
786 'upload_date': '20150625',
787 'uploader_id': 'dorappi2000',
788 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
789 'uploader': 'dorappi2000',
790 'license': 'Standard YouTube License',
791 'formats': 'mincount:31',
793 'skip': 'not actual anymore',
795 # DASH manifest with segment_list
797 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
798 'md5': '8ce563a1d667b599d21064e982ab9e31',
802 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
803 'uploader': 'Airtek',
804 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
805 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
806 'license': 'Standard YouTube License',
807 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
810 'youtube_include_dash_manifest': True,
811 'format': '135', # bestvideo
813 'skip': 'This live event has ended.',
816 # Multifeed videos (multiple cameras), URL is for Main Camera
817 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
820 'title': 'teamPGP: Rocket League Noob Stream',
821 'description': 'md5:dc7872fb300e143831327f1bae3af010',
827 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
828 'description': 'md5:dc7872fb300e143831327f1bae3af010',
830 'upload_date': '20150721',
831 'uploader': 'Beer Games Beer',
832 'uploader_id': 'beergamesbeer',
833 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
834 'license': 'Standard YouTube License',
840 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
841 'description': 'md5:dc7872fb300e143831327f1bae3af010',
843 'upload_date': '20150721',
844 'uploader': 'Beer Games Beer',
845 'uploader_id': 'beergamesbeer',
846 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
847 'license': 'Standard YouTube License',
853 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
854 'description': 'md5:dc7872fb300e143831327f1bae3af010',
856 'upload_date': '20150721',
857 'uploader': 'Beer Games Beer',
858 'uploader_id': 'beergamesbeer',
859 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
860 'license': 'Standard YouTube License',
866 'title': 'teamPGP: Rocket League Noob Stream (zim)',
867 'description': 'md5:dc7872fb300e143831327f1bae3af010',
869 'upload_date': '20150721',
870 'uploader': 'Beer Games Beer',
871 'uploader_id': 'beergamesbeer',
872 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
873 'license': 'Standard YouTube License',
877 'skip_download': True,
881 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
882 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
885 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
888 'skip': 'Not multifeed anymore',
891 'url': 'https://vid.plus/FlRa-iH7PGw',
892 'only_matching': True,
895 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
896 'only_matching': True,
899 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
900 # Also tests cut-off URL expansion in video description (see
901 # https://github.com/rg3/youtube-dl/issues/1892,
902 # https://github.com/rg3/youtube-dl/issues/8164)
903 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
907 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
908 'alt_title': 'Dark Walk - Position Music',
909 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
911 'upload_date': '20151119',
912 'uploader_id': 'IronSoulElf',
913 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
914 'uploader': 'IronSoulElf',
915 'license': 'Standard YouTube License',
916 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
917 'track': 'Dark Walk - Position Music',
918 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
921 'skip_download': True,
925 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
926 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
927 'only_matching': True,
930 # Video with yt:stretch=17:0
931 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
935 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
936 'description': 'md5:ee18a25c350637c8faff806845bddee9',
937 'upload_date': '20151107',
938 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
939 'uploader': 'CH GAMER DROID',
942 'skip_download': True,
944 'skip': 'This video does not exist.',
947 # Video licensed under Creative Commons
948 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
952 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
953 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
955 'upload_date': '20150127',
956 'uploader_id': 'BerkmanCenter',
957 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
958 'uploader': 'The Berkman Klein Center for Internet & Society',
959 'license': 'Creative Commons Attribution license (reuse allowed)',
962 'skip_download': True,
966 # Channel-like uploader_url
967 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
971 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
972 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
974 'upload_date': '20151119',
975 'uploader': 'Bernie Sanders',
976 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
977 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
978 'license': 'Creative Commons Attribution license (reuse allowed)',
981 'skip_download': True,
985 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
986 'only_matching': True,
989 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
990 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
991 'only_matching': True,
994 # Rental video preview
995 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
999 'title': 'Piku - Trailer',
1000 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1001 'upload_date': '20150811',
1002 'uploader': 'FlixMatrix',
1003 'uploader_id': 'FlixMatrixKaravan',
1004 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1005 'license': 'Standard YouTube License',
1008 'skip_download': True,
1010 'skip': 'This video is not available.',
1013 # YouTube Red video with episode data
1014 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1016 'id': 'iqKdEhx-dD4',
1018 'title': 'Isolation - Mind Field (Ep 1)',
1019 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
1021 'upload_date': '20170118',
1022 'uploader': 'Vsauce',
1023 'uploader_id': 'Vsauce',
1024 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1025 'license': 'Standard YouTube License',
1026 'series': 'Mind Field',
1028 'episode_number': 1,
1031 'skip_download': True,
1033 'expected_warnings': [
1034 'Skipping DASH manifest',
1038 # The following content has been identified by the YouTube community
1039 # as inappropriate or offensive to some audiences.
1040 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1042 'id': '6SJNVb0GnPI',
1044 'title': 'Race Differences in Intelligence',
1045 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1047 'upload_date': '20140124',
1048 'uploader': 'New Century Foundation',
1049 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1050 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1051 'license': 'Standard YouTube License',
1054 'skip_download': True,
1059 'url': '1t24XAntNCY',
1060 'only_matching': True,
1063 # geo restricted to JP
1064 'url': 'sJL6WA-aGkQ',
1065 'only_matching': True,
1068 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1069 'only_matching': True,
1073 def __init__(self, *args, **kwargs):
1074 super(YoutubeIE, self).__init__(*args, **kwargs)
1075 self._player_cache = {}
1077 def report_video_info_webpage_download(self, video_id):
1078 """Report attempt to download video info webpage."""
1079 self.to_screen('%s: Downloading video info webpage' % video_id)
1081 def report_information_extraction(self, video_id):
1082 """Report attempt to extract video information."""
1083 self.to_screen('%s: Extracting video information' % video_id)
1085 def report_unavailable_format(self, video_id, format):
1086 """Report extracted video URL."""
1087 self.to_screen('%s: Format %s not available' % (video_id, format))
1089 def report_rtmp_download(self):
1090 """Indicate the download will use the RTMP protocol."""
1091 self.to_screen('RTMP download detected')
1093 def _signature_cache_id(self, example_sig):
1094 """ Return a string representation of a signature """
1095 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
    def _extract_signature_function(self, video_id, player_url, example_sig):
        # Build (or load from cache) a function that deciphers an encrypted
        # signature, based on the JS or SWF player referenced by player_url.
        # NOTE(review): several source lines appear to be missing from this
        # view (the re.match(...) producing id_m, the download_note opener,
        # the note= kwargs, and the final else:) — verify against upstream.
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # The cache key doubles as a filename component; it must not escape.
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of character indices: deciphering is a
            # fixed permutation/selection of the input string.
            return lambda s: ''.join(s[i] for i in cache_spec)

            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_swf(code)
            assert False, 'Invalid player type %r' % player_type

        # Probe the function with a string of distinct characters to record
        # which input positions end up in the output, then cache that spec.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    def _print_sig_code(self, func, example_sig):
        # Print Python source equivalent to the extracted signature function,
        # expressed as string slices, for use with --youtube-print-sig-code.
        # NOTE(review): several lines appear missing from this view (the
        # "step = None" initialisation and some continue/else arms) — verify
        # against the full file before editing.
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a Python slice expression covering start..end by step.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        yield _genslice(start, prev, step)
                if i - prev in [-1, 1]:
                    yield 's[%d]' % prev
                yield _genslice(start, i, step)

        # Run the function on a probe string to recover the index spec,
        # then join the generated slice expressions into one expression.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1182 def _parse_sig_js(self, jscode):
1183 funcname = self._search_regex(
1184 (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1185 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1186 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1187 r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1188 jscode, 'Initial JS player signature function name', group='sig')
1190 jsi = JSInterpreter(jscode)
1191 initial_function = jsi.extract_function(funcname)
1192 return lambda s: initial_function([s])
1194 def _parse_sig_swf(self, file_contents):
1195 swfi = SWFInterpreter(file_contents)
1196 TARGET_CLASSNAME = 'SignatureDecipher'
1197 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1198 initial_function = swfi.extract_function(searched_class, 'decipher')
1199 return lambda s: initial_function([s])
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""

        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')

        # Normalize scheme-relative ("//...") and site-relative player URLs
        # to absolute https URLs before fetching the player.
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)
        # NOTE(review): the enclosing "try:" line and a closing paren of the
        # _extract_signature_function(...) call appear to be missing from
        # this view — verify against the full file.
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
        except Exception as e:
            # Wrap any extraction failure with the traceback so bug reports
            # include the interpreter state.
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
    def _get_subtitles(self, video_id, webpage):
        # Fetch the manual subtitle track list from the legacy timedtext API
        # and build a {lang_code: [format dicts]} mapping.
        # NOTE(review): the "try:" line, the sub_lang_list initialisation and
        # several literal-dict lines appear missing from this view — verify.
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            # Best-effort: missing subtitles are a warning, not a failure.
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))

        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'name': track.attrib['name'].encode('utf-8'),
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
            sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
        return sub_lang_list
    def _get_ytplayer_config(self, video_id, webpage):
        # Extract and parse the ytplayer.config JSON object from the watch
        # page. NOTE(review): the "patterns = (" opener and the "if config:"
        # guard appear missing from this view — verify against the full file.
            # User data may contain arbitrary character sequences that may affect
            # JSON extraction with regex, e.g. when '};' is contained the second
            # regex won't capture the whole JSON. Yet working around by trying more
            # concrete regex first keeping in mind proper quoted string handling
            # to be implemented in future that will replace this workaround (see
            # https://github.com/rg3/youtube-dl/issues/7468,
            # https://github.com/rg3/youtube-dl/pull/7599)
            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
            r';ytplayer\.config\s*=\s*({.+?});',
        config = self._search_regex(
            patterns, webpage, 'ytplayer.config', default=None)
        # uppercase_escape undoes \UXXXX-style escaping before JSON parsing.
        return self._parse_json(
            uppercase_escape(config), video_id, fatal=False)
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        # NOTE(review): this view is missing many lines (the try: blocks,
        # dict initialisations, guards and returns) — verify against the full
        # file before editing anything here.
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            args = player_config['args']
            caption_url = args.get('ttsurl')
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'kind': caption_kind,
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language caption formats by rewriting the query
                # string of the base caption URL (tlang selects translation).
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                for sub_lang in sub_langs:
                    for ext in self._SUBTITLE_FORMATS:
                            'tlang': [sub_lang],
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                    captions[sub_lang] = sub_formats

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _mark_watched(self, video_id, video_info):
        # Ping YouTube's playback-stats endpoint so the video is marked
        # watched in the user's history (best-effort, non-fatal).
        # NOTE(review): the early "return" after the guard and the
        # qs.update(...) lines appear missing from this view — verify.
        playback_url = video_info.get('videostats_playback_base_url', [None])[0]
        if not playback_url:
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
    def _extract_urls(webpage):
        # Collect every YouTube URL embedded in an arbitrary third-party page.
        # NOTE(review): the list opener for entries (and the final
        # "return entries") appear missing from this view — verify.
        # Embedded YouTube player
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

        # lazyYT YouTube embed
        entries.extend(list(map(
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # The video id is the last captured group of each match.
        entries.extend(m[-1] for m in matches)
1436 def _extract_url(webpage):
1437 urls = YoutubeIE._extract_urls(webpage)
1438 return urls[0] if urls else None
    def extract_id(cls, url):
        # Extract the 11-character video id from a watch/embed/short URL.
        # NOTE(review): the "if mobj is None:" guard and the trailing
        # "return video_id" appear missing from this view — verify.
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
            raise ExtractorError('Invalid URL: %s' % url)
        video_id = mobj.group(2)
1448 def _extract_annotations(self, video_id):
1449 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1450 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
    def _extract_chapters(description, duration):
        # Parse chapter markers (seekTo links with timestamps) out of the
        # video description and clamp them to the video duration.
        # NOTE(review): several lines appear missing from this view (the
        # initial description guard, the chapters list initialisation, the
        # description argument to re.findall, and the continue/append
        # statements) — verify against the full file.
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        if not chapter_lines:
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
            if start_time > duration:
            # A chapter ends where the next one starts; the last chapter
            # ends at the video duration.
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
            if end_time > duration:
            if start_time > end_time:
            # Strip the seek link markup and collapse whitespace to get the
            # human-readable chapter title.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
1487 def _real_extract(self, url):
1488 url, smuggled_data = unsmuggle_url(url, {})
1491 'http' if self._downloader.params.get('prefer_insecure', False)
1496 parsed_url = compat_urllib_parse_urlparse(url)
1497 for component in [parsed_url.fragment, parsed_url.query]:
1498 query = compat_parse_qs(component)
1499 if start_time is None and 't' in query:
1500 start_time = parse_duration(query['t'][0])
1501 if start_time is None and 'start' in query:
1502 start_time = parse_duration(query['start'][0])
1503 if end_time is None and 'end' in query:
1504 end_time = parse_duration(query['end'][0])
1506 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1507 mobj = re.search(self._NEXT_URL_RE, url)
1509 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1510 video_id = self.extract_id(url)
1513 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1514 video_webpage = self._download_webpage(url, video_id)
1516 # Attempt to extract SWF player URL
1517 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1518 if mobj is not None:
1519 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1525 def add_dash_mpd(video_info):
1526 dash_mpd = video_info.get('dashmpd')
1527 if dash_mpd and dash_mpd[0] not in dash_mpds:
1528 dash_mpds.append(dash_mpd[0])
1533 def extract_view_count(v_info):
1534 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1537 embed_webpage = None
1538 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1540 # We simulate the access to the video from www.youtube.com/v/{video_id}
1541 # this can be viewed without login into Youtube
1542 url = proto + '://www.youtube.com/embed/%s' % video_id
1543 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1544 data = compat_urllib_parse_urlencode({
1545 'video_id': video_id,
1546 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1547 'sts': self._search_regex(
1548 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1550 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1551 video_info_webpage = self._download_webpage(
1552 video_info_url, video_id,
1553 note='Refetching age-gated info webpage',
1554 errnote='unable to download video info webpage')
1555 video_info = compat_parse_qs(video_info_webpage)
1556 add_dash_mpd(video_info)
1561 # Try looking directly into the video webpage
1562 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1564 args = ytplayer_config['args']
1565 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1566 # Convert to the same format returned by compat_parse_qs
1567 video_info = dict((k, [v]) for k, v in args.items())
1568 add_dash_mpd(video_info)
1569 # Rental video is not rented but preview is available (e.g.
1570 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1571 # https://github.com/rg3/youtube-dl/issues/10532)
1572 if not video_info and args.get('ypc_vid'):
1573 return self.url_result(
1574 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1575 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1577 sts = ytplayer_config.get('sts')
1578 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1579 # We also try looking in get_video_info since it may contain different dashmpd
1580 # URL that points to a DASH manifest with possibly different itag set (some itags
1581 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1582 # manifest pointed by get_video_info's dashmpd).
1583 # The general idea is to take a union of itags of both DASH manifests (for example
1584 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1585 self.report_video_info_webpage_download(video_id)
1586 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1588 'video_id': video_id,
1598 video_info_webpage = self._download_webpage(
1599 '%s://www.youtube.com/get_video_info' % proto,
1600 video_id, note=False,
1601 errnote='unable to download video info webpage',
1602 fatal=False, query=query)
1603 if not video_info_webpage:
1605 get_video_info = compat_parse_qs(video_info_webpage)
1606 add_dash_mpd(get_video_info)
1607 if view_count is None:
1608 view_count = extract_view_count(get_video_info)
1610 video_info = get_video_info
1611 if 'token' in get_video_info:
1612 # Different get_video_info requests may report different results, e.g.
1613 # some may report video unavailability, but some may serve it without
1614 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1615 # the original webpage as well as el=info and el=embedded get_video_info
1616 # requests report video unavailability due to geo restriction while
1617 # el=detailpage succeeds and returns valid data). This is probably
1618 # due to YouTube measures against IP ranges of hosting providers.
1619 # Working around by preferring the first succeeded video_info containing
1620 # the token if no such video_info yet was found.
1621 if 'token' not in video_info:
1622 video_info = get_video_info
1625 def extract_unavailable_message():
1626 return self._html_search_regex(
1627 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1628 video_webpage, 'unavailable message', default=None)
1630 if 'token' not in video_info:
1631 if 'reason' in video_info:
1632 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1633 regions_allowed = self._html_search_meta(
1634 'regionsAllowed', video_webpage, default=None)
1635 countries = regions_allowed.split(',') if regions_allowed else None
1636 self.raise_geo_restricted(
1637 msg=video_info['reason'][0], countries=countries)
1638 reason = video_info['reason'][0]
1639 if 'Invalid parameters' in reason:
1640 unavailable_message = extract_unavailable_message()
1641 if unavailable_message:
1642 reason = unavailable_message
1643 raise ExtractorError(
1644 'YouTube said: %s' % reason,
1645 expected=True, video_id=video_id)
1647 raise ExtractorError(
1648 '"token" parameter not in video info for unknown reason',
1652 if 'title' in video_info:
1653 video_title = video_info['title'][0]
1655 self._downloader.report_warning('Unable to extract video title')
1659 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1660 if video_description:
1663 redir_url = compat_urlparse.urljoin(url, m.group(1))
1664 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1665 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1666 qs = compat_parse_qs(parsed_redir_url.query)
1672 description_original = video_description = re.sub(r'''(?x)
1674 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1675 (?:title|href)="([^"]+)"\s+
1676 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1680 ''', replace_url, video_description)
1681 video_description = clean_html(video_description)
1683 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1685 video_description = unescapeHTML(fd_mobj.group(1))
1687 video_description = ''
1689 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1690 if not self._downloader.params.get('noplaylist'):
1693 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1694 for feed in multifeed_metadata_list.split(','):
1695 # Unquote should take place before split on comma (,) since textual
1696 # fields may contain comma as well (see
1697 # https://github.com/rg3/youtube-dl/issues/8536)
1698 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1700 '_type': 'url_transparent',
1701 'ie_key': 'Youtube',
1703 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1704 {'force_singlefeed': True}),
1705 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1707 feed_ids.append(feed_data['id'][0])
1709 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1710 % (', '.join(feed_ids), video_id))
1711 return self.playlist_result(entries, video_id, video_title, video_description)
1712 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1714 if view_count is None:
1715 view_count = extract_view_count(video_info)
1717 # Check for "rental" videos
1718 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1719 raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
1721 def _extract_filesize(media_url):
1722 return int_or_none(self._search_regex(
1723 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1725 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1726 self.report_rtmp_download()
1728 'format_id': '_rtmp',
1730 'url': video_info['conn'][0],
1731 'player_url': player_url,
1733 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1734 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1735 if 'rtmpe%3Dyes' in encoded_url_map:
1736 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1738 fmt_list = video_info.get('fmt_list', [''])[0]
1740 for fmt in fmt_list.split(','):
1741 spec = fmt.split('/')
1743 width_height = spec[1].split('x')
1744 if len(width_height) == 2:
1745 formats_spec[spec[0]] = {
1746 'resolution': spec[1],
1747 'width': int_or_none(width_height[0]),
1748 'height': int_or_none(width_height[1]),
1750 q = qualities(['small', 'medium', 'hd720'])
1752 for url_data_str in encoded_url_map.split(','):
1753 url_data = compat_parse_qs(url_data_str)
1754 if 'itag' not in url_data or 'url' not in url_data:
1756 format_id = url_data['itag'][0]
1757 url = url_data['url'][0]
1759 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1760 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1761 jsplayer_url_json = self._search_regex(
1763 embed_webpage if age_gate else video_webpage,
1764 'JS player URL (1)', default=None)
1765 if not jsplayer_url_json and not age_gate:
1766 # We need the embed website after all
1767 if embed_webpage is None:
1768 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1769 embed_webpage = self._download_webpage(
1770 embed_url, video_id, 'Downloading embed webpage')
1771 jsplayer_url_json = self._search_regex(
1772 ASSETS_RE, embed_webpage, 'JS player URL')
1774 player_url = json.loads(jsplayer_url_json)
1775 if player_url is None:
1776 player_url_json = self._search_regex(
1777 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1778 video_webpage, 'age gate player URL')
1779 player_url = json.loads(player_url_json)
1781 if 'sig' in url_data:
1782 url += '&signature=' + url_data['sig'][0]
1783 elif 's' in url_data:
1784 encrypted_sig = url_data['s'][0]
1786 if self._downloader.params.get('verbose'):
1787 if player_url is None:
1788 player_version = 'unknown'
1789 player_desc = 'unknown'
1791 if player_url.endswith('swf'):
1792 player_version = self._search_regex(
1793 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1794 'flash player', fatal=False)
1795 player_desc = 'flash player %s' % player_version
1797 player_version = self._search_regex(
1798 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1799 r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
1801 'html5 player', fatal=False)
1802 player_desc = 'html5 player %s' % player_version
1804 parts_sizes = self._signature_cache_id(encrypted_sig)
1805 self.to_screen('{%s} signature length %s, %s' %
1806 (format_id, parts_sizes, player_desc))
1808 signature = self._decrypt_signature(
1809 encrypted_sig, video_id, player_url, age_gate)
1810 url += '&signature=' + signature
1811 if 'ratebypass' not in url:
1812 url += '&ratebypass=yes'
1815 'format_id': format_id,
1817 'player_url': player_url,
1819 if format_id in self._formats:
1820 dct.update(self._formats[format_id])
1821 if format_id in formats_spec:
1822 dct.update(formats_spec[format_id])
1824 # Some itags are not included in DASH manifest thus corresponding formats will
1825 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1826 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1827 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1828 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1830 filesize = int_or_none(url_data.get(
1831 'clen', [None])[0]) or _extract_filesize(url)
1833 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1836 'filesize': filesize,
1837 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1840 'fps': int_or_none(url_data.get('fps', [None])[0]),
1841 'format_note': quality,
1842 'quality': q(quality),
1844 for key, value in more_fields.items():
1847 type_ = url_data.get('type', [None])[0]
1849 type_split = type_.split(';')
1850 kind_ext = type_split[0].split('/')
1851 if len(kind_ext) == 2:
1853 dct['ext'] = mimetype2ext(type_split[0])
1854 if kind in ('audio', 'video'):
1856 for mobj in re.finditer(
1857 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1858 if mobj.group('key') == 'codecs':
1859 codecs = mobj.group('val')
1862 dct.update(parse_codecs(codecs))
1863 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1864 dct['downloader_options'] = {
1865 # Youtube throttles chunks >~10M
1866 'http_chunk_size': 10485760,
1869 elif video_info.get('hlsvp'):
1870 manifest_url = video_info['hlsvp'][0]
1872 m3u8_formats = self._extract_m3u8_formats(
1873 manifest_url, video_id, 'mp4', fatal=False)
1874 for a_format in m3u8_formats:
1875 itag = self._search_regex(
1876 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1878 a_format['format_id'] = itag
1879 if itag in self._formats:
1880 dct = self._formats[itag].copy()
1881 dct.update(a_format)
1883 a_format['player_url'] = player_url
1884 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1885 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1886 formats.append(a_format)
1888 error_message = clean_html(video_info.get('reason', [None])[0])
1889 if not error_message:
1890 error_message = extract_unavailable_message()
1892 raise ExtractorError(error_message, expected=True)
1893 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1896 video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1898 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1900 self._downloader.report_warning('unable to extract uploader name')
1903 video_uploader_id = None
1904 video_uploader_url = None
1906 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1908 if mobj is not None:
1909 video_uploader_id = mobj.group('uploader_id')
1910 video_uploader_url = mobj.group('uploader_url')
1912 self._downloader.report_warning('unable to extract uploader nickname')
1914 channel_id = self._html_search_meta(
1915 'channelId', video_webpage, 'channel id')
1916 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
1919 # We try first to get a high quality image:
1920 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1921 video_webpage, re.DOTALL)
1922 if m_thumb is not None:
1923 video_thumbnail = m_thumb.group(1)
1924 elif 'thumbnail_url' not in video_info:
1925 self._downloader.report_warning('unable to extract video thumbnail')
1926 video_thumbnail = None
1927 else: # don't panic if we can't find it
1928 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1931 upload_date = self._html_search_meta(
1932 'datePublished', video_webpage, 'upload date', default=None)
1934 upload_date = self._search_regex(
1935 [r'(?s)id="eow-date.*?>(.*?)</span>',
1936 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1937 video_webpage, 'upload date', default=None)
1938 upload_date = unified_strdate(upload_date)
1940 video_license = self._html_search_regex(
1941 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1942 video_webpage, 'license', default=None)
1944 m_music = re.search(
1946 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1954 \bhref=["\']/red[^>]*>| # drop possible
1955 >\s*Listen ad-free with YouTube Red # YouTube Red ad
1962 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1963 video_creator = clean_html(m_music.group('creator'))
1965 video_alt_title = video_creator = None
1967 def extract_meta(field):
1968 return self._html_search_regex(
1969 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1970 video_webpage, field, default=None)
1972 track = extract_meta('Song')
1973 artist = extract_meta('Artist')
1975 m_episode = re.search(
1976 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1979 series = m_episode.group('series')
1980 season_number = int(m_episode.group('season'))
1981 episode_number = int(m_episode.group('episode'))
1983 series = season_number = episode_number = None
1985 m_cat_container = self._search_regex(
1986 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1987 video_webpage, 'categories', default=None)
1989 category = self._html_search_regex(
1990 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1992 video_categories = None if category is None else [category]
1994 video_categories = None
1997 unescapeHTML(m.group('content'))
1998 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2000 def _extract_count(count_name):
2001 return str_to_int(self._search_regex(
2002 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2003 % re.escape(count_name),
2004 video_webpage, count_name, default=None))
2006 like_count = _extract_count('like')
2007 dislike_count = _extract_count('dislike')
2010 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2011 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2013 video_duration = try_get(
2014 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2015 if not video_duration:
2016 video_duration = parse_duration(self._html_search_meta(
2017 'duration', video_webpage, 'video duration'))
2020 video_annotations = None
2021 if self._downloader.params.get('writeannotations', False):
2022 video_annotations = self._extract_annotations(video_id)
2024 chapters = self._extract_chapters(description_original, video_duration)
2026 # Look for the DASH manifest
2027 if self._downloader.params.get('youtube_include_dash_manifest', True):
2028 dash_mpd_fatal = True
2029 for mpd_url in dash_mpds:
2032 def decrypt_sig(mobj):
2034 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2035 return '/signature/%s' % dec_s
2037 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2039 for df in self._extract_mpd_formats(
2040 mpd_url, video_id, fatal=dash_mpd_fatal,
2041 formats_dict=self._formats):
2042 if not df.get('filesize'):
2043 df['filesize'] = _extract_filesize(df['url'])
2044 # Do not overwrite DASH format found in some previous DASH manifest
2045 if df['format_id'] not in dash_formats:
2046 dash_formats[df['format_id']] = df
2047 # Additional DASH manifests may end up in HTTP Error 403 therefore
2048 # allow them to fail without bug report message if we already have
2049 # some DASH manifest succeeded. This is temporary workaround to reduce
2050 # burst of bug reports until we figure out the reason and whether it
2051 # can be fixed at all.
2052 dash_mpd_fatal = False
2053 except (ExtractorError, KeyError) as e:
2054 self.report_warning(
2055 'Skipping DASH manifest: %r' % e, video_id)
2057 # Remove the formats we found through non-DASH, they
2058 # contain less info and it can be wrong, because we use
2059 # fixed values (for example the resolution). See
2060 # https://github.com/rg3/youtube-dl/issues/5774 for an
2062 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2063 formats.extend(dash_formats.values())
2065 # Check for malformed aspect ratio
2066 stretched_m = re.search(
2067 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2070 w = float(stretched_m.group('w'))
2071 h = float(stretched_m.group('h'))
2072 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2073 # We will only process correct ratios.
2077 if f.get('vcodec') != 'none':
2078 f['stretched_ratio'] = ratio
2080 self._sort_formats(formats)
2082 self.mark_watched(video_id, video_info)
2086 'uploader': video_uploader,
2087 'uploader_id': video_uploader_id,
2088 'uploader_url': video_uploader_url,
2089 'channel_id': channel_id,
2090 'channel_url': channel_url,
2091 'upload_date': upload_date,
2092 'license': video_license,
2093 'creator': video_creator or artist,
2094 'title': video_title,
2095 'alt_title': video_alt_title or track,
2096 'thumbnail': video_thumbnail,
2097 'description': video_description,
2098 'categories': video_categories,
2100 'subtitles': video_subtitles,
2101 'automatic_captions': automatic_captions,
2102 'duration': video_duration,
2103 'age_limit': 18 if age_gate else 0,
2104 'annotations': video_annotations,
2105 'chapters': chapters,
2106 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2107 'view_count': view_count,
2108 'like_count': like_count,
2109 'dislike_count': dislike_count,
2110 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2113 'start_time': start_time,
2114 'end_time': end_time,
2116 'season_number': season_number,
2117 'episode_number': episode_number,
2123 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2124 IE_DESC = 'YouTube.com playlists'
2125 _VALID_URL = r"""(?x)(?:
2131 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2132 \? (?:.*?[&;])*? (?:p|a|list)=
2135 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2138 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2139 # Top tracks, they can also include dots
2145 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2146 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2147 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
2148 IE_NAME = 'youtube:playlist'
2150 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2152 'title': 'ytdl test PL',
2153 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2155 'playlist_count': 3,
2157 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2159 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2160 'title': 'YDL_Empty_List',
2162 'playlist_count': 0,
2163 'skip': 'This playlist is private',
2165 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2166 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2168 'title': '29C3: Not my department',
2169 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2171 'playlist_count': 95,
2173 'note': 'issue #673',
2174 'url': 'PLBB231211A4F62143',
2176 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2177 'id': 'PLBB231211A4F62143',
2179 'playlist_mincount': 26,
2181 'note': 'Large playlist',
2182 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2184 'title': 'Uploads from Cauchemar',
2185 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2187 'playlist_mincount': 799,
2189 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2191 'title': 'YDL_safe_search',
2192 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2194 'playlist_count': 2,
2195 'skip': 'This playlist is private',
2198 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2199 'playlist_count': 4,
2202 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2205 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2206 'playlist_mincount': 485,
2208 'title': '2017 華語最新單曲 (2/24更新)',
2209 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2212 'note': 'Embedded SWF player',
2213 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2214 'playlist_count': 4,
2217 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2220 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2221 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2223 'title': 'Uploads from Interstellar Movie',
2224 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2226 'playlist_mincount': 21,
2228 # Playlist URL that does not actually serve a playlist
2229 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2231 'id': 'FqZTN594JQw',
2233 'title': "Smiley's People 01 detective, Adventure Series, Action",
2234 'uploader': 'STREEM',
2235 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2236 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2237 'upload_date': '20150526',
2238 'license': 'Standard YouTube License',
2239 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2240 'categories': ['People & Blogs'],
2243 'dislike_count': int,
2246 'skip_download': True,
2248 'add_ie': [YoutubeIE.ie_key()],
2250 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2252 'id': 'yeWKywCrFtk',
2254 'title': 'Small Scale Baler and Braiding Rugs',
2255 'uploader': 'Backus-Page House Museum',
2256 'uploader_id': 'backuspagemuseum',
2257 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2258 'upload_date': '20161008',
2259 'license': 'Standard YouTube License',
2260 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2261 'categories': ['Nonprofits & Activism'],
2264 'dislike_count': int,
2268 'skip_download': True,
2271 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2272 'only_matching': True,
2274 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2275 'only_matching': True,
2277 # music album playlist
2278 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2279 'only_matching': True,
2282 def _real_initialize(self):
2285 def _extract_mix(self, playlist_id):
2286 # The mixes are generated from a single video
2287 # the id of the playlist is just 'RD' + video_id
2289 last_id = playlist_id[-11:]
2290 for n in itertools.count(1):
2291 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2292 webpage = self._download_webpage(
2293 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2294 new_ids = orderedSet(re.findall(
2295 r'''(?xs)data-video-username=".*?".*?
2296 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2298 # Fetch new pages until all the videos are repeated, it seems that
2299 # there are always 51 unique videos.
2300 new_ids = [_id for _id in new_ids if _id not in ids]
2306 url_results = self._ids_to_results(ids)
2308 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2310 search_title('playlist-title') or
2311 search_title('title long-title') or
2312 search_title('title'))
2313 title = clean_html(title_span)
2315 return self.playlist_result(url_results, playlist_id, title)
2317 def _extract_playlist(self, playlist_id):
2318 url = self._TEMPLATE_URL % playlist_id
2319 page = self._download_webpage(url, playlist_id)
2321 # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
2322 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2323 match = match.strip()
2324 # Check if the playlist exists or is private
2325 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2327 reason = mobj.group('reason')
2328 message = 'This playlist %s' % reason
2329 if 'private' in reason:
2330 message += ', use --username or --netrc to access it'
2332 raise ExtractorError(message, expected=True)
2333 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2334 raise ExtractorError(
2335 'Invalid parameters. Maybe URL is incorrect.',
2337 elif re.match(r'[^<]*Choose your language[^<]*', match):
2340 self.report_warning('Youtube gives an alert message: ' + match)
2342 playlist_title = self._html_search_regex(
2343 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2344 page, 'title', default=None)
2346 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2347 uploader = self._search_regex(
2348 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2349 page, 'uploader', default=None)
2351 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2354 uploader_id = mobj.group('uploader_id')
2355 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2357 uploader_id = uploader_url = None
2361 if not playlist_title:
2363 # Some playlist URLs don't actually serve a playlist (e.g.
2364 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2365 next(self._entries(page, playlist_id))
2366 except StopIteration:
2369 playlist = self.playlist_result(
2370 self._entries(page, playlist_id), playlist_id, playlist_title)
2372 'uploader': uploader,
2373 'uploader_id': uploader_id,
2374 'uploader_url': uploader_url,
2377 return has_videos, playlist
2379 def _check_download_just_video(self, url, playlist_id):
2380 # Check if it's a video-specific URL
2381 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2382 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2383 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2384 'video id', default=None)
2386 if self._downloader.params.get('noplaylist'):
2387 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2388 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2390 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2391 return video_id, None
2394 def _real_extract(self, url):
2395 # Extract playlist id
2396 mobj = re.match(self._VALID_URL, url)
2398 raise ExtractorError('Invalid URL: %s' % url)
2399 playlist_id = mobj.group(1) or mobj.group(2)
2401 video_id, video = self._check_download_just_video(url, playlist_id)
2405 if playlist_id.startswith(('RD', 'UL', 'PU')):
2406 # Mixes require a custom extraction process
2407 return self._extract_mix(playlist_id)
2409 has_videos, playlist = self._extract_playlist(playlist_id)
2410 if has_videos or not video_id:
2413 # Some playlist URLs don't actually serve a playlist (see
2414 # https://github.com/rg3/youtube-dl/issues/10537).
2415 # Fallback to plain video extraction if there is a video id
2416 # along with playlist id.
2417 return self.url_result(video_id, 'Youtube', video_id=video_id)
2420 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2421 IE_DESC = 'YouTube.com channels'
2422 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
2423 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2424 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2425 IE_NAME = 'youtube:channel'
2427 'note': 'paginated channel',
2428 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2429 'playlist_mincount': 91,
2431 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2432 'title': 'Uploads from lex will',
2435 'note': 'Age restricted channel',
2436 # from https://www.youtube.com/user/DeusExOfficial
2437 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2438 'playlist_mincount': 64,
2440 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2441 'title': 'Uploads from Deus Ex',
2446 def suitable(cls, url):
2447 return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2448 else super(YoutubeChannelIE, cls).suitable(url))
2450 def _build_template_url(self, url, channel_id):
2451 return self._TEMPLATE_URL % channel_id
2453 def _real_extract(self, url):
2454 channel_id = self._match_id(url)
2456 url = self._build_template_url(url, channel_id)
2458 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2459 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2460 # otherwise fallback on channel by page extraction
2461 channel_page = self._download_webpage(
2462 url + '?view=57', channel_id,
2463 'Downloading channel page', fatal=False)
2464 if channel_page is False:
2465 channel_playlist_id = False
2467 channel_playlist_id = self._html_search_meta(
2468 'channelId', channel_page, 'channel id', default=None)
2469 if not channel_playlist_id:
2470 channel_url = self._html_search_meta(
2471 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2472 channel_page, 'channel url', default=None)
2474 channel_playlist_id = self._search_regex(
2475 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2476 channel_url, 'channel id', default=None)
2477 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2478 playlist_id = 'UU' + channel_playlist_id[2:]
2479 return self.url_result(
2480 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2482 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2483 autogenerated = re.search(r'''(?x)
2485 channel-header-autogenerated-label|
2486 yt-channel-title-autogenerated
2487 )[^"]*"''', channel_page) is not None
2490 # The videos are contained in a single page
2491 # the ajax pages can't be used, they are empty
2494 video_id, 'Youtube', video_id=video_id,
2495 video_title=video_title)
2496 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2497 return self.playlist_result(entries, channel_id)
2500 next(self._entries(channel_page, channel_id))
2501 except StopIteration:
2502 alert_message = self._html_search_regex(
2503 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2504 channel_page, 'alert', default=None, group='alert')
2506 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2508 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2511 class YoutubeUserIE(YoutubeChannelIE):
2512 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
2513 _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
2514 _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
2515 IE_NAME = 'youtube:user'
2518 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2519 'playlist_mincount': 320,
2521 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2522 'title': 'Uploads from The Linux Foundation',
2525 # Only available via https://www.youtube.com/c/12minuteathlete/videos
2526 # but not https://www.youtube.com/user/12minuteathlete/videos
2527 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2528 'playlist_mincount': 249,
2530 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2531 'title': 'Uploads from 12 Minute Athlete',
2534 'url': 'ytuser:phihag',
2535 'only_matching': True,
2537 'url': 'https://www.youtube.com/c/gametrailers',
2538 'only_matching': True,
2540 'url': 'https://www.youtube.com/gametrailers',
2541 'only_matching': True,
2543 # This channel is not available, geo restricted to JP
2544 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2545 'only_matching': True,
2549 def suitable(cls, url):
2550 # Don't return True if the url can be extracted with other youtube
2551 # extractor, the regex would is too permissive and it would match.
2552 other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2553 if any(ie.suitable(url) for ie in other_yt_ies):
2556 return super(YoutubeUserIE, cls).suitable(url)
2558 def _build_template_url(self, url, channel_id):
2559 mobj = re.match(self._VALID_URL, url)
2560 return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2563 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
2564 IE_DESC = 'YouTube.com live streams'
2565 _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
2566 IE_NAME = 'youtube:live'
2569 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2571 'id': 'a48o2S1cPoo',
2573 'title': 'The Young Turks - Live Main Show',
2574 'uploader': 'The Young Turks',
2575 'uploader_id': 'TheYoungTurks',
2576 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2577 'upload_date': '20150715',
2578 'license': 'Standard YouTube License',
2579 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2580 'categories': ['News & Politics'],
2581 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2583 'dislike_count': int,
2586 'skip_download': True,
2589 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2590 'only_matching': True,
2592 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2593 'only_matching': True,
2595 'url': 'https://www.youtube.com/TheYoungTurks/live',
2596 'only_matching': True,
2599 def _real_extract(self, url):
2600 mobj = re.match(self._VALID_URL, url)
2601 channel_id = mobj.group('id')
2602 base_url = mobj.group('base_url')
2603 webpage = self._download_webpage(url, channel_id, fatal=False)
2605 page_type = self._og_search_property(
2606 'type', webpage, 'page type', default='')
2607 video_id = self._html_search_meta(
2608 'videoId', webpage, 'video id', default=None)
2609 if page_type.startswith('video') and video_id and re.match(
2610 r'^[0-9A-Za-z_-]{11}$', video_id):
2611 return self.url_result(video_id, YoutubeIE.ie_key())
2612 return self.url_result(base_url)
2615 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
2616 IE_DESC = 'YouTube.com user/channel playlists'
2617 _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
2618 IE_NAME = 'youtube:playlists'
2621 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2622 'playlist_mincount': 4,
2624 'id': 'ThirstForScience',
2625 'title': 'Thirst for Science',
2628 # with "Load more" button
2629 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2630 'playlist_mincount': 70,
2633 'title': 'Игорь Клейнер',
2636 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
2637 'playlist_mincount': 17,
2639 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
2640 'title': 'Chem Player',
2645 class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
2646 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2649 class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
2650 IE_DESC = 'YouTube.com searches'
2651 # there doesn't appear to be a real limit, for example if you search for
2652 # 'python' you get more than 8.000.000 results
2653 _MAX_RESULTS = float('inf')
2654 IE_NAME = 'youtube:search'
2655 _SEARCH_KEY = 'ytsearch'
2656 _EXTRA_QUERY_ARGS = {}
2659 def _get_n_results(self, query, n):
2660 """Get a specified number of results for a query"""
2666 'search_query': query.encode('utf-8'),
2668 url_query.update(self._EXTRA_QUERY_ARGS)
2669 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
2671 for pagenum in itertools.count(1):
2672 data = self._download_json(
2673 result_url, video_id='query "%s"' % query,
2674 note='Downloading page %s' % pagenum,
2675 errnote='Unable to download API page',
2676 query={'spf': 'navigate'})
2677 html_content = data[1]['body']['content']
2679 if 'class="search-message' in html_content:
2680 raise ExtractorError(
2681 '[youtube] No video results', expected=True)
2683 new_videos = list(self._process_page(html_content))
2684 videos += new_videos
2685 if not new_videos or len(videos) > limit:
2687 next_link = self._html_search_regex(
2688 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
2689 html_content, 'next link', default=None)
2690 if next_link is None:
2692 result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
2696 return self.playlist_result(videos, query)
2699 class YoutubeSearchDateIE(YoutubeSearchIE):
2700 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
2701 _SEARCH_KEY = 'ytsearchdate'
2702 IE_DESC = 'YouTube.com searches, newest videos first'
2703 _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2706 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
2707 IE_DESC = 'YouTube.com search URLs'
2708 IE_NAME = 'youtube:search_url'
2709 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
2711 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
2712 'playlist_mincount': 5,
2714 'title': 'youtube-dl test video',
2717 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
2718 'only_matching': True,
2721 def _real_extract(self, url):
2722 mobj = re.match(self._VALID_URL, url)
2723 query = compat_urllib_parse_unquote_plus(mobj.group('query'))
2724 webpage = self._download_webpage(url, query)
2725 return self.playlist_result(self._process_page(webpage), playlist_title=query)
2728 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
2729 IE_DESC = 'YouTube.com (multi-season) shows'
2730 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
2731 IE_NAME = 'youtube:show'
2733 'url': 'https://www.youtube.com/show/airdisasters',
2734 'playlist_mincount': 5,
2736 'id': 'airdisasters',
2737 'title': 'Air Disasters',
2741 def _real_extract(self, url):
2742 playlist_id = self._match_id(url)
2743 return super(YoutubeShowIE, self)._real_extract(
2744 'https://www.youtube.com/show/%s/playlists' % playlist_id)
2747 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
2749 Base class for feed extractors
2750 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
2752 _LOGIN_REQUIRED = True
2756 return 'youtube:%s' % self._FEED_NAME
2758 def _real_initialize(self):
2761 def _entries(self, page):
2762 # The extraction process is the same as for playlists, but the regex
2763 # for the video ids doesn't contain an index
2765 more_widget_html = content_html = page
2766 for page_num in itertools.count(1):
2767 matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
2769 # 'recommended' feed has infinite 'load more' and each new portion spins
2770 # the same videos in (sometimes) slightly different order, so we'll check
2771 # for unicity and break when portion has no new videos
2772 new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
2778 for entry in self._ids_to_results(new_ids):
2781 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
2785 more = self._download_json(
2786 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
2787 'Downloading page #%s' % page_num,
2788 transform_source=uppercase_escape)
2789 content_html = more['content_html']
2790 more_widget_html = more['load_more_widget_html']
2792 def _real_extract(self, url):
2793 page = self._download_webpage(
2794 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
2795 self._PLAYLIST_TITLE)
2796 return self.playlist_result(
2797 self._entries(page), playlist_title=self._PLAYLIST_TITLE)
2800 class YoutubeWatchLaterIE(YoutubePlaylistIE):
2801 IE_NAME = 'youtube:watchlater'
2802 IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
2803 _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
2806 'url': 'https://www.youtube.com/playlist?list=WL',
2807 'only_matching': True,
2809 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
2810 'only_matching': True,
2813 def _real_extract(self, url):
2814 _, video = self._check_download_just_video(url, 'WL')
2817 _, playlist = self._extract_playlist('WL')
2821 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
2822 IE_NAME = 'youtube:favorites'
2823 IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
2824 _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
2825 _LOGIN_REQUIRED = True
2827 def _real_extract(self, url):
2828 webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
2829 playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
2830 return self.url_result(playlist_id, 'YoutubePlaylist')
2833 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
2834 IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
2835 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
2836 _FEED_NAME = 'recommended'
2837 _PLAYLIST_TITLE = 'Youtube Recommended videos'
2840 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
2841 IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
2842 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
2843 _FEED_NAME = 'subscriptions'
2844 _PLAYLIST_TITLE = 'Youtube Subscriptions'
2847 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
2848 IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
2849 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
2850 _FEED_NAME = 'history'
2851 _PLAYLIST_TITLE = 'Youtube History'
2854 class YoutubeTruncatedURLIE(InfoExtractor):
2855 IE_NAME = 'youtube:truncated_url'
2856 IE_DESC = False # Do not list
2857 _VALID_URL = r'''(?x)
2859 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
2862 annotation_id=annotation_[^&]+|
2868 attribution_link\?a=[^&]+
2874 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
2875 'only_matching': True,
2877 'url': 'https://www.youtube.com/watch?',
2878 'only_matching': True,
2880 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
2881 'only_matching': True,
2883 'url': 'https://www.youtube.com/watch?feature=foo',
2884 'only_matching': True,
2886 'url': 'https://www.youtube.com/watch?hl=en-GB',
2887 'only_matching': True,
2889 'url': 'https://www.youtube.com/watch?t=2372',
2890 'only_matching': True,
2893 def _real_extract(self, url):
2894 raise ExtractorError(
2895 'Did you forget to quote the URL? Remember that & is a meta '
2896 'character in most shells, so you want to put the URL in quotes, '
2898 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
2899 ' or simply youtube-dl BaW_jenozKc .',
2903 class YoutubeTruncatedIDIE(InfoExtractor):
2904 IE_NAME = 'youtube:truncated_id'
2905 IE_DESC = False # Do not list
2906 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
2909 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
2910 'only_matching': True,
2913 def _real_extract(self, url):
2914 video_id = self._match_id(url)
2915 raise ExtractorError(
2916 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),