3 from __future__ import unicode_literals
13 from .common import InfoExtractor, SearchInfoExtractor
14 from ..jsinterp import JSInterpreter
15 from ..swfinterp import SWFInterpreter
16 from ..compat import (
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
22 compat_urllib_parse_urlparse,
23 compat_urllib_request,
32 get_element_by_attribute,
48 class YoutubeBaseInfoExtractor(InfoExtractor):
49 """Provide base functions for Youtube extractors"""
50 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
51 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
52 _NETRC_MACHINE = 'youtube'
53 # If True it will raise an error if no login info is provided
54 _LOGIN_REQUIRED = False
56 def _set_language(self):
58 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
59 # YouTube sets the expire time to about two months
60 expire_time=time.time() + 2 * 30 * 24 * 3600)
62 def _ids_to_results(self, ids):
64 self.url_result(vid_id, 'Youtube', video_id=vid_id)
69 Attempt to log in to YouTube.
70 True is returned if successful or skipped.
71 False is returned if login failed.
73 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
75 (username, password) = self._get_login_info()
76 # No authentication to be performed
78 if self._LOGIN_REQUIRED:
79 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
82 login_page = self._download_webpage(
83 self._LOGIN_URL, None,
84 note='Downloading login page',
85 errnote='unable to fetch login page', fatal=False)
86 if login_page is False:
89 galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
90 login_page, 'Login GALX parameter')
94 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
99 'PersistentCookie': 'yes',
101 'bgresponse': 'js_disabled',
102 'checkConnection': '',
103 'checkedDomains': 'youtube',
110 'service': 'youtube',
115 login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
117 req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
118 login_results = self._download_webpage(
120 note='Logging in', errnote='unable to log in', fatal=False)
121 if login_results is False:
124 if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
125 raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
128 # TODO add SMS and phone call support - these require making a request and then prompting the user
130 if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
131 tfa_code = self._get_tfa_info('2-step verification code')
134 self._downloader.report_warning(
135 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
136 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
139 tfa_code = remove_start(tfa_code, 'G-')
141 tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
143 tfa_form_strs.update({
148 tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
150 tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
151 tfa_results = self._download_webpage(
153 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
155 if tfa_results is False:
158 if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
159 self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
161 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
162 self._downloader.report_warning('unable to log in - did the page structure change?')
164 if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
165 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
168 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
169 self._downloader.report_warning('unable to log in: bad username or password')
173 def _real_initialize(self):
174 if self._downloader is None:
177 if not self._login():
181 class YoutubeEntryListBaseInfoExtractor(InfoExtractor):
182 # Extract entries from page with "Load more" button
183 def _entries(self, page, playlist_id):
184 more_widget_html = content_html = page
185 for page_num in itertools.count(1):
186 for entry in self._process_page(content_html):
189 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
193 more = self._download_json(
194 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
195 'Downloading page #%s' % page_num,
196 transform_source=uppercase_escape)
197 content_html = more['content_html']
198 if not content_html.strip():
199 # Some webpages show a "Load more" button but they don't
202 more_widget_html = more['load_more_widget_html']
205 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
206 def _process_page(self, content):
207 for video_id, video_title in self.extract_videos_from_page(content):
208 yield self.url_result(video_id, 'Youtube', video_id, video_title)
210 def extract_videos_from_page(self, page):
213 for mobj in re.finditer(self._VIDEO_RE, page):
214 # The link with index 0 is not the first video of the playlist (not sure if this still applies)
215 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
217 video_id = mobj.group('id')
218 video_title = unescapeHTML(mobj.group('title'))
220 video_title = video_title.strip()
222 idx = ids_in_page.index(video_id)
223 if video_title and not titles_in_page[idx]:
224 titles_in_page[idx] = video_title
226 ids_in_page.append(video_id)
227 titles_in_page.append(video_title)
228 return zip(ids_in_page, titles_in_page)
231 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
232 def _process_page(self, content):
233 for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content):
234 yield self.url_result(
235 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
237 def _real_extract(self, url):
238 playlist_id = self._match_id(url)
239 webpage = self._download_webpage(url, playlist_id)
240 title = self._og_search_title(webpage, fatal=False)
241 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
244 class YoutubeIE(YoutubeBaseInfoExtractor):
245 IE_DESC = 'YouTube.com'
246 _VALID_URL = r"""(?x)^
248 (?:https?://|//) # http(s):// or protocol-independent URL
249 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
250 (?:www\.)?deturl\.com/www\.youtube\.com/|
251 (?:www\.)?pwnyoutube\.com/|
252 (?:www\.)?yourepeat\.com/|
253 tube\.majestyc\.net/|
254 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
255 (?:.*?\#/)? # handle anchor (#/) redirect urls
256 (?: # the various things that can precede the ID:
257 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
258 |(?: # or the v= param in all its forms
259 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
260 (?:\?|\#!?) # the params delimiter ? or # or #!
261 (?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
266 youtu\.be| # just youtu.be/xxxx
267 vid\.plus # or vid.plus/xxxx
269 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
271 )? # all until now is optional -> you can pass the naked ID
272 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
273 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
274 (?(1).+)? # if we found the ID, everything can follow
276 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
278 '5': {'ext': 'flv', 'width': 400, 'height': 240},
279 '6': {'ext': 'flv', 'width': 450, 'height': 270},
280 '13': {'ext': '3gp'},
281 '17': {'ext': '3gp', 'width': 176, 'height': 144},
282 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
283 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
284 '34': {'ext': 'flv', 'width': 640, 'height': 360},
285 '35': {'ext': 'flv', 'width': 854, 'height': 480},
286 '36': {'ext': '3gp', 'width': 320, 'height': 240},
287 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
288 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
289 '43': {'ext': 'webm', 'width': 640, 'height': 360},
290 '44': {'ext': 'webm', 'width': 854, 'height': 480},
291 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
292 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
293 '59': {'ext': 'mp4', 'width': 854, 'height': 480},
294 '78': {'ext': 'mp4', 'width': 854, 'height': 480},
298 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
299 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
300 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
301 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
302 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
303 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
304 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
306 # Apple HTTP Live Streaming
307 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
308 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
309 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
310 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
311 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
312 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
313 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
316 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
317 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
318 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
319 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
320 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
321 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
322 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
323 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
324 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
325 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
326 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
329 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
330 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
331 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
334 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
335 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
336 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
337 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
338 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
339 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
340 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
341 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
342 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
343 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
344 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
345 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
346 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
347 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
348 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
349 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
350 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
351 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
352 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
353 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
354 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
357 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
358 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
360 # Dash webm audio with opus inside
361 '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
362 '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
363 '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
366 '_rtmp': {'protocol': 'rtmp'},
372 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
376 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
377 'uploader': 'Philipp Hagemeister',
378 'uploader_id': 'phihag',
379 'upload_date': '20121002',
380 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
381 'categories': ['Science & Technology'],
382 'tags': ['youtube-dl'],
384 'dislike_count': int,
390 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
391 'note': 'Test generic use_cipher_signature video (#897)',
395 'upload_date': '20120506',
396 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
397 'description': 'md5:782e8651347686cba06e58f71ab51773',
398 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
399 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
400 'iconic ep', 'iconic', 'love', 'it'],
401 'uploader': 'Icona Pop',
402 'uploader_id': 'IconaPop',
406 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
407 'note': 'Test VEVO video with age protection (#956)',
411 'upload_date': '20130703',
412 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
413 'description': 'md5:64249768eec3bc4276236606ea996373',
414 'uploader': 'justintimberlakeVEVO',
415 'uploader_id': 'justintimberlakeVEVO',
420 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
421 'note': 'Embed-only video (#1746)',
425 'upload_date': '20120608',
426 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
427 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
428 'uploader': 'SET India',
429 'uploader_id': 'setindia'
433 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
434 'note': 'Use the first video ID in the URL',
438 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
439 'uploader': 'Philipp Hagemeister',
440 'uploader_id': 'phihag',
441 'upload_date': '20121002',
442 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
443 'categories': ['Science & Technology'],
444 'tags': ['youtube-dl'],
446 'dislike_count': int,
449 'skip_download': True,
453 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
454 'note': '256k DASH audio (format 141) via DASH manifest',
458 'upload_date': '20121002',
459 'uploader_id': '8KVIDEO',
461 'uploader': '8KVIDEO',
462 'title': 'UHDTV TEST 8K VIDEO.mp4'
465 'youtube_include_dash_manifest': True,
469 # DASH manifest with encrypted signature
471 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
475 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
476 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
477 'uploader': 'AfrojackVEVO',
478 'uploader_id': 'AfrojackVEVO',
479 'upload_date': '20131011',
482 'youtube_include_dash_manifest': True,
486 # JS player signature function name containing $
488 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
492 'title': 'Taylor Swift - Shake It Off',
493 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
494 'uploader': 'TaylorSwiftVEVO',
495 'uploader_id': 'TaylorSwiftVEVO',
496 'upload_date': '20140818',
499 'youtube_include_dash_manifest': True,
505 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
509 'upload_date': '20100909',
510 'uploader': 'The Amazing Atheist',
511 'uploader_id': 'TheAmazingAtheist',
512 'title': 'Burning Everyone\'s Koran',
513 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
516 # Normal age-gate video (No vevo, embed allowed)
518 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
522 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
523 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
524 'uploader': 'The Witcher',
525 'uploader_id': 'WitcherGame',
526 'upload_date': '20140605',
530 # Age-gate video with encrypted signature
532 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
536 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
537 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
538 'uploader': 'LloydVEVO',
539 'uploader_id': 'LloydVEVO',
540 'upload_date': '20110629',
544 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
546 'url': '__2ABJjxzNo',
550 'upload_date': '20100430',
551 'uploader_id': 'deadmau5',
552 'description': 'md5:12c56784b8032162bb936a5f76d55360',
553 'uploader': 'deadmau5',
554 'title': 'Deadmau5 - Some Chords (HD)',
556 'expected_warnings': [
557 'DASH manifest missing',
560 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
562 'url': 'lqQg6PlCWgI',
566 'upload_date': '20120724',
567 'uploader_id': 'olympic',
568 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
569 'uploader': 'Olympics',
570 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
573 'skip_download': 'requires avconv',
578 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
582 'stretched_ratio': 16 / 9.,
583 'upload_date': '20110310',
584 'uploader_id': 'AllenMeow',
585 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
587 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
590 # url_encoded_fmt_stream_map is empty string
592 'url': 'qEJwOuvDf7I',
596 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
598 'upload_date': '20150404',
599 'uploader_id': 'spbelect',
600 'uploader': 'Наблюдатели Петербурга',
603 'skip_download': 'requires avconv',
606 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
608 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
612 'title': 'md5:7b81415841e02ecd4313668cde88737a',
613 'description': 'md5:116377fd2963b81ec4ce64b542173306',
614 'upload_date': '20150625',
615 'uploader_id': 'dorappi2000',
616 'uploader': 'dorappi2000',
617 'formats': 'mincount:33',
620 # DASH manifest with segment_list
622 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
623 'md5': '8ce563a1d667b599d21064e982ab9e31',
627 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
628 'uploader': 'Airtek',
629 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
630 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
631 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
634 'youtube_include_dash_manifest': True,
635 'format': '135', # bestvideo
639 # Multifeed videos (multiple cameras), URL is for Main Camera
640 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
643 'title': 'teamPGP: Rocket League Noob Stream',
644 'description': 'md5:dc7872fb300e143831327f1bae3af010',
650 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
651 'description': 'md5:dc7872fb300e143831327f1bae3af010',
652 'upload_date': '20150721',
653 'uploader': 'Beer Games Beer',
654 'uploader_id': 'beergamesbeer',
660 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
661 'description': 'md5:dc7872fb300e143831327f1bae3af010',
662 'upload_date': '20150721',
663 'uploader': 'Beer Games Beer',
664 'uploader_id': 'beergamesbeer',
670 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
671 'description': 'md5:dc7872fb300e143831327f1bae3af010',
672 'upload_date': '20150721',
673 'uploader': 'Beer Games Beer',
674 'uploader_id': 'beergamesbeer',
680 'title': 'teamPGP: Rocket League Noob Stream (zim)',
681 'description': 'md5:dc7872fb300e143831327f1bae3af010',
682 'upload_date': '20150721',
683 'uploader': 'Beer Games Beer',
684 'uploader_id': 'beergamesbeer',
688 'skip_download': True,
692 'url': 'http://vid.plus/FlRa-iH7PGw',
693 'only_matching': True,
696 # Title with JS-like syntax "};"
697 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
701 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
702 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
703 'upload_date': '20151119',
704 'uploader_id': 'IronSoulElf',
705 'uploader': 'IronSoulElf',
708 'skip_download': True,
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions are stored here by _decrypt_signature, keyed by
    # (player_url, signature cache id).
    self._player_cache = dict()
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is about to be downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Tell the user that video information is about to be extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Tell the user that the given format is not available for video_id."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
def report_rtmp_download(self):
    """Tell the user that the download will use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of example_sig, joined by dots.

    For instance a signature made of a 3-character part and a 5-character
    part separated by '.' gives '3.5'.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
737 def _extract_signature_function(self, video_id, player_url, example_sig):
739 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
742 raise ExtractorError('Cannot identify player %r' % player_url)
743 player_type = id_m.group('ext')
744 player_id = id_m.group('id')
746 # Read from filesystem cache
747 func_id = '%s_%s_%s' % (
748 player_type, player_id, self._signature_cache_id(example_sig))
749 assert os.path.basename(func_id) == func_id
751 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
752 if cache_spec is not None:
753 return lambda s: ''.join(s[i] for i in cache_spec)
756 'Downloading player %s' % player_url
757 if self._downloader.params.get('verbose') else
758 'Downloading %s player %s' % (player_type, player_id)
760 if player_type == 'js':
761 code = self._download_webpage(
762 player_url, video_id,
764 errnote='Download of %s failed' % player_url)
765 res = self._parse_sig_js(code)
766 elif player_type == 'swf':
767 urlh = self._request_webpage(
768 player_url, video_id,
770 errnote='Download of %s failed' % player_url)
772 res = self._parse_sig_swf(code)
774 assert False, 'Invalid player type %r' % player_type
776 test_string = ''.join(map(compat_chr, range(len(example_sig))))
777 cache_res = res(test_string)
778 cache_spec = [ord(c) for c in cache_res]
780 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
783 def _print_sig_code(self, func, example_sig):
784 def gen_sig_code(idxs):
785 def _genslice(start, end, step):
786 starts = '' if start == 0 else str(start)
787 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
788 steps = '' if step == 1 else (':%d' % step)
789 return 's[%s%s%s]' % (starts, ends, steps)
792 # Quell pyflakes warnings - start will be set when step is set
793 start = '(Never used)'
794 for i, prev in zip(idxs[1:], idxs[:-1]):
798 yield _genslice(start, prev, step)
801 if i - prev in [-1, 1]:
810 yield _genslice(start, i, step)
812 test_string = ''.join(map(compat_chr, range(len(example_sig))))
813 cache_res = func(test_string)
814 cache_spec = [ord(c) for c in cache_res]
815 expr_code = ' + '.join(gen_sig_code(cache_spec))
816 signature_id_tuple = '(%s)' % (
817 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
818 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
819 ' return %s\n') % (signature_id_tuple, expr_code)
820 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Build a signature-deciphering callable from JS player source.

    The name of the signature function is located in jscode with a regex,
    the function is extracted through JSInterpreter, and the returned
    callable invokes it with the signature string as its single argument.
    """
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')

    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(sig_func_name)

    def run(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run
def _parse_sig_swf(self, file_contents):
    """Build a signature-deciphering callable from SWF player contents.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    through SWFInterpreter; the returned callable invokes it with the
    signature string as its single argument.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run
838 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
839 """Turn the encrypted s field into a working signature"""
841 if player_url is None:
842 raise ExtractorError('Cannot decrypt signature without player_url')
844 if player_url.startswith('//'):
845 player_url = 'https:' + player_url
847 player_id = (player_url, self._signature_cache_id(s))
848 if player_id not in self._player_cache:
849 func = self._extract_signature_function(
850 video_id, player_url, s
852 self._player_cache[player_id] = func
853 func = self._player_cache[player_id]
854 if self._downloader.params.get('youtube_print_sig_code'):
855 self._print_sig_code(func, s)
857 except Exception as e:
858 tb = traceback.format_exc()
859 raise ExtractorError(
860 'Signature extraction failed: ' + tb, cause=e)
862 def _get_subtitles(self, video_id, webpage):
864 subs_doc = self._download_xml(
865 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
866 video_id, note=False)
867 except ExtractorError as err:
868 self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
872 for track in subs_doc.findall('track'):
873 lang = track.attrib['lang_code']
874 if lang in sub_lang_list:
877 for ext in ['sbv', 'vtt', 'srt']:
878 params = compat_urllib_parse.urlencode({
882 'name': track.attrib['name'].encode('utf-8'),
885 'url': 'https://www.youtube.com/api/timedtext?' + params,
888 sub_lang_list[lang] = sub_formats
889 if not sub_lang_list:
890 self._downloader.report_warning('video doesn\'t have subtitles')
894 def _get_ytplayer_config(self, webpage):
896 r';ytplayer\.config\s*=\s*({.*?});ytplayer',
897 r';ytplayer\.config\s*=\s*({.*?});',
899 config = self._search_regex(patterns, webpage, 'ytconfig.player', default=None)
900 if config is not None:
901 return json.loads(uppercase_escape(config))
903 def _get_automatic_captions(self, video_id, webpage):
904 """We need the webpage for getting the captions url, pass it as an
905 argument to speed up the process."""
906 self.to_screen('%s: Looking for automatic captions' % video_id)
907 player_config = self._get_ytplayer_config(webpage)
908 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
909 if player_config is None:
910 self._downloader.report_warning(err_msg)
913 args = player_config['args']
914 caption_url = args['ttsurl']
915 timestamp = args['timestamp']
916 # We get the available subtitles
917 list_params = compat_urllib_parse.urlencode({
922 list_url = caption_url + '&' + list_params
923 caption_list = self._download_xml(list_url, video_id)
924 original_lang_node = caption_list.find('track')
925 if original_lang_node is None:
926 self._downloader.report_warning('Video doesn\'t have automatic captions')
928 original_lang = original_lang_node.attrib['lang_code']
929 caption_kind = original_lang_node.attrib.get('kind', '')
932 for lang_node in caption_list.findall('target'):
933 sub_lang = lang_node.attrib['lang_code']
935 for ext in ['sbv', 'vtt', 'srt']:
936 params = compat_urllib_parse.urlencode({
937 'lang': original_lang,
941 'kind': caption_kind,
944 'url': caption_url + '&' + params,
947 sub_lang_list[sub_lang] = sub_formats
949 # An extractor error can be raised by the download process if there are
950 # no automatic captions but there are subtitles
951 except (KeyError, ExtractorError):
952 self._downloader.report_warning(err_msg)
956 def extract_id(cls, url):
957 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
959 raise ExtractorError('Invalid URL: %s' % url)
960 video_id = mobj.group(2)
963 def _extract_from_m3u8(self, manifest_url, video_id):
966 def _get_urls(_manifest):
967 lines = _manifest.split('\n')
968 urls = filter(lambda l: l and not l.startswith('#'),
971 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
972 formats_urls = _get_urls(manifest)
973 for format_url in formats_urls:
974 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
975 url_map[itag] = format_url
978 def _extract_annotations(self, video_id):
979 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
980 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
def _parse_dash_manifest(
        self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
    """Download a DASH manifest and return the formats it describes.

    An encrypted ``/s/<sig>`` component of ``dash_manifest_url`` is replaced
    by ``/signature/<decrypted sig>`` before the manifest is requested.

    Returns a list of format dicts; the list is empty when
    ``_download_xml`` returned False.  Representations that share a
    ``format_id`` are merged into a single entry.
    """
    def decrypt_sig(mobj):
        s = mobj.group(1)
        dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
        return '/signature/%s' % dec_s
    dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
    dash_doc = self._download_xml(
        dash_manifest_url, video_id,
        note='Downloading DASH manifest',
        errnote='Could not download DASH manifest',
        fatal=fatal)

    if dash_doc is False:
        return []

    formats = []
    for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
        mime_type = a.attrib.get('mimeType')
        for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
            if url_el is None:
                continue
            if mime_type == 'text/vtt':
                # TODO implement WebVTT downloading
                pass
            # mime_type is None when the AdaptationSet carries no mimeType
            # attribute; treat that as an unknown type instead of crashing.
            elif mime_type and mime_type.startswith(('audio/', 'video/')):
                segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                format_id = r.attrib['id']
                video_url = url_el.text
                filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
                f = {
                    'format_id': format_id,
                    'url': video_url,
                    'width': int_or_none(r.attrib.get('width')),
                    'height': int_or_none(r.attrib.get('height')),
                    'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                    'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                    'filesize': filesize,
                    'fps': int_or_none(r.attrib.get('frameRate')),
                }
                if segment_list is not None:
                    f.update({
                        'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
                        'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
                        'protocol': 'http_dash_segments',
                    })
                try:
                    existing_format = next(
                        fo for fo in formats
                        if fo['format_id'] == format_id)
                except StopIteration:
                    # First time this format id is seen: start from the
                    # statically known metadata and overlay the manifest data.
                    full_info = self._formats.get(format_id, {}).copy()
                    full_info.update(f)
                    codecs = r.attrib.get('codecs')
                    if codecs:
                        if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
                            full_info['vcodec'] = codecs
                        elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
                            full_info['acodec'] = codecs
                    formats.append(full_info)
                else:
                    existing_format.update(f)
            else:
                self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
    return formats
# Extract the metadata and the available formats for a single video URL.
# NOTE(review): this listing omits the shortest source lines (see the gaps in
# the embedded line numbers), so some else/try/return/break lines that the
# control flow depends on are not visible here.
1049 def _real_extract(self, url):
1050 url, smuggled_data = unsmuggle_url(url, {})
1053 'http' if self._downloader.params.get('prefer_insecure', False)
# Pick up optional start/end offsets from the 't', 'start' and 'end'
# parameters found in the URL fragment or query string.
1058 parsed_url = compat_urllib_parse_urlparse(url)
1059 for component in [parsed_url.fragment, parsed_url.query]:
1060 query = compat_parse_qs(component)
1061 if start_time is None and 't' in query:
1062 start_time = parse_duration(query['t'][0])
1063 if start_time is None and 'start' in query:
1064 start_time = parse_duration(query['start'][0])
1065 if end_time is None and 'end' in query:
1066 end_time = parse_duration(query['end'][0])
1068 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1069 mobj = re.search(self._NEXT_URL_RE, url)
1071 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1072 video_id = self.extract_id(url)
# Download the watch page for the extracted video id.
1075 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1076 video_webpage = self._download_webpage(url, video_id)
1078 # Attempt to extract SWF player URL
1079 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1080 if mobj is not None:
1081 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
# Remember each distinct DASH manifest URL ('dashmpd') in dash_mpds.
1087 def add_dash_mpd(video_info):
1088 dash_mpd = video_info.get('dashmpd')
1089 if dash_mpd and dash_mpd[0] not in dash_mpds:
1090 dash_mpds.append(dash_mpd[0])
1093 embed_webpage = None
# Age-gated watch page: query get_video_info with an 'sts' value read from
# the embed page.
1095 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1097 # We simulate the access to the video from www.youtube.com/v/{video_id}
1098 # this can be viewed without login into Youtube
1099 url = proto + '://www.youtube.com/embed/%s' % video_id
1100 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1101 data = compat_urllib_parse.urlencode({
1102 'video_id': video_id,
1103 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1104 'sts': self._search_regex(
1105 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1107 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1108 video_info_webpage = self._download_webpage(
1109 video_info_url, video_id,
1110 note='Refetching age-gated info webpage',
1111 errnote='unable to download video info webpage')
1112 video_info = compat_parse_qs(video_info_webpage)
1113 add_dash_mpd(video_info)
1117 # Try looking directly into the video webpage
1118 ytplayer_config = self._get_ytplayer_config(video_webpage)
1119 if ytplayer_config is not None:
1120 args = ytplayer_config['args']
1121 if args.get('url_encoded_fmt_stream_map'):
1122 # Convert to the same format returned by compat_parse_qs
1123 video_info = dict((k, [v]) for k, v in args.items())
1124 add_dash_mpd(video_info)
1125 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1127 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1128 # We also try looking in get_video_info since it may contain different dashmpd
1129 # URL that points to a DASH manifest with possibly different itag set (some itags
1130 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1131 # manifest pointed by get_video_info's dashmpd).
1132 # The general idea is to take a union of itags of both DASH manifests (for example
1133 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1134 self.report_video_info_webpage_download(video_id)
1135 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1137 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1138 % (proto, video_id, el_type))
1139 video_info_webpage = self._download_webpage(
1141 video_id, note=False,
1142 errnote='unable to download video info webpage')
1143 get_video_info = compat_parse_qs(video_info_webpage)
1144 if get_video_info.get('use_cipher_signature') != ['True']:
1145 add_dash_mpd(get_video_info)
1147 video_info = get_video_info
1148 if 'token' in get_video_info:
1149 # Different get_video_info requests may report different results, e.g.
1150 # some may report video unavailability, but some may serve it without
1151 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1152 # the original webpage as well as el=info and el=embedded get_video_info
1153 # requests report video unavailability due to geo restriction while
1154 # el=detailpage succeeds and returns valid data). This is probably
1155 # due to YouTube measures against IP ranges of hosting providers.
1156 # Working around by preferring the first succeeded video_info containing
1157 # the token if no such video_info yet was found.
1158 if 'token' not in video_info:
1159 video_info = get_video_info
# Without a 'token' the video cannot be extracted: report YouTube's own
# 'reason' when one was given, otherwise a generic error.
1161 if 'token' not in video_info:
1162 if 'reason' in video_info:
1163 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1164 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1166 raise ExtractorError('YouTube said: This video is available in %s only' % (
1167 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1169 raise ExtractorError(
1170 'YouTube said: %s' % video_info['reason'][0],
1171 expected=True, video_id=video_id)
1173 raise ExtractorError(
1174 '"token" parameter not in video info for unknown reason',
1178 if 'title' in video_info:
1179 video_title = video_info['title'][0]
1181 self._downloader.report_warning('Unable to extract video title')
# Description: taken from the "eow-description" element of the watch page,
# falling back to the <meta name="description"> tag.
1185 video_description = get_element_by_id("eow-description", video_webpage)
1186 if video_description:
1187 video_description = re.sub(r'''(?x)
1189 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1191 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1192 class="yt-uix-redirect-link"\s*>
1195 ''', r'\1', video_description)
1196 video_description = clean_html(video_description)
1198 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1200 video_description = unescapeHTML(fd_mobj.group(1))
1202 video_description = ''
# Videos carrying 'multifeed_metadata_list' are returned as a playlist with
# one url_transparent entry per feed, unless 'noplaylist' is set or the URL
# was smuggled with force_singlefeed.
1204 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1205 if not self._downloader.params.get('noplaylist'):
1208 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
1209 for feed in multifeed_metadata_list.split(','):
1210 feed_data = compat_parse_qs(feed)
1212 '_type': 'url_transparent',
1213 'ie_key': 'Youtube',
1215 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1216 {'force_singlefeed': True}),
1217 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1219 feed_ids.append(feed_data['id'][0])
1221 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1222 % (', '.join(feed_ids), video_id))
1223 return self.playlist_result(entries, video_id, video_title, video_description)
1224 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1226 if 'view_count' in video_info:
1227 view_count = int(video_info['view_count'][0])
1231 # Check for "rental" videos
1232 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1233 raise ExtractorError('"rental" videos not supported')
1235 # Start extracting information
1236 self.report_information_extraction(video_id)
1239 if 'author' not in video_info:
1240 raise ExtractorError('Unable to extract uploader name')
1241 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1244 video_uploader_id = None
1245 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1246 if mobj is not None:
1247 video_uploader_id = mobj.group(1)
1249 self._downloader.report_warning('unable to extract uploader nickname')
1252 # We try first to get a high quality image:
1253 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1254 video_webpage, re.DOTALL)
1255 if m_thumb is not None:
1256 video_thumbnail = m_thumb.group(1)
1257 elif 'thumbnail_url' not in video_info:
1258 self._downloader.report_warning('unable to extract video thumbnail')
1259 video_thumbnail = None
1260 else: # don't panic if we can't find it
1261 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1264 upload_date = self._html_search_meta(
1265 'datePublished', video_webpage, 'upload date', default=None)
1267 upload_date = self._search_regex(
1268 [r'(?s)id="eow-date.*?>(.*?)</span>',
1269 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1270 video_webpage, 'upload date', default=None)
1272 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1273 upload_date = unified_strdate(upload_date)
1275 m_cat_container = self._search_regex(
1276 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1277 video_webpage, 'categories', default=None)
1279 category = self._html_search_regex(
1280 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1282 video_categories = None if category is None else [category]
1284 video_categories = None
1287 unescapeHTML(m.group('content'))
1288 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
# Read a like/dislike counter from the corresponding button markup; the
# regex search defaults to None when the button is not found.
1290 def _extract_count(count_name):
1291 return str_to_int(self._search_regex(
1292 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1293 % re.escape(count_name),
1294 video_webpage, count_name, default=None))
1296 like_count = _extract_count('like')
1297 dislike_count = _extract_count('dislike')
1300 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1301 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1303 if 'length_seconds' not in video_info:
1304 self._downloader.report_warning('unable to extract video duration')
1305 video_duration = None
1307 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1310 video_annotations = None
1311 if self._downloader.params.get('writeannotations', False):
1312 video_annotations = self._extract_annotations(video_id)
# Build format dicts from an {itag: url} map, merging in the per-itag
# metadata stored in self._formats when the itag is known.
1314 def _map_to_format_list(urlmap):
1316 for itag, video_real_url in urlmap.items():
1319 'url': video_real_url,
1320 'player_url': player_url,
1322 if itag in self._formats:
1323 dct.update(self._formats[itag])
# Formats come from one of three places: an RTMP 'conn' value, the
# url_encoded_fmt_stream_map / adaptive_fmts lists, or an HLS manifest
# ('hlsvp').
1327 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1328 self.report_rtmp_download()
1330 'format_id': '_rtmp',
1332 'url': video_info['conn'][0],
1333 'player_url': player_url,
1335 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1336 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1337 if 'rtmpe%3Dyes' in encoded_url_map:
1338 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1340 for url_data_str in encoded_url_map.split(','):
1341 url_data = compat_parse_qs(url_data_str)
1342 if 'itag' not in url_data or 'url' not in url_data:
1344 format_id = url_data['itag'][0]
1345 url = url_data['url'][0]
# A ready-made 'sig' is appended as is; an encrypted 's' has to be decrypted
# with the player whose URL is looked up in the watch/embed page.
1347 if 'sig' in url_data:
1348 url += '&signature=' + url_data['sig'][0]
1349 elif 's' in url_data:
1350 encrypted_sig = url_data['s'][0]
1351 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1353 jsplayer_url_json = self._search_regex(
1355 embed_webpage if age_gate else video_webpage,
1356 'JS player URL (1)', default=None)
1357 if not jsplayer_url_json and not age_gate:
1358 # We need the embed website after all
1359 if embed_webpage is None:
1360 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1361 embed_webpage = self._download_webpage(
1362 embed_url, video_id, 'Downloading embed webpage')
1363 jsplayer_url_json = self._search_regex(
1364 ASSETS_RE, embed_webpage, 'JS player URL')
1366 player_url = json.loads(jsplayer_url_json)
1367 if player_url is None:
1368 player_url_json = self._search_regex(
1369 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1370 video_webpage, 'age gate player URL')
1371 player_url = json.loads(player_url_json)
1373 if self._downloader.params.get('verbose'):
1374 if player_url is None:
1375 player_version = 'unknown'
1376 player_desc = 'unknown'
1378 if player_url.endswith('swf'):
1379 player_version = self._search_regex(
1380 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1381 'flash player', fatal=False)
1382 player_desc = 'flash player %s' % player_version
1384 player_version = self._search_regex(
1385 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
1387 'html5 player', fatal=False)
1388 player_desc = 'html5 player %s' % player_version
1390 parts_sizes = self._signature_cache_id(encrypted_sig)
1391 self.to_screen('{%s} signature length %s, %s' %
1392 (format_id, parts_sizes, player_desc))
1394 signature = self._decrypt_signature(
1395 encrypted_sig, video_id, player_url, age_gate)
1396 url += '&signature=' + signature
1397 if 'ratebypass' not in url:
1398 url += '&ratebypass=yes'
1400 # Some itags are not included in DASH manifest thus corresponding formats will
1401 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1402 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1403 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1404 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1406 'format_id': format_id,
1408 'player_url': player_url,
1409 'filesize': int_or_none(url_data.get('clen', [None])[0]),
1410 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1413 'fps': int_or_none(url_data.get('fps', [None])[0]),
1414 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
# Split the 'type' value ("kind/ext; codecs=...") into its kind, extension
# and codec list.
1416 type_ = url_data.get('type', [None])[0]
1418 type_split = type_.split(';')
1419 kind_ext = type_split[0].split('/')
1420 if len(kind_ext) == 2:
1421 kind, ext = kind_ext
1423 if kind in ('audio', 'video'):
1425 for mobj in re.finditer(
1426 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1427 if mobj.group('key') == 'codecs':
1428 codecs = mobj.group('val')
1431 codecs = codecs.split(',')
1432 if len(codecs) == 2:
1433 acodec, vcodec = codecs[0], codecs[1]
1435 acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1440 if format_id in self._formats:
1441 dct.update(self._formats[format_id])
1443 elif video_info.get('hlsvp'):
1444 manifest_url = video_info['hlsvp'][0]
1445 url_map = self._extract_from_m3u8(manifest_url, video_id)
1446 formats = _map_to_format_list(url_map)
1448 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1450 # Look for the DASH manifest
1451 if self._downloader.params.get('youtube_include_dash_manifest', True):
1452 dash_mpd_fatal = True
1453 for dash_manifest_url in dash_mpds:
1456 for df in self._parse_dash_manifest(
1457 video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
1458 # Do not overwrite DASH format found in some previous DASH manifest
1459 if df['format_id'] not in dash_formats:
1460 dash_formats[df['format_id']] = df
1461 # Additional DASH manifests may end up in HTTP Error 403 therefore
1462 # allow them to fail without bug report message if we already have
1463 # some DASH manifest succeeded. This is temporary workaround to reduce
1464 # burst of bug reports until we figure out the reason and whether it
1465 # can be fixed at all.
1466 dash_mpd_fatal = False
1467 except (ExtractorError, KeyError) as e:
1468 self.report_warning(
1469 'Skipping DASH manifest: %r' % e, video_id)
1471 # Remove the formats we found through non-DASH, they
1472 # contain less info and it can be wrong, because we use
1473 # fixed values (for example the resolution). See
1474 # https://github.com/rg3/youtube-dl/issues/5774 for an
1476 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1477 formats.extend(dash_formats.values())
1479 # Check for malformed aspect ratio
1480 stretched_m = re.search(
1481 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1484 ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
1486 if f.get('vcodec') != 'none':
1487 f['stretched_ratio'] = ratio
1489 self._sort_formats(formats)
# Keys of the result dict assembled from the values gathered above.
1493 'uploader': video_uploader,
1494 'uploader_id': video_uploader_id,
1495 'upload_date': upload_date,
1496 'title': video_title,
1497 'thumbnail': video_thumbnail,
1498 'description': video_description,
1499 'categories': video_categories,
1501 'subtitles': video_subtitles,
1502 'automatic_captions': automatic_captions,
1503 'duration': video_duration,
1504 'age_limit': 18 if age_gate else 0,
1505 'annotations': video_annotations,
1506 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1507 'view_count': view_count,
1508 'like_count': like_count,
1509 'dislike_count': dislike_count,
1510 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1513 'start_time': start_time,
1514 'end_time': end_time,
class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        # Was misspelled 'playlist_mincout'; spelled like the other entries
        # so the minimum-count check is actually applied.
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Extract a mix playlist from the watch page of its seed video.

        The seed video id is taken from the last 11 characters of
        ``playlist_id``.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        # The title element's class varies; take the first one that is present.
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download the playlist page for ``playlist_id`` and return a playlist result.

        Raises ExtractorError when the page carries an alert saying that the
        playlist does not exist, is private, or that the parameters are
        invalid.  Other alerts are reported as warnings.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)

    def _real_extract(self, url):
        """Dispatch ``url`` to single-video, mix or regular playlist extraction."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    def _real_extract(self, url):
        """Extract a channel, preferring its 'UU...' uploads playlist.

        When no 'UC...' channel id can be read from the channel page, the
        videos listed on the channel's own pages are returned instead.
        """
        channel_id = self._match_id(url)
        videos_url = self._TEMPLATE_URL % channel_id

        # Page-by-page channel listing is capped at 35 pages of 30 items,
        # i.e. 1050 videos in total (see #5778), so try to switch to the
        # channel's uploads playlist first and only fall back to paging
        # when its id cannot be determined.
        overview_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        external_id = False
        if overview_page is not False:
            external_id = self._html_search_meta(
                'channelId', overview_page, 'channel id', default=None)
            if not external_id:
                external_id = self._search_regex(
                    r'data-(?:channel-external-|yt)id="([^"]+)"',
                    overview_page, 'channel id', default=None)
        if external_id and external_id.startswith('UC'):
            uploads_id = 'UU' + external_id[2:]
            playlist_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        first_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')
        autogenerated_mark = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', first_page)

        if autogenerated_mark is None:
            return self.playlist_result(self._entries(first_page, channel_id), channel_id)

        # Autogenerated channels keep all their videos on this single page;
        # their ajax pages are empty and cannot be used.
        entries = []
        for video_id, video_title in self.extract_videos_from_page(first_page):
            entries.append(self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title))
        return self.playlist_result(entries, channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only when no other ``*IE`` name in this module does."""
        # The pattern above is very permissive and would also match URLs
        # that belong to the other extractors defined in this module, so
        # those get the first say.
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)
class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Declarative extractor: it only supplies the URL pattern, the names
    # and the test cases; no methods are defined in this class.
    IE_NAME = 'youtube:user:playlists'
    IE_DESC = 'YouTube.com user playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
        },
        {
            # with "Load more" button
            'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
    ]
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are requested one after another until ``n`` videos
        have been collected or a page yields no new videos.  ``n`` may be
        ``float('inf')`` (the value of ``_MAX_RESULTS``).

        Raises ExtractorError when a result page contains a search message
        instead of results.
        """
        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough videos are collected: requesting one
            # more page is wasted work and may raise "No video results"
            # although the request has already been satisfied.
            if not new_videos or len(videos) >= n:
                break

        # Only slice when there is an excess; n can be float('inf'), which
        # is not a valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as the parent class, with an extra query argument asking
    # for upload-date ordering.
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(InfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }]

    def _real_extract(self, url):
        """Return the items listed on a search results page as a playlist.

        The playlist title is the decoded ``search_query`` value of ``url``.
        """
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        results_html = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        # Every result title lives in an <h3> whose class contains
        # "yt-lockup-title".
        for heading_html in re.findall(
                r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', results_html):
            item_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], heading_html, 'item title', fatal=False)
            item_href = self._html_search_regex(
                r'(?s)href="([^"]+)"', heading_html, 'item URL')
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin('https://www.youtube.com/', item_href),
                'title': item_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Hand the show's ``/playlists`` page over to the parent extractor."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed and return them as a playlist.

        The feed page and its "load more" continuations are scanned until a
        portion brings no new ids or no continuation link is left.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            # A real list is required here: on Python 3 filter() returns an
            # iterator that is always truthy, so the emptiness check below
            # would never stop the loop.
            new_ids = [video_id for video_id in orderedSet(matches) if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extract the "watch later" list through the regular playlist code path."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # Left empty on purpose: override PlaylistIE tests
    _TESTS = []

    def _real_extract(self, url):
        """Return the playlist with the fixed id 'WL'; *url* is not inspected."""
        watch_later_id = 'WL'
        return self._extract_playlist(watch_later_id)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the favourites page to the playlist that backs it."""
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the playlist id on the favourites page and hand it to the playlist extractor.

        *url* is not used; the favourites page address is fixed.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The id is whatever follows the first "list=" up to a quote or '&'.
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'history' feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string, like the sibling extractors: '\.' in a plain literal is an
    # invalid escape sequence that newer Python versions warn about.  The
    # resulting pattern text is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs that carry no video id and explain the likely cause."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint about quoting the URL in the shell.

        Raises:
            ExtractorError: unconditionally, marked as an expected error.
        """
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(quoting_hint, expected=True)
2063 class YoutubeTruncatedIDIE(InfoExtractor):
2064 IE_NAME = 'youtube:truncated_id'
2065 IE_DESC = False # Do not list
2066 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
2069 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
2070 'only_matching': True,
2073 def _real_extract(self, url):
2074 video_id = self._match_id(url)
2075 raise ExtractorError(
2076 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),