3 from __future__ import unicode_literals
13 from .common import InfoExtractor, SearchInfoExtractor
14 from ..jsinterp import JSInterpreter
15 from ..swfinterp import SWFInterpreter
16 from ..compat import (
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
22 compat_urllib_parse_urlparse,
23 compat_urllib_request,
31 get_element_by_attribute,
46 class YoutubeBaseInfoExtractor(InfoExtractor):
47 """Provide base functions for Youtube extractors"""
# Google account endpoints: the login page is downloaded from, and the login
# form posted to, _LOGIN_URL; the two-factor form is posted to _TWOFACTOR_URL.
48 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
49 _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
50 _NETRC_MACHINE = 'youtube'
51 # If True it will raise an error if no login info is provided
52 _LOGIN_REQUIRED = False
# NOTE(review): the call that receives the arguments below is not visible in
# this listing. The visible arguments are the '.youtube.com' domain, the name
# 'PREF', the value 'f1=50000000&hl=en' and an expiry of now + 60 days --
# presumably a cookie that pins the site language to English; confirm.
54 def _set_language(self):
56 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
57 # YouTube sets the expire time to about two months
58 expire_time=time.time() + 2 * 30 * 24 * 3600)
# Builds a 'Youtube' url_result carrying the id as both URL and video_id.
# NOTE(review): the enclosing return/loop lines are not visible here;
# presumably one result is produced per entry of ids -- confirm.
60 def _ids_to_results(self, ids):
62 self.url_result(vid_id, 'Youtube', video_id=vid_id)
67 Attempt to log in to YouTube.
68 True is returned if successful or skipped.
69 False is returned if login failed.
71 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
73 (username, password) = self._get_login_info()
# Without credentials the login is either an error (when _LOGIN_REQUIRED is
# set, see the raise below) or skipped.
74 # No authentication to be performed
76 if self._LOGIN_REQUIRED:
77 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
80 login_page = self._download_webpage(
81 self._LOGIN_URL, None,
82 note='Downloading login page',
# The download is requested with fatal=False and the result is compared
# against False right after, i.e. failure is signalled by a False return.
83 errnote='unable to fetch login page', fatal=False)
84 if login_page is False:
87 galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
88 login_page, 'Login GALX parameter')
92 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
97 'PersistentCookie': 'yes',
99 'bgresponse': 'js_disabled',
100 'checkConnection': '',
101 'checkedDomains': 'youtube',
108 'service': 'youtube',
113 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
115 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
116 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
118 req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
119 login_results = self._download_webpage(
121 note='Logging in', errnote='unable to log in', fatal=False)
122 if login_results is False:
125 if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
126 raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
129 # TODO add SMS and phone call support - these require making a request and then prompting the user
# The presence of the second-factor form in the response means a TFA code
# has to be submitted before the login can complete.
131 if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
132 tfa_code = self._get_tfa_info()
135 self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
136 self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
139 # Unlike the first login form, secTok and timeStmp are both required for the TFA form
141 match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
143 self._downloader.report_warning('Failed to get secTok - did the page structure change?')
# NOTE(review): the warning above is followed directly by match.group(1) with
# no visible return in between, so a failed search would raise AttributeError
# on None here instead of failing the login gracefully -- confirm.
144 secTok = match.group(1)
145 match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
147 self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
# NOTE(review): same pattern as for secTok -- group(1) is read right after
# the failure warning; confirm a None match cannot reach this line.
148 timeStmp = match.group(1)
151 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
153 'smsUserPin': tfa_code,
154 'smsVerifyPin': 'Verify',
156 'PersistentCookie': 'yes',
157 'checkConnection': '',
158 'checkedDomains': 'youtube',
161 'timeStmp': timeStmp,
162 'service': 'youtube',
165 tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
166 tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
168 tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
169 tfa_results = self._download_webpage(
171 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
173 if tfa_results is False:
# The response to the TFA submission is classified by the markers it
# contains: the second-factor form again, the plain login form, or the
# security-notice interstitial each produce their own warning.
176 if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
177 self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
179 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
180 self._downloader.report_warning('unable to log in - did the page structure change?')
182 if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
183 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
# Seeing the login form again in the first response is reported as a
# credentials failure.
186 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
187 self._downloader.report_warning('unable to log in: bad username or password')
# Initialisation hook: checks for an attached downloader and then attempts
# the login. NOTE(review): the bodies of both conditionals are not visible
# in this listing.
191 def _real_initialize(self):
192 if self._downloader is None:
195 if not self._login():
199 class YoutubeIE(YoutubeBaseInfoExtractor):
200 IE_DESC = 'YouTube.com'
201 _VALID_URL = r"""(?x)^
203 (?:https?://|//) # http(s):// or protocol-independent URL
204 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
205 (?:www\.)?deturl\.com/www\.youtube\.com/|
206 (?:www\.)?pwnyoutube\.com/|
207 (?:www\.)?yourepeat\.com/|
208 tube\.majestyc\.net/|
209 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
210 (?:.*?\#/)? # handle anchor (#/) redirect urls
211 (?: # the various things that can precede the ID:
212 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
213 |(?: # or the v= param in all its forms
214 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
215 (?:\?|\#!?) # the params delimiter ? or # or #!
216 (?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
220 |youtu\.be/ # just youtu.be/xxxx
221 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
223 )? # all until now is optional -> you can pass the naked ID
224 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
225 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
226 (?(1).+)? # if we found the ID, everything can follow
228 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
230 '5': {'ext': 'flv', 'width': 400, 'height': 240},
231 '6': {'ext': 'flv', 'width': 450, 'height': 270},
232 '13': {'ext': '3gp'},
233 '17': {'ext': '3gp', 'width': 176, 'height': 144},
234 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
235 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
236 '34': {'ext': 'flv', 'width': 640, 'height': 360},
237 '35': {'ext': 'flv', 'width': 854, 'height': 480},
238 '36': {'ext': '3gp', 'width': 320, 'height': 240},
239 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
240 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
241 '43': {'ext': 'webm', 'width': 640, 'height': 360},
242 '44': {'ext': 'webm', 'width': 854, 'height': 480},
243 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
244 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
245 '59': {'ext': 'mp4', 'width': 854, 'height': 480},
246 '78': {'ext': 'mp4', 'width': 854, 'height': 480},
250 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
251 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
252 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
253 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
254 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
255 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
256 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
258 # Apple HTTP Live Streaming
259 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
260 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
261 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
262 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
263 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
264 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
265 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
268 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
269 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
270 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
271 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
272 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
273 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
274 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
275 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
276 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
277 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
278 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
281 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
282 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
283 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
286 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
287 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
288 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
289 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
290 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
291 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
292 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
293 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
294 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
295 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
296 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
297 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
298 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
299 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
300 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
301 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
302 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
303 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
304 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
305 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
306 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
309 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
310 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
312 # Dash webm audio with opus inside
313 '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
314 '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
315 '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
318 '_rtmp': {'protocol': 'rtmp'},
324 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
328 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
329 'uploader': 'Philipp Hagemeister',
330 'uploader_id': 'phihag',
331 'upload_date': '20121002',
332 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
333 'categories': ['Science & Technology'],
334 'tags': ['youtube-dl'],
336 'dislike_count': int,
342 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
343 'note': 'Test generic use_cipher_signature video (#897)',
347 'upload_date': '20120506',
348 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
349 'description': 'md5:782e8651347686cba06e58f71ab51773',
350 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
351 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
352 'iconic ep', 'iconic', 'love', 'it'],
353 'uploader': 'Icona Pop',
354 'uploader_id': 'IconaPop',
358 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
359 'note': 'Test VEVO video with age protection (#956)',
363 'upload_date': '20130703',
364 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
365 'description': 'md5:64249768eec3bc4276236606ea996373',
366 'uploader': 'justintimberlakeVEVO',
367 'uploader_id': 'justintimberlakeVEVO',
372 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
373 'note': 'Embed-only video (#1746)',
377 'upload_date': '20120608',
378 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
379 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
380 'uploader': 'SET India',
381 'uploader_id': 'setindia'
385 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
386 'note': 'Use the first video ID in the URL',
390 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
391 'uploader': 'Philipp Hagemeister',
392 'uploader_id': 'phihag',
393 'upload_date': '20121002',
394 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
395 'categories': ['Science & Technology'],
396 'tags': ['youtube-dl'],
398 'dislike_count': int,
401 'skip_download': True,
405 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
406 'note': '256k DASH audio (format 141) via DASH manifest',
410 'upload_date': '20121002',
411 'uploader_id': '8KVIDEO',
413 'uploader': '8KVIDEO',
414 'title': 'UHDTV TEST 8K VIDEO.mp4'
417 'youtube_include_dash_manifest': True,
421 # DASH manifest with encrypted signature
423 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
427 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
428 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
429 'uploader': 'AfrojackVEVO',
430 'uploader_id': 'AfrojackVEVO',
431 'upload_date': '20131011',
434 'youtube_include_dash_manifest': True,
438 # JS player signature function name containing $
440 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
444 'title': 'Taylor Swift - Shake It Off',
445 'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
446 'uploader': 'TaylorSwiftVEVO',
447 'uploader_id': 'TaylorSwiftVEVO',
448 'upload_date': '20140818',
451 'youtube_include_dash_manifest': True,
457 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
461 'upload_date': '20100909',
462 'uploader': 'The Amazing Atheist',
463 'uploader_id': 'TheAmazingAtheist',
464 'title': 'Burning Everyone\'s Koran',
465 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
468 # Normal age-gate video (No vevo, embed allowed)
470 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
474 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
475 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
476 'uploader': 'The Witcher',
477 'uploader_id': 'WitcherGame',
478 'upload_date': '20140605',
482 # Age-gate video with encrypted signature
484 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
488 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
489 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
490 'uploader': 'LloydVEVO',
491 'uploader_id': 'LloydVEVO',
492 'upload_date': '20110629',
496 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
498 'url': '__2ABJjxzNo',
502 'upload_date': '20100430',
503 'uploader_id': 'deadmau5',
504 'description': 'md5:12c56784b8032162bb936a5f76d55360',
505 'uploader': 'deadmau5',
506 'title': 'Deadmau5 - Some Chords (HD)',
508 'expected_warnings': [
509 'DASH manifest missing',
512 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
514 'url': 'lqQg6PlCWgI',
518 'upload_date': '20120731',
519 'uploader_id': 'olympic',
520 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
521 'uploader': 'Olympics',
522 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
525 'skip_download': 'requires avconv',
530 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
534 'stretched_ratio': 16 / 9.,
535 'upload_date': '20110310',
536 'uploader_id': 'AllenMeow',
537 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
539 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
542 # url_encoded_fmt_stream_map is empty string
544 'url': 'qEJwOuvDf7I',
548 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
550 'upload_date': '20150404',
551 'uploader_id': 'spbelect',
552 'uploader': 'Наблюдатели Петербурга',
555 'skip_download': 'requires avconv',
558 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
560 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
564 'title': 'md5:7b81415841e02ecd4313668cde88737a',
565 'description': 'md5:116377fd2963b81ec4ce64b542173306',
566 'upload_date': '20150625',
567 'uploader_id': 'dorappi2000',
568 'uploader': 'dorappi2000',
569 'formats': 'mincount:33',
572 # DASH manifest with segment_list
574 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
575 'md5': '8ce563a1d667b599d21064e982ab9e31',
579 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
580 'uploader': 'Airtek',
581 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
582 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
583 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
586 'youtube_include_dash_manifest': True,
587 'format': '135', # bestvideo
591 # Multifeed videos (multiple cameras), URL is for Main Camera
592 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
595 'title': 'teamPGP: Rocket League Noob Stream',
596 'description': 'md5:dc7872fb300e143831327f1bae3af010',
602 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
603 'description': 'md5:dc7872fb300e143831327f1bae3af010',
604 'upload_date': '20150721',
605 'uploader': 'Beer Games Beer',
606 'uploader_id': 'beergamesbeer',
612 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
613 'description': 'md5:dc7872fb300e143831327f1bae3af010',
614 'upload_date': '20150721',
615 'uploader': 'Beer Games Beer',
616 'uploader_id': 'beergamesbeer',
622 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
623 'description': 'md5:dc7872fb300e143831327f1bae3af010',
624 'upload_date': '20150721',
625 'uploader': 'Beer Games Beer',
626 'uploader_id': 'beergamesbeer',
632 'title': 'teamPGP: Rocket League Noob Stream (zim)',
633 'description': 'md5:dc7872fb300e143831327f1bae3af010',
634 'upload_date': '20150721',
635 'uploader': 'Beer Games Beer',
636 'uploader_id': 'beergamesbeer',
640 'skip_download': True,
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty signature-function cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.
    """
    parent = super(YoutubeIE, self)
    parent.__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature, keyed by
    # (player URL, signature cache id).
    self._player_cache = dict()
def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage for video_id is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Announce that information is being extracted for video_id."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Report that the given format is not available for video_id.

    video_id and format are interpolated into the message as-is; the
    message is emitted through self.to_screen.
    """
    # The parameter name shadows the builtin ``format`` but is kept because
    # callers may pass it by keyword.
    self.to_screen('%s: Format %s not available' % (video_id, format))
def report_rtmp_download(self):
    """Announce that the download was detected as an RTMP download."""
    message = 'RTMP download detected'
    self.to_screen(message)
665 def _signature_cache_id(self, example_sig):
666 """ Return a string representation of a signature """
667 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
# Build the signature-transforming callable for the player at player_url.
# The player id and type (the file extension) come from the named groups of
# the URL pattern below. A previously stored index list is loaded from the
# 'youtube-sigfuncs' cache when present; otherwise the player is downloaded,
# parsed ('js' or 'swf'), probed once, and the resulting index list stored.
# Raises ExtractorError('Cannot identify player ...') -- the guarding
# condition is not visible in this listing.
669 def _extract_signature_function(self, video_id, player_url, example_sig):
671 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
674 raise ExtractorError('Cannot identify player %r' % player_url)
675 player_type = id_m.group('ext')
676 player_id = id_m.group('id')
678 # Read from filesystem cache
679 func_id = '%s_%s_%s' % (
680 player_type, player_id, self._signature_cache_id(example_sig))
# func_id is used as the cache key; the assert checks that it has no
# directory component (its basename equals itself).
# NOTE(review): asserts are stripped under python -O.
681 assert os.path.basename(func_id) == func_id
683 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
684 if cache_spec is not None:
# A cached spec is a list of input indices: the output is rebuilt by picking
# those positions from the input string, in order.
685 return lambda s: ''.join(s[i] for i in cache_spec)
688 'Downloading player %s' % player_url
689 if self._downloader.params.get('verbose') else
690 'Downloading %s player %s' % (player_type, player_id)
692 if player_type == 'js':
693 code = self._download_webpage(
694 player_url, video_id,
696 errnote='Download of %s failed' % player_url)
697 res = self._parse_sig_js(code)
698 elif player_type == 'swf':
699 urlh = self._request_webpage(
700 player_url, video_id,
702 errnote='Download of %s failed' % player_url)
704 res = self._parse_sig_swf(code)
706 assert False, 'Invalid player type %r' % player_type
# Probe the function with a string whose i-th character has code point i, so
# ord() of each output character tells which input index it came from.
708 test_string = ''.join(map(compat_chr, range(len(example_sig))))
709 cache_res = res(test_string)
710 cache_spec = [ord(c) for c in cache_res]
712 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
# Print, via self.to_screen, Python source text that reproduces func for
# signatures shaped like example_sig. func is probed with a string of
# consecutive code points and the observed index order is rendered as
# slice expressions joined with ' + '.
715 def _print_sig_code(self, func, example_sig):
# gen_sig_code yields expression fragments (strings) for the index list idxs.
716 def gen_sig_code(idxs):
717 def _genslice(start, end, step):
# Render one slice over ``s``: the start is omitted when it is 0, the stop is
# end + step (left open when that would be negative), and the step is
# omitted when it is 1.
718 starts = '' if start == 0 else str(start)
719 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
720 steps = '' if step == 1 else (':%d' % step)
721 return 's[%s%s%s]' % (starts, ends, steps)
724 # Quelch pyflakes warnings - start will be set when step is set
725 start = '(Never used)'
# Walk consecutive pairs (prev, i) of the index list.
726 for i, prev in zip(idxs[1:], idxs[:-1]):
730 yield _genslice(start, prev, step)
733 if i - prev in [-1, 1]:
742 yield _genslice(start, i, step)
# Same probing technique as in _extract_signature_function: character i of
# the probe has code point i, so ord() recovers the source index.
744 test_string = ''.join(map(compat_chr, range(len(example_sig))))
745 cache_res = func(test_string)
746 cache_spec = [ord(c) for c in cache_res]
747 expr_code = ' + '.join(gen_sig_code(cache_spec))
748 signature_id_tuple = '(%s)' % (
749 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
750 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
751 ' return %s\n') % (signature_id_tuple, expr_code)
752 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function of a JS player.

    The function name is located in jscode with self._search_regex, the
    function itself is extracted with JSInterpreter, and the returned
    callable hands its single argument to it as a one-element list.
    """
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')
    js_func = JSInterpreter(jscode).extract_function(sig_func_name)

    def decipher(s):
        return js_func([s])

    return decipher
def _parse_sig_swf(self, file_contents):
    """Return a callable wrapping the SWF player's signature routine.

    The ``decipher`` function of the ``SignatureDecipher`` class is extracted
    with SWFInterpreter; the returned callable hands its single argument to
    it as a one-element list.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    swf_func = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        return swf_func([s])

    return decipher
770 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
771 """Turn the encrypted s field into a working signature"""
# Raises ExtractorError when player_url is None, and wraps any exception
# from the extraction below in ExtractorError with the traceback text.
# NOTE(review): age_gate is not referenced by any line visible in this
# listing, and the line that returns the result is not visible either.
773 if player_url is None:
774 raise ExtractorError('Cannot decrypt signature without player_url')
776 if player_url.startswith('//'):
# Protocol-relative player URL: https is assumed.
777 player_url = 'https:' + player_url
779 player_id = (player_url, self._signature_cache_id(s))
# Signature functions are memoised in self._player_cache per
# (player URL, signature cache id), so each is extracted at most once
# per extractor instance.
780 if player_id not in self._player_cache:
781 func = self._extract_signature_function(
782 video_id, player_url, s
784 self._player_cache[player_id] = func
785 func = self._player_cache[player_id]
# Optional debugging aid controlled by the 'youtube_print_sig_code' param.
786 if self._downloader.params.get('youtube_print_sig_code'):
787 self._print_sig_code(func, s)
789 except Exception as e:
790 tb = traceback.format_exc()
791 raise ExtractorError(
792 'Signature extraction failed: ' + tb, cause=e)
# Collect subtitle tracks for video_id from the timedtext track list.
# For every <track> element a list of formats (one per extension in
# 'sbv', 'vtt', 'srt') is stored in sub_lang_list under the track's
# lang_code. Download failures and an empty result only produce warnings.
# NOTE(review): webpage is not referenced by the visible lines, and the
# return statements are not visible in this listing.
794 def _get_subtitles(self, video_id, webpage):
796 subs_doc = self._download_xml(
797 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
798 video_id, note=False)
799 except ExtractorError as err:
800 self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
804 for track in subs_doc.findall('track'):
# lang_code is a required attribute here: a track without it raises KeyError.
805 lang = track.attrib['lang_code']
806 if lang in sub_lang_list:
809 for ext in ['sbv', 'vtt', 'srt']:
810 params = compat_urllib_parse.urlencode({
814 'name': track.attrib['name'].encode('utf-8'),
817 'url': 'https://www.youtube.com/api/timedtext?' + params,
820 sub_lang_list[lang] = sub_formats
821 if not sub_lang_list:
822 self._downloader.report_warning('video doesn\'t have subtitles')
826 def _get_automatic_captions(self, video_id, webpage):
827 """We need the webpage for getting the captions url, pass it as an
828 argument to speed up the process."""
# The player configuration is the JSON object assigned to ytplayer.config in
# the page; its 'args' entry supplies the caption base URL ('ttsurl') and a
# 'timestamp'. Missing keys and download errors are reported with err_msg
# as a warning (see the except clause at the end).
829 self.to_screen('%s: Looking for automatic captions' % video_id)
# NOTE(review): the '.' in 'ytplayer.config' is unescaped, so it matches any
# character, not only a literal dot.
830 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
831 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
833 self._downloader.report_warning(err_msg)
835 player_config = json.loads(mobj.group(1))
837 args = player_config['args']
838 caption_url = args['ttsurl']
839 timestamp = args['timestamp']
840 # We get the available subtitles
841 list_params = compat_urllib_parse.urlencode({
846 list_url = caption_url + '&' + list_params
847 caption_list = self._download_xml(list_url, video_id)
# The first <track> element describes the original caption language.
848 original_lang_node = caption_list.find('track')
849 if original_lang_node is None:
850 self._downloader.report_warning('Video doesn\'t have automatic captions')
852 original_lang = original_lang_node.attrib['lang_code']
853 caption_kind = original_lang_node.attrib.get('kind', '')
856 for lang_node in caption_list.findall('target'):
# One entry per <target> element, keyed by its lang_code; each entry gets a
# URL for every extension in the list below.
857 sub_lang = lang_node.attrib['lang_code']
859 for ext in ['sbv', 'vtt', 'srt']:
860 params = compat_urllib_parse.urlencode({
861 'lang': original_lang,
865 'kind': caption_kind,
868 'url': caption_url + '&' + params,
871 sub_lang_list[sub_lang] = sub_formats
873 # An extractor error can be raise by the download process if there are
874 # no automatic captions but there are subtitles
875 except (KeyError, ExtractorError):
876 self._downloader.report_warning(err_msg)
# Extract the video id from url by matching it against cls._VALID_URL.
# Raises ExtractorError('Invalid URL: ...'); the condition guarding the
# raise and the final return are not visible in this listing.
880 def extract_id(cls, url):
881 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
883 raise ExtractorError('Invalid URL: %s' % url)
# Capture group 2 of _VALID_URL is read as the id -- presumably the
# 11-character id group; confirm against the full pattern.
884 video_id = mobj.group(2)
# Download the manifest at manifest_url and map each format URL found in it
# to its itag in url_map.
# NOTE(review): the creation of url_map and the return statement are not
# visible in this listing.
887 def _extract_from_m3u8(self, manifest_url, video_id):
890 def _get_urls(_manifest):
# Keep only non-empty lines that do not start with '#'.
891 lines = _manifest.split('\n')
892 urls = filter(lambda l: l and not l.startswith('#'),
895 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
896 formats_urls = _get_urls(manifest)
897 for format_url in formats_urls:
# The itag is read from the 'itag/<digits>/' path segment of the URL and
# used as the key; a later URL with the same itag overwrites an earlier one.
898 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
899 url_map[itag] = format_url
902 def _extract_annotations(self, video_id):
903 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
904 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
906 def _parse_dash_manifest(
907 self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
# Download the DASH manifest and turn its audio/video Representation
# elements into format dicts, merged with the static per-itag data from
# self._formats. Encrypted signatures embedded in the manifest URL are
# decrypted first.
# NOTE(review): the creation of ``formats``, the use of ``fatal`` and the
# return statements are not visible in this listing.
908 def decrypt_sig(mobj):
910 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
911 return '/signature/%s' % dec_s
# Every '/s/<sig>' path segment of the manifest URL is replaced by
# '/signature/<decrypted sig>'.
912 dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
913 dash_doc = self._download_xml(
914 dash_manifest_url, video_id,
915 note='Downloading DASH manifest',
916 errnote='Could not download DASH manifest',
919 if dash_doc is False:
923 for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
924 mime_type = a.attrib.get('mimeType')
# NOTE(review): mimeType is read with .get() and may therefore be None; the
# .startswith() calls further down would then raise AttributeError --
# confirm the attribute is always present on AdaptationSet.
925 for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
926 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
929 if mime_type == 'text/vtt':
930 # TODO implement WebVTT downloading
932 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
933 segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
# The Representation id is used as the format_id (and as the key into
# self._formats below).
934 format_id = r.attrib['id']
935 video_url = url_el.text
936 filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
938 'format_id': format_id,
940 'width': int_or_none(r.attrib.get('width')),
941 'height': int_or_none(r.attrib.get('height')),
# NOTE(review): the second argument (1000) presumably scales bandwidth from
# bit/s to kbit/s -- confirm against int_or_none's signature.
942 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
943 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
944 'filesize': filesize,
945 'fps': int_or_none(r.attrib.get('frameRate')),
# Representations with a SegmentList additionally carry the initialization
# URL, the list of segment URLs and the 'http_dash_segments' protocol.
947 if segment_list is not None:
949 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
950 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
951 'protocol': 'http_dash_segments',
# An already collected format with the same format_id is updated in place;
# StopIteration from next() means there is none yet and a new entry is built
# from the static table.
954 existing_format = next(
956 if fo['format_id'] == format_id)
957 except StopIteration:
958 full_info = self._formats.get(format_id, {}).copy()
960 codecs = r.attrib.get('codecs')
962 if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
# A video-only entry without a known vcodec takes the manifest's codecs
# string as vcodec; an audio-only entry takes it as acodec.
963 full_info['vcodec'] = codecs
964 elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
965 full_info['acodec'] = codecs
966 formats.append(full_info)
968 existing_format.update(f)
970 self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
973 def _real_extract(self, url):
974 url, smuggled_data = unsmuggle_url(url, {})
977 'http' if self._downloader.params.get('prefer_insecure', False)
982 parsed_url = compat_urllib_parse_urlparse(url)
983 for component in [parsed_url.fragment, parsed_url.query]:
984 query = compat_parse_qs(component)
985 if start_time is None and 't' in query:
986 start_time = parse_duration(query['t'][0])
987 if start_time is None and 'start' in query:
988 start_time = parse_duration(query['start'][0])
989 if end_time is None and 'end' in query:
990 end_time = parse_duration(query['end'][0])
992 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
993 mobj = re.search(self._NEXT_URL_RE, url)
995 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
996 video_id = self.extract_id(url)
999 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1000 video_webpage = self._download_webpage(url, video_id)
1002 # Attempt to extract SWF player URL
1003 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1004 if mobj is not None:
1005 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1011 def add_dash_mpd(video_info):
1012 dash_mpd = video_info.get('dashmpd')
1013 if dash_mpd and dash_mpd[0] not in dash_mpds:
1014 dash_mpds.append(dash_mpd[0])
1017 embed_webpage = None
1019 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1021 # We simulate the access to the video from www.youtube.com/v/{video_id}
1022 # this can be viewed without login into Youtube
1023 url = proto + '://www.youtube.com/embed/%s' % video_id
1024 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1025 data = compat_urllib_parse.urlencode({
1026 'video_id': video_id,
1027 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1028 'sts': self._search_regex(
1029 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1031 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1032 video_info_webpage = self._download_webpage(
1033 video_info_url, video_id,
1034 note='Refetching age-gated info webpage',
1035 errnote='unable to download video info webpage')
1036 video_info = compat_parse_qs(video_info_webpage)
1037 add_dash_mpd(video_info)
1041 # Try looking directly into the video webpage
1042 mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
1044 json_code = uppercase_escape(mobj.group(1))
1045 ytplayer_config = json.loads(json_code)
1046 args = ytplayer_config['args']
1047 if args.get('url_encoded_fmt_stream_map'):
1048 # Convert to the same format returned by compat_parse_qs
1049 video_info = dict((k, [v]) for k, v in args.items())
1050 add_dash_mpd(video_info)
1051 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1053 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1054 # We also try looking in get_video_info since it may contain different dashmpd
1055 # URL that points to a DASH manifest with possibly different itag set (some itags
1056 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1057 # manifest pointed by get_video_info's dashmpd).
1058 # The general idea is to take a union of itags of both DASH manifests (for example
1059 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1060 self.report_video_info_webpage_download(video_id)
1061 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1063 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1064 % (proto, video_id, el_type))
1065 video_info_webpage = self._download_webpage(
1067 video_id, note=False,
1068 errnote='unable to download video info webpage')
1069 get_video_info = compat_parse_qs(video_info_webpage)
1070 if get_video_info.get('use_cipher_signature') != ['True']:
1071 add_dash_mpd(get_video_info)
1073 video_info = get_video_info
1074 if 'token' in get_video_info:
1076 if 'token' not in video_info:
1077 if 'reason' in video_info:
1078 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1079 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1081 raise ExtractorError('YouTube said: This video is available in %s only' % (
1082 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1084 raise ExtractorError(
1085 'YouTube said: %s' % video_info['reason'][0],
1086 expected=True, video_id=video_id)
1088 raise ExtractorError(
1089 '"token" parameter not in video info for unknown reason',
1093 if 'title' in video_info:
1094 video_title = video_info['title'][0]
1096 self._downloader.report_warning('Unable to extract video title')
1100 video_description = get_element_by_id("eow-description", video_webpage)
1101 if video_description:
1102 video_description = re.sub(r'''(?x)
1104 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1106 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1107 class="yt-uix-redirect-link"\s*>
1110 ''', r'\1', video_description)
1111 video_description = clean_html(video_description)
1113 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1115 video_description = unescapeHTML(fd_mobj.group(1))
1117 video_description = ''
1119 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1120 if not self._downloader.params.get('noplaylist'):
1123 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
1124 for feed in multifeed_metadata_list.split(','):
1125 feed_data = compat_parse_qs(feed)
1127 '_type': 'url_transparent',
1128 'ie_key': 'Youtube',
1130 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1131 {'force_singlefeed': True}),
1132 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1134 feed_ids.append(feed_data['id'][0])
1136 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1137 % (', '.join(feed_ids), video_id))
1138 return self.playlist_result(entries, video_id, video_title, video_description)
1139 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1141 if 'view_count' in video_info:
1142 view_count = int(video_info['view_count'][0])
1146 # Check for "rental" videos
1147 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1148 raise ExtractorError('"rental" videos not supported')
1150 # Start extracting information
1151 self.report_information_extraction(video_id)
1154 if 'author' not in video_info:
1155 raise ExtractorError('Unable to extract uploader name')
1156 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1159 video_uploader_id = None
1160 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1161 if mobj is not None:
1162 video_uploader_id = mobj.group(1)
1164 self._downloader.report_warning('unable to extract uploader nickname')
1167 # We try first to get a high quality image:
1168 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1169 video_webpage, re.DOTALL)
1170 if m_thumb is not None:
1171 video_thumbnail = m_thumb.group(1)
1172 elif 'thumbnail_url' not in video_info:
1173 self._downloader.report_warning('unable to extract video thumbnail')
1174 video_thumbnail = None
1175 else: # don't panic if we can't find it
1176 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1179 upload_date = self._html_search_meta(
1180 'datePublished', video_webpage, 'upload date', default=None)
1182 upload_date = self._search_regex(
1183 [r'(?s)id="eow-date.*?>(.*?)</span>',
1184 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1185 video_webpage, 'upload date', default=None)
1187 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1188 upload_date = unified_strdate(upload_date)
1190 m_cat_container = self._search_regex(
1191 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1192 video_webpage, 'categories', default=None)
1194 category = self._html_search_regex(
1195 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1197 video_categories = None if category is None else [category]
1199 video_categories = None
1202 unescapeHTML(m.group('content'))
1203 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1205 def _extract_count(count_name):
1206 return str_to_int(self._search_regex(
1207 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1208 % re.escape(count_name),
1209 video_webpage, count_name, default=None))
1211 like_count = _extract_count('like')
1212 dislike_count = _extract_count('dislike')
1215 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1216 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1218 if 'length_seconds' not in video_info:
1219 self._downloader.report_warning('unable to extract video duration')
1220 video_duration = None
1222 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1225 video_annotations = None
1226 if self._downloader.params.get('writeannotations', False):
1227 video_annotations = self._extract_annotations(video_id)
1229 def _map_to_format_list(urlmap):
1231 for itag, video_real_url in urlmap.items():
1234 'url': video_real_url,
1235 'player_url': player_url,
1237 if itag in self._formats:
1238 dct.update(self._formats[itag])
1242 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1243 self.report_rtmp_download()
1245 'format_id': '_rtmp',
1247 'url': video_info['conn'][0],
1248 'player_url': player_url,
1250 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1251 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1252 if 'rtmpe%3Dyes' in encoded_url_map:
1253 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1255 for url_data_str in encoded_url_map.split(','):
1256 url_data = compat_parse_qs(url_data_str)
1257 if 'itag' not in url_data or 'url' not in url_data:
1259 format_id = url_data['itag'][0]
1260 url = url_data['url'][0]
1262 if 'sig' in url_data:
1263 url += '&signature=' + url_data['sig'][0]
1264 elif 's' in url_data:
1265 encrypted_sig = url_data['s'][0]
1266 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1268 jsplayer_url_json = self._search_regex(
1270 embed_webpage if age_gate else video_webpage,
1271 'JS player URL (1)', default=None)
1272 if not jsplayer_url_json and not age_gate:
1273 # We need the embed website after all
1274 if embed_webpage is None:
1275 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1276 embed_webpage = self._download_webpage(
1277 embed_url, video_id, 'Downloading embed webpage')
1278 jsplayer_url_json = self._search_regex(
1279 ASSETS_RE, embed_webpage, 'JS player URL')
1281 player_url = json.loads(jsplayer_url_json)
1282 if player_url is None:
1283 player_url_json = self._search_regex(
1284 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1285 video_webpage, 'age gate player URL')
1286 player_url = json.loads(player_url_json)
1288 if self._downloader.params.get('verbose'):
1289 if player_url is None:
1290 player_version = 'unknown'
1291 player_desc = 'unknown'
1293 if player_url.endswith('swf'):
1294 player_version = self._search_regex(
1295 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1296 'flash player', fatal=False)
1297 player_desc = 'flash player %s' % player_version
1299 player_version = self._search_regex(
1300 r'html5player-([^/]+?)(?:/html5player)?\.js',
1302 'html5 player', fatal=False)
1303 player_desc = 'html5 player %s' % player_version
1305 parts_sizes = self._signature_cache_id(encrypted_sig)
1306 self.to_screen('{%s} signature length %s, %s' %
1307 (format_id, parts_sizes, player_desc))
1309 signature = self._decrypt_signature(
1310 encrypted_sig, video_id, player_url, age_gate)
1311 url += '&signature=' + signature
1312 if 'ratebypass' not in url:
1313 url += '&ratebypass=yes'
1314 url_map[format_id] = url
1315 formats = _map_to_format_list(url_map)
1316 elif video_info.get('hlsvp'):
1317 manifest_url = video_info['hlsvp'][0]
1318 url_map = self._extract_from_m3u8(manifest_url, video_id)
1319 formats = _map_to_format_list(url_map)
1321 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1323 # Look for the DASH manifest
1324 if self._downloader.params.get('youtube_include_dash_manifest', True):
1325 dash_mpd_fatal = True
1326 for dash_manifest_url in dash_mpds:
1329 for df in self._parse_dash_manifest(
1330 video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
1331 # Do not overwrite DASH format found in some previous DASH manifest
1332 if df['format_id'] not in dash_formats:
1333 dash_formats[df['format_id']] = df
1334 # Additional DASH manifests may end up in HTTP Error 403 therefore
1335 # allow them to fail without bug report message if we already have
1336 # some DASH manifest succeeded. This is temporary workaround to reduce
1337 # burst of bug reports until we figure out the reason and whether it
1338 # can be fixed at all.
1339 dash_mpd_fatal = False
1340 except (ExtractorError, KeyError) as e:
1341 self.report_warning(
1342 'Skipping DASH manifest: %r' % e, video_id)
1344 # Remove the formats we found through non-DASH, they
1345 # contain less info and it can be wrong, because we use
1346 # fixed values (for example the resolution). See
1347 # https://github.com/rg3/youtube-dl/issues/5774 for an
1349 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1350 formats.extend(dash_formats.values())
1352 # Check for malformed aspect ratio
1353 stretched_m = re.search(
1354 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1357 ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
1359 if f.get('vcodec') != 'none':
1360 f['stretched_ratio'] = ratio
1362 self._sort_formats(formats)
1366 'uploader': video_uploader,
1367 'uploader_id': video_uploader_id,
1368 'upload_date': upload_date,
1369 'title': video_title,
1370 'thumbnail': video_thumbnail,
1371 'description': video_description,
1372 'categories': video_categories,
1374 'subtitles': video_subtitles,
1375 'automatic_captions': automatic_captions,
1376 'duration': video_duration,
1377 'age_limit': 18 if age_gate else 0,
1378 'annotations': video_annotations,
1379 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1380 'view_count': view_count,
1381 'like_count': like_count,
1382 'dislike_count': dislike_count,
1383 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1386 'start_time': start_time,
1387 'end_time': end_time,
1391 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
1392 IE_DESC = 'YouTube.com playlists'
1393 _VALID_URL = r"""(?x)(?:
1398 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
1399 \? (?:.*?&)*? (?:p|a|list)=
1403 (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
1404 # Top tracks, they can also include dots
1409 ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
1411 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
1412 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
1413 IE_NAME = 'youtube:playlist'
1415 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1417 'title': 'ytdl test PL',
1418 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1420 'playlist_count': 3,
1422 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1424 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1425 'title': 'YDL_Empty_List',
1427 'playlist_count': 0,
1429 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
1430 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1432 'title': '29C3: Not my department',
1433 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1435 'playlist_count': 95,
1437 'note': 'issue #673',
1438 'url': 'PLBB231211A4F62143',
1440 'title': '[OLD]Team Fortress 2 (Class-based LP)',
1441 'id': 'PLBB231211A4F62143',
1443 'playlist_mincount': 26,
1445 'note': 'Large playlist',
1446 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
1448 'title': 'Uploads from Cauchemar',
1449 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
1451 'playlist_mincount': 799,
1453 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1455 'title': 'YDL_safe_search',
1456 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1458 'playlist_count': 2,
1461 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1462 'playlist_count': 4,
1465 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1468 'note': 'Embedded SWF player',
1469 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
1470 'playlist_count': 4,
1473 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
1476 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
1477 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
1479 'title': 'Uploads from Interstellar Movie',
1480 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
1482 'playlist_mincout': 21,
1485 def _real_initialize(self):
1488 def _extract_mix(self, playlist_id):
1489 # The mixes are generated from a single video
1490 # the id of the playlist is just 'RD' + video_id
1491 url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
1492 webpage = self._download_webpage(
1493 url, playlist_id, 'Downloading Youtube mix')
1494 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
1496 search_title('playlist-title') or
1497 search_title('title long-title') or
1498 search_title('title'))
1499 title = clean_html(title_span)
1500 ids = orderedSet(re.findall(
1501 r'''(?xs)data-video-username=".*?".*?
1502 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
1504 url_results = self._ids_to_results(ids)
1506 return self.playlist_result(url_results, playlist_id, title)
1508 def _extract_playlist(self, playlist_id):
1509 url = self._TEMPLATE_URL % playlist_id
1510 page = self._download_webpage(url, playlist_id)
1512 for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
1513 match = match.strip()
1514 # Check if the playlist exists or is private
1515 if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
1516 raise ExtractorError(
1517 'The playlist doesn\'t exist or is private, use --username or '
1518 '--netrc to access it.',
1520 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
1521 raise ExtractorError(
1522 'Invalid parameters. Maybe URL is incorrect.',
1524 elif re.match(r'[^<]*Choose your language[^<]*', match):
1527 self.report_warning('Youtube gives an alert message: ' + match)
1529 # Extract the video ids from the playlist pages
1531 more_widget_html = content_html = page
1532 for page_num in itertools.count(1):
1533 matches = re.finditer(self._VIDEO_RE, content_html)
1534 # We remove the duplicates and the link with index 0
1535 # (it's not the first video of the playlist)
1536 new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
1537 for vid_id in new_ids:
1538 yield self.url_result(vid_id, 'Youtube', video_id=vid_id)
1540 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
1544 more = self._download_json(
1545 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
1546 'Downloading page #%s' % page_num,
1547 transform_source=uppercase_escape)
1548 content_html = more['content_html']
1549 if not content_html.strip():
1550 # Some webpages show a "Load more" button but they don't
1553 more_widget_html = more['load_more_widget_html']
1555 playlist_title = self._html_search_regex(
1556 r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
1559 return self.playlist_result(_entries(), playlist_id, playlist_title)
1561 def _real_extract(self, url):
1562 # Extract playlist id
1563 mobj = re.match(self._VALID_URL, url)
1565 raise ExtractorError('Invalid URL: %s' % url)
1566 playlist_id = mobj.group(1) or mobj.group(2)
1568 # Check if it's a video-specific URL
1569 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
1570 if 'v' in query_dict:
1571 video_id = query_dict['v'][0]
1572 if self._downloader.params.get('noplaylist'):
1573 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1574 return self.url_result(video_id, 'Youtube', video_id=video_id)
1576 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
1578 if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
1579 # Mixes require a custom extraction process
1580 return self._extract_mix(playlist_id)
1582 return self._extract_playlist(playlist_id)
1585 class YoutubeChannelIE(InfoExtractor):
1586 IE_DESC = 'YouTube.com channels'
1587 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
1588 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
1589 IE_NAME = 'youtube:channel'
1591 'note': 'paginated channel',
1592 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
1593 'playlist_mincount': 91,
1595 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
1600 def extract_videos_from_page(page):
1603 for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
1604 video_id = mobj.group('id')
1605 video_title = unescapeHTML(mobj.group('title'))
1607 idx = ids_in_page.index(video_id)
1608 if video_title and not titles_in_page[idx]:
1609 titles_in_page[idx] = video_title
1611 ids_in_page.append(video_id)
1612 titles_in_page.append(video_title)
1613 return zip(ids_in_page, titles_in_page)
1615 def _real_extract(self, url):
1616 channel_id = self._match_id(url)
1618 url = self._TEMPLATE_URL % channel_id
1620 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
1621 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
1622 # otherwise fallback on channel by page extraction
1623 channel_page = self._download_webpage(
1624 url + '?view=57', channel_id,
1625 'Downloading channel page', fatal=False)
1626 channel_playlist_id = self._html_search_meta(
1627 'channelId', channel_page, 'channel id', default=None)
1628 if not channel_playlist_id:
1629 channel_playlist_id = self._search_regex(
1630 r'data-channel-external-id="([^"]+)"',
1631 channel_page, 'channel id', default=None)
1632 if channel_playlist_id and channel_playlist_id.startswith('UC'):
1633 playlist_id = 'UU' + channel_playlist_id[2:]
1634 return self.url_result(
1635 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
1637 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
1638 autogenerated = re.search(r'''(?x)
1640 channel-header-autogenerated-label|
1641 yt-channel-title-autogenerated
1642 )[^"]*"''', channel_page) is not None
1645 # The videos are contained in a single page
1646 # the ajax pages can't be used, they are empty
1649 video_id, 'Youtube', video_id=video_id,
1650 video_title=video_title)
1651 for video_id, video_title in self.extract_videos_from_page(channel_page)]
1652 return self.playlist_result(entries, channel_id)
1655 more_widget_html = content_html = channel_page
1656 for pagenum in itertools.count(1):
1658 for video_id, video_title in self.extract_videos_from_page(content_html):
1659 yield self.url_result(
1660 video_id, 'Youtube', video_id=video_id,
1661 video_title=video_title)
1664 r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
1669 more = self._download_json(
1670 'https://youtube.com/%s' % mobj.group('more'), channel_id,
1671 'Downloading page #%s' % (pagenum + 1),
1672 transform_source=uppercase_escape)
1673 content_html = more['content_html']
1674 more_widget_html = more['load_more_widget_html']
1676 return self.playlist_result(_entries(), channel_id)
1679 class YoutubeUserIE(YoutubeChannelIE):
1680 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
1681 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
1682 _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
1683 IE_NAME = 'youtube:user'
1686 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
1687 'playlist_mincount': 320,
1689 'title': 'TheLinuxFoundation',
1692 'url': 'ytuser:phihag',
1693 'only_matching': True,
1697 def suitable(cls, url):
1698 # Don't return True if the url can be extracted with other youtube
1699 # extractor, the regex would is too permissive and it would match.
1700 other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
1701 if any(ie.suitable(url) for ie in other_ies):
1704 return super(YoutubeUserIE, cls).suitable(url)
1707 class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
1708 IE_DESC = 'YouTube.com searches'
1709 # there doesn't appear to be a real limit, for example if you search for
1710 # 'python' you get more than 8.000.000 results
1711 _MAX_RESULTS = float('inf')
1712 IE_NAME = 'youtube:search'
1713 _SEARCH_KEY = 'ytsearch'
1714 _EXTRA_QUERY_ARGS = {}
1717 def _get_n_results(self, query, n):
1718 """Get a specified number of results for a query"""
1723 for pagenum in itertools.count(1):
1725 'search_query': query.encode('utf-8'),
1729 url_query.update(self._EXTRA_QUERY_ARGS)
1730 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
1731 data = self._download_json(
1732 result_url, video_id='query "%s"' % query,
1733 note='Downloading page %s' % pagenum,
1734 errnote='Unable to download API page')
1735 html_content = data[1]['body']['content']
1737 if 'class="search-message' in html_content:
1738 raise ExtractorError(
1739 '[youtube] No video results', expected=True)
1741 new_videos = self._ids_to_results(orderedSet(re.findall(
1742 r'href="/watch\?v=(.{11})', html_content)))
1743 videos += new_videos
1744 if not new_videos or len(videos) > limit:
1749 return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that requests the newest uploads first.

    Only the identifiers and the extra query arguments are overridden here;
    the search logic itself comes from YoutubeSearchIE.
    """

    IE_DESC = 'YouTube.com searches, newest videos first'
    # Built from the parent's name so that the two cannot drift apart.
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the request's query parameters by _get_n_results().
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
1759 class YoutubeSearchURLIE(InfoExtractor):
1760 IE_DESC = 'YouTube.com search URLs'
1761 IE_NAME = 'youtube:search_url'
1762 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
1764 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
1765 'playlist_mincount': 5,
1767 'title': 'youtube-dl test video',
1771 def _real_extract(self, url):
1772 mobj = re.match(self._VALID_URL, url)
1773 query = compat_urllib_parse_unquote_plus(mobj.group('query'))
1775 webpage = self._download_webpage(url, query)
1776 result_code = self._search_regex(
1777 r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
1779 part_codes = re.findall(
1780 r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
1782 for part_code in part_codes:
1783 part_title = self._html_search_regex(
1784 [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
1785 part_url_snippet = self._html_search_regex(
1786 r'(?s)href="([^"]+)"', part_code, 'item URL')
1787 part_url = compat_urlparse.urljoin(
1788 'https://www.youtube.com/', part_url_snippet)
1792 'title': part_title,
1796 '_type': 'playlist',
1802 class YoutubeShowIE(InfoExtractor):
1803 IE_DESC = 'YouTube.com (multi-season) shows'
1804 _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
1805 IE_NAME = 'youtube:show'
1807 'url': 'http://www.youtube.com/show/airdisasters',
1808 'playlist_mincount': 3,
1810 'id': 'airdisasters',
1811 'title': 'Air Disasters',
1815 def _real_extract(self, url):
1816 mobj = re.match(self._VALID_URL, url)
1817 playlist_id = mobj.group('id')
1818 webpage = self._download_webpage(
1819 url, playlist_id, 'Downloading show webpage')
1820 # There's one playlist for each season of the show
1821 m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
1822 self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
1825 'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
1826 for season in m_seasons
1828 title = self._og_search_title(webpage, fatal=False)
1831 '_type': 'playlist',
1838 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
1840 Base class for feed extractors
1841 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
1843 _LOGIN_REQUIRED = True
1847 return 'youtube:%s' % self._FEED_NAME
1849 def _real_initialize(self):
1852 def _real_extract(self, url):
1853 page = self._download_webpage(
1854 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
1856 # The extraction process is the same as for playlists, but the regex
1857 # for the video ids doesn't contain an index
1859 more_widget_html = content_html = page
1860 for page_num in itertools.count(1):
1861 matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
1863 # 'recommended' feed has infinite 'load more' and each new portion spins
1864 # the same videos in (sometimes) slightly different order, so we'll check
1865 # for unicity and break when portion has no new videos
1866 new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
1872 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
1876 more = self._download_json(
1877 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
1878 'Downloading page #%s' % page_num,
1879 transform_source=uppercase_escape)
1880 content_html = more['content_html']
1881 more_widget_html = more['load_more_widget_html']
1883 return self.playlist_result(
1884 self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "Watch later" list.

    The list is treated as a regular playlist with the fixed id 'WL', so the
    extraction is delegated to the inherited _extract_playlist().
    """

    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # Override the tests inherited from YoutubePlaylistIE.
    _TESTS = []

    def _real_extract(self, url):
        # The url itself is not inspected: every accepted form maps to 'WL'.
        watch_later_id = 'WL'
        return self._extract_playlist(watch_later_id)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourite videos page (login required).

    It downloads the "my_favorites" page, pulls the playlist id out of the
    first "list=" parameter found in it and hands that id over to the
    playlist extractor.
    """

    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        # The url argument is not used; the favourites page address is fixed.
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        # The id is whatever follows "list=" up to the next quote or ampersand.
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed.

    Supplies the two values the feed base class asks its subclasses to
    define: _FEED_NAME and _PLAYLIST_TITLE.
    """

    # Name of the feed; the base class inserts it into the feed URL.
    _FEED_NAME = 'recommended'
    # Title given to the resulting playlist.
    _PLAYLIST_TITLE = 'Youtube Recommended videos'

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed.

    Supplies the two values the feed base class asks its subclasses to
    define: _FEED_NAME and _PLAYLIST_TITLE.
    """

    # Name of the feed; the base class inserts it into the feed URL.
    _FEED_NAME = 'subscriptions'
    # Title given to the resulting playlist.
    _PLAYLIST_TITLE = 'Youtube Subscriptions'

    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the watch history feed.

    Supplies the two values the feed base class asks its subclasses to
    define: _FEED_NAME and _PLAYLIST_TITLE.
    """

    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Written as a raw string like the other feed extractors' patterns: in a
    # plain literal '\.' is an invalid escape sequence, which newer Python
    # versions warn about. The resulting pattern is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    # Name of the feed; the base class inserts it into the feed URL.
    _FEED_NAME = 'history'
    # Title given to the resulting playlist.
    _PLAYLIST_TITLE = 'Youtube History'
1931 class YoutubeTruncatedURLIE(InfoExtractor):
1932 IE_NAME = 'youtube:truncated_url'
1933 IE_DESC = False # Do not list
1934 _VALID_URL = r'''(?x)
1936 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
1939 annotation_id=annotation_[^&]+|
1944 attribution_link\?a=[^&]+
1950 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
1951 'only_matching': True,
1953 'url': 'http://www.youtube.com/watch?',
1954 'only_matching': True,
1956 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
1957 'only_matching': True,
1959 'url': 'https://www.youtube.com/watch?feature=foo',
1960 'only_matching': True,
1962 'url': 'https://www.youtube.com/watch?hl=en-GB',
1963 'only_matching': True,
1966 def _real_extract(self, url):
1967 raise ExtractorError(
1968 'Did you forget to quote the URL? Remember that & is a meta '
1969 'character in most shells, so you want to put the URL in quotes, '
1971 '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
1972 ' or simply youtube-dl BaW_jenozKc .',
1976 class YoutubeTruncatedIDIE(InfoExtractor):
1977 IE_NAME = 'youtube:truncated_id'
1978 IE_DESC = False # Do not list
1979 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
1982 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
1983 'only_matching': True,
1986 def _real_extract(self, url):
1987 video_id = self._match_id(url)
1988 raise ExtractorError(
1989 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),