3 from __future__ import unicode_literals
13 from .common import InfoExtractor, SearchInfoExtractor
14 from ..jsinterp import JSInterpreter
15 from ..swfinterp import SWFInterpreter
16 from ..compat import (
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
22 compat_urllib_parse_urlparse,
23 compat_urllib_request,
31 get_element_by_attribute,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # NOTE(review): this chunk of the file is sampled -- interior lines of
    # several methods below are missing, so some statements appear orphaned.
    # Only comments are added here; the surviving code is kept verbatim.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        # Forces an English UI via the PREF cookie; the cookie-setting call
        # itself is truncated in this chunk -- these are its arguments.
        '.youtube.com', 'PREF', 'f1=50000000&hl=en',
        # YouTube sets the expire time to about two months
        expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        # Wraps each bare video id into a url_result for the Youtube IE
        # (the enclosing list comprehension is truncated in this chunk).
        self.url_result(vid_id, 'Youtube', video_id=vid_id)

    # NOTE(review): the `def _login(self):` header and the docstring quotes
    # are missing from this chunk; the lines below are the surviving body.
    Attempt to log in to YouTube.
    True is returned if successful or skipped.
    False is returned if login failed.
    If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
    (username, password) = self._get_login_info()
    # No authentication to be performed
    if self._LOGIN_REQUIRED:
        raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
    # The hidden GALX input is required by Google's login form.
    galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
        login_page, 'Login GALX parameter')
    # Surviving fields of the login form dict (its opener is truncated).
    'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
    'PersistentCookie': 'yes',
    'bgresponse': 'js_disabled',
    'checkConnection': '',
    'checkedDomains': 'youtube',
    'service': 'youtube',
    # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
    login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
    login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
    req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
    login_results = self._download_webpage(
        note='Logging in', errnote='unable to log in', fatal=False)
    if login_results is False:
    if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
        raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
    # Two-factor handling follows.
    # TODO add SMS and phone call support - these require making a request and then prompting the user
    if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
        tfa_code = self._get_tfa_info()
        self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
        self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
        # Unlike the first login form, secTok and timeStmp are both required for the TFA form
        match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
        self._downloader.report_warning('Failed to get secTok - did the page structure change?')
        secTok = match.group(1)
        match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
        self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
        timeStmp = match.group(1)
        # Surviving fields of the TFA form dict (its opener is truncated).
        'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
        'smsUserPin': tfa_code,
        'smsVerifyPin': 'Verify',
        'PersistentCookie': 'yes',
        'checkConnection': '',
        'checkedDomains': 'youtube',
        'timeStmp': timeStmp,
        'service': 'youtube',
        tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
        tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
        tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
        tfa_results = self._download_webpage(
            note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
        if tfa_results is False:
        if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
            self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
            self._downloader.report_warning('unable to log in - did the page structure change?')
        if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
            self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
    if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
        self._downloader.report_warning('unable to log in: bad username or password')

    def _real_initialize(self):
        # Called before extraction; both branch bodies are truncated in
        # this chunk (presumably early-return and login-failure handling).
        if self._downloader is None:
        if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com'
    # NOTE(review): lines are sampled in this chunk -- parts of the URL
    # regex (including its closing triple quote) and the `_formats = {`
    # opener are missing. Surviving lines are kept verbatim.
    _VALID_URL = r"""(?x)^
        (?:https?://|//)                                    # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
            (?:www\.)?deturl\.com/www\.youtube\.com/|
            (?:www\.)?pwnyoutube\.com/|
            (?:www\.)?yourepeat\.com/|
            tube\.majestyc\.net/|
            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
        (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
        (?:                                                  # the various things that can precede the ID:
            (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
            |(?:                                             # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                (?:.*?&)??                                   # any other preceding param (like /?s=tuff&v=xxxx)
            |youtu\.be/                                      # just youtu.be/xxxx
            |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
            )?                                               # all until now is optional -> you can pass the naked ID
        ([0-9A-Za-z_-]{11})                                  # here is it! the YouTube video ID
        (?!.*?&list=)                                        # combined list/video URLs are handled by the playlist IE
        (?(1).+)?                                            # if we found the ID, everything can follow
    # Regex used to pull the original URL back out of redirect/verification URLs.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Format id -> known stream attributes (surviving entries; the dict
    # opener is truncated in this chunk).
        '5': {'ext': 'flv', 'width': 400, 'height': 240},
        '6': {'ext': 'flv', 'width': 450, 'height': 270},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
        '34': {'ext': 'flv', 'width': 640, 'height': 360},
        '35': {'ext': 'flv', 'width': 854, 'height': 480},
        '36': {'ext': '3gp', 'width': 320, 'height': 240},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
        '43': {'ext': 'webm', 'width': 640, 'height': 360},
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480},
        # 3D formats
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
        # DASH mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
        # DASH webm video
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
        # DASH webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
        '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
        '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},
    # NOTE(review): surviving fields of the `_TESTS` fixtures -- the list
    # opener `_TESTS = [` and each entry's enclosing braces/`info_dict`
    # nesting are missing from this chunk. Lines kept verbatim; comments
    # mark where individual test entries begin.
        'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
        'title': 'youtube-dl test video "\'/\\ä↭𝕐',
        'uploader': 'Philipp Hagemeister',
        'uploader_id': 'phihag',
        'upload_date': '20121002',
        'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
        'categories': ['Science & Technology'],
        'tags': ['youtube-dl'],
        'dislike_count': int,
        # -- next test entry --
        'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
        'note': 'Test generic use_cipher_signature video (#897)',
        'upload_date': '20120506',
        'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
        'description': 'md5:782e8651347686cba06e58f71ab51773',
        'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                 'iconic ep', 'iconic', 'love', 'it'],
        'uploader': 'Icona Pop',
        'uploader_id': 'IconaPop',
        # -- next test entry --
        'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
        'note': 'Test VEVO video with age protection (#956)',
        'upload_date': '20130703',
        'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
        'description': 'md5:64249768eec3bc4276236606ea996373',
        'uploader': 'justintimberlakeVEVO',
        'uploader_id': 'justintimberlakeVEVO',
        # -- next test entry --
        'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
        'note': 'Embed-only video (#1746)',
        'upload_date': '20120608',
        'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
        'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
        'uploader': 'SET India',
        'uploader_id': 'setindia'
        # -- next test entry --
        'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
        'note': 'Use the first video ID in the URL',
        'title': 'youtube-dl test video "\'/\\ä↭𝕐',
        'uploader': 'Philipp Hagemeister',
        'uploader_id': 'phihag',
        'upload_date': '20121002',
        'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
        'categories': ['Science & Technology'],
        'tags': ['youtube-dl'],
        'dislike_count': int,
        # -- next test entry --
        'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
        'note': '256k DASH audio (format 141) via DASH manifest',
        'upload_date': '20121002',
        'uploader_id': '8KVIDEO',
        'uploader': '8KVIDEO',
        'title': 'UHDTV TEST 8K VIDEO.mp4'
        'youtube_include_dash_manifest': True,
        # DASH manifest with encrypted signature
        'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
        'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
        'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
        'uploader': 'AfrojackVEVO',
        'uploader_id': 'AfrojackVEVO',
        'upload_date': '20131011',
        'youtube_include_dash_manifest': True,
        # JS player signature function name containing $
        'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
        'title': 'Taylor Swift - Shake It Off',
        'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
        'uploader': 'TaylorSwiftVEVO',
        'uploader_id': 'TaylorSwiftVEVO',
        'upload_date': '20140818',
        'youtube_include_dash_manifest': True,
        # -- next test entry --
        'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
        'upload_date': '20100909',
        'uploader': 'The Amazing Atheist',
        'uploader_id': 'TheAmazingAtheist',
        'title': 'Burning Everyone\'s Koran',
        'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
        # Normal age-gate video (No vevo, embed allowed)
        'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
        'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
        'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
        'uploader': 'The Witcher',
        'uploader_id': 'WitcherGame',
        'upload_date': '20140605',
        # Age-gate video with encrypted signature
        'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
        'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
        'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
        'uploader': 'LloydVEVO',
        'uploader_id': 'LloydVEVO',
        'upload_date': '20110629',
        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
        'url': '__2ABJjxzNo',
        'upload_date': '20100430',
        'uploader_id': 'deadmau5',
        'description': 'md5:12c56784b8032162bb936a5f76d55360',
        'uploader': 'deadmau5',
        'title': 'Deadmau5 - Some Chords (HD)',
        'expected_warnings': [
            'DASH manifest missing',
        # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
        'url': 'lqQg6PlCWgI',
        'upload_date': '20120731',
        'uploader_id': 'olympic',
        'description': 'HO09 - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
        'uploader': 'Olympics',
        'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
        'skip_download': 'requires avconv',
        # -- next test entry --
        'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
        'stretched_ratio': 16 / 9.,
        'upload_date': '20110310',
        'uploader_id': 'AllenMeow',
        'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
        'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
        # url_encoded_fmt_stream_map is empty string
        'url': 'qEJwOuvDf7I',
        'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
        'upload_date': '20150404',
        'uploader_id': 'spbelect',
        'uploader': 'Наблюдатели Петербурга',
        'skip_download': 'requires avconv',
        # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
        'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
        'title': 'md5:7b81415841e02ecd4313668cde88737a',
        'description': 'md5:116377fd2963b81ec4ce64b542173306',
        'upload_date': '20150625',
        'uploader_id': 'dorappi2000',
        'uploader': 'dorappi2000',
        'formats': 'mincount:33',
        # DASH manifest with segment_list
        'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
        'md5': '8ce563a1d667b599d21064e982ab9e31',
        'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
        'uploader': 'Airtek',
        'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
        'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
        'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
        'youtube_include_dash_manifest': True,
        'format': '135',  # bestvideo
        # Multifeed videos (multiple cameras), URL is for Main Camera
        'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
        'title': 'teamPGP: Rocket League Noob Stream',
        'description': 'md5:dc7872fb300e143831327f1bae3af010',
        'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
        'description': 'md5:dc7872fb300e143831327f1bae3af010',
        'upload_date': '20150721',
        'uploader': 'Beer Games Beer',
        'uploader_id': 'beergamesbeer',
        'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
        'description': 'md5:dc7872fb300e143831327f1bae3af010',
        'upload_date': '20150721',
        'uploader': 'Beer Games Beer',
        'uploader_id': 'beergamesbeer',
        'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
        'description': 'md5:dc7872fb300e143831327f1bae3af010',
        'upload_date': '20150721',
        'uploader': 'Beer Games Beer',
        'uploader_id': 'beergamesbeer',
        'title': 'teamPGP: Rocket League Noob Stream (zim)',
        'description': 'md5:dc7872fb300e143831327f1bae3af010',
        'upload_date': '20150721',
        'uploader': 'Beer Games Beer',
        'uploader_id': 'beergamesbeer',
        'skip_download': True,
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance cache of extracted signature functions, keyed by
        # (player_url, signature cache id) -- see _decrypt_signature.
        self._player_cache = {}
    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen('%s: Downloading video info webpage' % video_id)
    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen('%s: Extracting video information' % video_id)
    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available for this video."""
        self.to_screen('%s: Format %s not available' % (video_id, format))
    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen('RTMP download detected')
def _signature_cache_id(self, example_sig):
    """Return a cache key describing a signature: the dot-separated
    lengths of its dot-separated parts (e.g. 'abc.de' -> '3.2')."""
    part_lengths = [len(part) for part in example_sig.split('.')]
    return '.'.join(compat_str(length) for length in part_lengths)
    def _extract_signature_function(self, video_id, player_url, example_sig):
        # NOTE(review): several interior lines (the `re.match` call opener,
        # the download-note conditional's assignment, `else:`, `return res`)
        # are missing from this chunk; surviving code kept verbatim.
        # Identify player type (js/swf) and id from the player URL.
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
        raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')
        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id is used as a cache file name, so it must be path-safe.
        assert os.path.basename(func_id) == func_id
        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source indices: reorder by lookup.
            return lambda s: ''.join(s[i] for i in cache_spec)
        # Download note differs depending on verbosity.
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_swf(code)
            assert False, 'Invalid player type %r' % player_type
        # Probe the function with a known string to derive a reusable
        # index-permutation spec, then persist it.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]
        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature
        function, for pasting into the static deciphering code.
        NOTE(review): the loop's step-detection lines are missing from
        this chunk; surviving code kept verbatim."""
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a minimal Python slice expression for the run.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                yield _genslice(start, prev, step)
                if i - prev in [-1, 1]:
            yield _genslice(start, i, step)
        # Derive the index permutation by probing func with a known string.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Build a signature-deciphering callable from the JS player source."""
    # Locate the name of the function assigned via `.sig||<name>(`.
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')
    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(sig_func_name)

    def apply_signature(sig):
        return decipher([sig])
    return apply_signature
def _parse_sig_swf(self, file_contents):
    """Build a signature-deciphering callable from SWF player bytes."""
    interpreter = SWFInterpreter(file_contents)
    # The decipher routine lives on the SignatureDecipher AS3 class.
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def apply_signature(sig):
        return decipher([sig])
    return apply_signature
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""
        # NOTE(review): the `try:` line, a closing paren and the
        # `return func(s)` line appear truncated in this chunk.
        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')
        # Normalize protocol-relative player URLs.
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        # Cache key: player URL plus the shape of the signature.
        player_id = (player_url, self._signature_cache_id(s))
        if player_id not in self._player_cache:
            func = self._extract_signature_function(
                video_id, player_url, s
            self._player_cache[player_id] = func
        func = self._player_cache[player_id]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(func, s)
        except Exception as e:
            # Surface the full traceback -- signature breakage is a common
            # failure mode and the trace helps diagnosing player changes.
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
    def _get_subtitles(self, video_id, webpage):
        # Fetch the list of available subtitle tracks; failure is non-fatal
        # and only produces a warning.
        # NOTE(review): the `try:` line, early returns and parts of the
        # per-track params dict are truncated in this chunk.
        subs_doc = self._download_xml(
            'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
            # One entry per supported subtitle container format.
            for ext in ['sbv', 'vtt', 'srt']:
                params = compat_urllib_parse.urlencode({
                    'name': track.attrib['name'].encode('utf-8'),
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
            sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        # NOTE(review): `try:`, returns and parts of the urlencode dicts
        # are truncated in this chunk; surviving code kept verbatim.
        self.to_screen('%s: Looking for automatic captions' % video_id)
        # The caption URL lives inside the inline ytplayer.config JSON.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        self._downloader.report_warning(err_msg)
        player_config = json.loads(mobj.group(1))
        args = player_config['args']
        caption_url = args['ttsurl']
        timestamp = args['timestamp']
        # We get the available subtitles
        list_params = compat_urllib_parse.urlencode({
        list_url = caption_url + '&' + list_params
        caption_list = self._download_xml(list_url, video_id)
        # The first <track> node describes the original-language track.
        original_lang_node = caption_list.find('track')
        if original_lang_node is None:
            self._downloader.report_warning('Video doesn\'t have automatic captions')
        original_lang = original_lang_node.attrib['lang_code']
        caption_kind = original_lang_node.attrib.get('kind', '')
        # Each <target> node is a translation target language.
        for lang_node in caption_list.findall('target'):
            sub_lang = lang_node.attrib['lang_code']
            for ext in ['sbv', 'vtt', 'srt']:
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'kind': caption_kind,
                    'url': caption_url + '&' + params,
            sub_lang_list[sub_lang] = sub_formats
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
    # NOTE(review): the decorator line (presumably @classmethod, given the
    # `cls` parameter), the None-check line and the `return` are truncated
    # in this chunk.
    def extract_id(cls, url):
        # Match against the full _VALID_URL; group 2 is the 11-char video id.
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
        raise ExtractorError('Invalid URL: %s' % url)
        video_id = mobj.group(2)
    def _extract_from_m3u8(self, manifest_url, video_id):
        # Build an itag -> URL map from an m3u8 manifest.
        # NOTE(review): the url_map initializer, part of the filter call and
        # the return are truncated in this chunk.
        def _get_urls(_manifest):
            lines = _manifest.split('\n')
            # Keep only non-empty, non-comment lines (stream URLs).
            urls = filter(lambda l: l and not l.startswith('#'),
        manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
def _extract_annotations(self, video_id):
    """Download and return the raw annotations document for a video."""
    annotations_url = (
        'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s'
        % video_id)
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')
    def _parse_dash_manifest(
            self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
        # Parse a DASH MPD into youtube-dl format dicts.
        # NOTE(review): several interior lines (`s = mobj.group(1)`,
        # `formats = []` initializer, the format dict opener, `continue`
        # statements, `try:` and the `return formats`) are truncated in
        # this chunk; surviving code kept verbatim.
        def decrypt_sig(mobj):
            # Replace each /s/<encrypted>/ path segment with its
            # deciphered /signature/<sig> equivalent.
            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
            return '/signature/%s' % dec_s
        dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
        dash_doc = self._download_xml(
            dash_manifest_url, video_id,
            note='Downloading DASH manifest',
            errnote='Could not download DASH manifest',
        if dash_doc is False:
        for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
            mime_type = a.attrib.get('mimeType')
            for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
                url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
                if mime_type == 'text/vtt':
                    # TODO implement WebVTT downloading
                elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                    format_id = r.attrib['id']
                    video_url = url_el.text
                    # YouTube-namespaced attribute carrying the byte size.
                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
                    'format_id': format_id,
                    'width': int_or_none(r.attrib.get('width')),
                    'height': int_or_none(r.attrib.get('height')),
                    'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                    'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                    'filesize': filesize,
                    'fps': int_or_none(r.attrib.get('frameRate')),
                    if segment_list is not None:
                        'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
                        'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
                        'protocol': 'http_dash_segments',
                    # Prefer merging into a format already discovered from
                    # another source; otherwise seed from the static table.
                    existing_format = next(
                        if fo['format_id'] == format_id)
                    except StopIteration:
                        full_info = self._formats.get(format_id, {}).copy()
                        codecs = r.attrib.get('codecs')
                        # 'codecs' describes whichever stream the static
                        # table says this itag carries.
                        if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
                            full_info['vcodec'] = codecs
                        elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
                            full_info['acodec'] = codecs
                        formats.append(full_info)
                        existing_format.update(f)
                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
    def _real_extract(self, url):
        """Extract metadata and download formats for a single YouTube video.

        NOTE(review): this chunk of the file is truncated — a number of
        guard lines, ``else:`` branches and literal closers are missing
        between the statements below, so the control flow shown here is
        incomplete. Comments mark the most visible gaps.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        # Scheme selection; the rest of the `proto = ...` conditional
        # expression is truncated here.
        'http' if self._downloader.params.get('prefer_insecure', False)

        # Pull optional start/end times (?t=, ?start=, ?end=) from either the
        # fragment or the query string. NOTE(review): the initialisation of
        # start_time/end_time is truncated above this loop.
        parsed_url = compat_urllib_parse_urlparse(url)
        for component in [parsed_url.fragment, parsed_url.query]:
            query = compat_parse_qs(component)
            if start_time is None and 't' in query:
                start_time = parse_duration(query['t'][0])
            if start_time is None and 'start' in query:
                start_time = parse_duration(query['start'][0])
            if end_time is None and 'end' in query:
                end_time = parse_duration(query['end'][0])

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        # NOTE(review): the `if mobj is not None:` guard appears truncated.
        url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
        video_id = self.extract_id(url)

        # Re-request the canonical watch page; bpctr bypasses the
        # content-rating interstitial.
        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
        video_webpage = self._download_webpage(url, video_id)

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        # NOTE(review): the match guard / `player_url = None` fallback is truncated.
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

        # Remember every distinct DASH manifest URL seen in the various
        # metadata sources; `dash_mpds` is initialised in a truncated line.
        def add_dash_mpd(video_info):
            dash_mpd = video_info.get('dashmpd')
            if dash_mpd and dash_mpd[0] not in dash_mpds:
                dash_mpds.append(dash_mpd[0])

        embed_webpage = None
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            # Age-gated video: fetch the embed page + get_video_info instead.
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            url = proto + '://www.youtube.com/embed/%s' % video_id
            embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
            data = compat_urllib_parse.urlencode({
                'video_id': video_id,
                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                'sts': self._search_regex(
                    r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
            # NOTE(review): closing of the urlencode dict literal is truncated.
            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(
                video_info_url, video_id,
                note='Refetching age-gated info webpage',
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            add_dash_mpd(video_info)
        # NOTE(review): the `else:` branch header (non-age-gated path) is truncated.
        # Try looking directly into the video webpage
        mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
        json_code = uppercase_escape(mobj.group(1))
        ytplayer_config = json.loads(json_code)
        args = ytplayer_config['args']
        if args.get('url_encoded_fmt_stream_map'):
            # Convert to the same format returned by compat_parse_qs
            video_info = dict((k, [v]) for k, v in args.items())
            add_dash_mpd(video_info)
        if args.get('livestream') == '1' or args.get('live_playback') == 1:
        if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
            # We also try looking in get_video_info since it may contain different dashmpd
            # URL that points to a DASH manifest with possibly different itag set (some itags
            # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
            # manifest pointed by get_video_info's dashmpd).
            # The general idea is to take a union of itags of both DASH manifests (for example
            # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
            self.report_video_info_webpage_download(video_id)
            for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
                # NOTE(review): the `video_info_url = (` opener is truncated.
                '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                % (proto, video_id, el_type))
                video_info_webpage = self._download_webpage(
                    video_id, note=False,
                    errnote='unable to download video info webpage')
                get_video_info = compat_parse_qs(video_info_webpage)
                if get_video_info.get('use_cipher_signature') != ['True']:
                    add_dash_mpd(get_video_info)
                # NOTE(review): `if not video_info:` guard truncated here.
                video_info = get_video_info
                if 'token' in get_video_info:
        # No usable token means YouTube refused playback; surface the reason.
        if 'token' not in video_info:
            if 'reason' in video_info:
                if 'The uploader has not made this video available in your country.' in video_info['reason']:
                    regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
                    raise ExtractorError('YouTube said: This video is available in %s only' % (
                        ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
                raise ExtractorError(
                    'YouTube said: %s' % video_info['reason'][0],
                    expected=True, video_id=video_id)
            raise ExtractorError(
                '"token" parameter not in video info for unknown reason',

        # Title
        if 'title' in video_info:
            video_title = video_info['title'][0]
            # NOTE(review): `else:` branch header truncated before the warning.
            self._downloader.report_warning('Unable to extract video title')

        # Description: strip YouTube's redirect-link wrappers, then fall back
        # to the <meta name="description"> tag.
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = re.sub(r'''(?x)
                (?:[a-zA-Z-]+="[^"]+"\s+)*?
                (?:[a-zA-Z-]+="[^"]+"\s+)*?
                class="yt-uix-redirect-link"\s*>
            ''', r'\1', video_description)
            video_description = clean_html(video_description)
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            video_description = unescapeHTML(fd_mobj.group(1))
            video_description = ''

        # Multi-camera live streams expose each feed as a separate entry.
        if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
            if not self._downloader.params.get('noplaylist'):
                multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
                for feed in multifeed_metadata_list.split(','):
                    feed_data = compat_parse_qs(feed)
                    # NOTE(review): `entries.append({` opener truncated here.
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                            '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
                            {'force_singlefeed': True}),
                        'title': '%s (%s)' % (video_title, feed_data['title'][0]),
                    feed_ids.append(feed_data['id'][0])
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(entries, video_id, video_title, video_description)
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

        if 'view_count' in video_info:
            view_count = int(video_info['view_count'][0])

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError('"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError('Unable to extract uploader name')
        video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])

        # uploader_id
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        if mobj is not None:
            video_uploader_id = mobj.group(1)
            # NOTE(review): `else:` header truncated before the warning.
            self._downloader.report_warning('unable to extract uploader nickname')

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning('unable to extract video thumbnail')
            video_thumbnail = None
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])

        # upload date: meta tag first, then on-page fallbacks.
        upload_date = self._html_search_meta(
            'datePublished', video_webpage, 'upload date', default=None)
            upload_date = self._search_regex(
                [r'(?s)id="eow-date.*?>(.*?)</span>',
                 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
                video_webpage, 'upload date', default=None)
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        upload_date = unified_strdate(upload_date)

        # categories
        m_cat_container = self._search_regex(
            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
            video_webpage, 'categories', default=None)
            category = self._html_search_regex(
                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
            video_categories = None if category is None else [category]
            video_categories = None

        # tags (continuation of a truncated `video_tags = [` list comprehension)
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]

        def _extract_count(count_name):
            # Scrape the like/dislike button counter; None when not present.
            return str_to_int(self._search_regex(
                r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
                % re.escape(count_name),
                video_webpage, count_name, default=None))

        like_count = _extract_count('like')
        dislike_count = _extract_count('dislike')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, video_webpage)
        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

        if 'length_seconds' not in video_info:
            self._downloader.report_warning('unable to extract video duration')
            video_duration = None
            video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))

        # annotations (only when the user asked for them)
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
            video_annotations = self._extract_annotations(video_id)

        def _map_to_format_list(urlmap):
            # Turn {itag: url} into youtube-dl format dicts, merging in the
            # static per-itag metadata from self._formats.
            for itag, video_real_url in urlmap.items():
                    'url': video_real_url,
                    'player_url': player_url,
                if itag in self._formats:
                    dct.update(self._formats[itag])

        # Format selection: rtmp > url_encoded_fmt_stream_map/adaptive_fmts > hlsvp.
        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
                'format_id': '_rtmp',
                'url': video_info['conn'][0],
                'player_url': player_url,
        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            for url_data_str in encoded_url_map.split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' not in url_data or 'url' not in url_data:
                format_id = url_data['itag'][0]
                url = url_data['url'][0]

                if 'sig' in url_data:
                    # Plain signature: just append it.
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
                    # Encrypted signature: locate the JS/SWF player and run
                    # the deobfuscation routine on it.
                    encrypted_sig = url_data['s'][0]
                    ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'

                    jsplayer_url_json = self._search_regex(
                        embed_webpage if age_gate else video_webpage,
                        'JS player URL (1)', default=None)
                    if not jsplayer_url_json and not age_gate:
                        # We need the embed website after all
                        if embed_webpage is None:
                            embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                            embed_webpage = self._download_webpage(
                                embed_url, video_id, 'Downloading embed webpage')
                        jsplayer_url_json = self._search_regex(
                            ASSETS_RE, embed_webpage, 'JS player URL')

                    player_url = json.loads(jsplayer_url_json)
                    if player_url is None:
                        player_url_json = self._search_regex(
                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                            video_webpage, 'age gate player URL')
                        player_url = json.loads(player_url_json)

                    if self._downloader.params.get('verbose'):
                        if player_url is None:
                            player_version = 'unknown'
                            player_desc = 'unknown'
                            # NOTE(review): `else:` header truncated here.
                            if player_url.endswith('swf'):
                                player_version = self._search_regex(
                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                    'flash player', fatal=False)
                                player_desc = 'flash player %s' % player_version
                                player_version = self._search_regex(
                                    r'html5player-([^/]+?)(?:/html5player)?\.js',
                                    'html5 player', fatal=False)
                                player_desc = 'html5 player %s' % player_version

                        parts_sizes = self._signature_cache_id(encrypted_sig)
                        self.to_screen('{%s} signature length %s, %s' %
                                       (format_id, parts_sizes, player_desc))

                    signature = self._decrypt_signature(
                        encrypted_sig, video_id, player_url, age_gate)
                    url += '&signature=' + signature
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'
                url_map[format_id] = url
            formats = _map_to_format_list(url_map)
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            formats = _map_to_format_list(url_map)
            # NOTE(review): `else:` header truncated before the raise.
            raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

        # Look for the DASH manifest
        if self._downloader.params.get('youtube_include_dash_manifest', True):
            dash_mpd_fatal = True
            for dash_manifest_url in dash_mpds:
                # NOTE(review): `dash_formats = {}` init and `try:` truncated.
                    for df in self._parse_dash_manifest(
                            video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
                        # Do not overwrite DASH format found in some previous DASH manifest
                        if df['format_id'] not in dash_formats:
                            dash_formats[df['format_id']] = df
                    # Additional DASH manifests may end up in HTTP Error 403 therefore
                    # allow them to fail without bug report message if we already have
                    # some DASH manifest succeeded. This is temporary workaround to reduce
                    # burst of bug reports until we figure out the reason and whether it
                    # can be fixed at all.
                    dash_mpd_fatal = False
                except (ExtractorError, KeyError) as e:
                    self.report_warning(
                        'Skipping DASH manifest: %r' % e, video_id)
                # Remove the formats we found through non-DASH, they
                # contain less info and it can be wrong, because we use
                # fixed values (for example the resolution). See
                # https://github.com/rg3/youtube-dl/issues/5774 for an
                formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
                formats.extend(dash_formats.values())

        # Check for malformed aspect ratio
        stretched_m = re.search(
            r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
            ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
                if f.get('vcodec') != 'none':
                    f['stretched_ratio'] = ratio

        self._sort_formats(formats)

        # Result dict; NOTE(review): the `return {` opener and the 'id',
        # 'formats' etc. keys are truncated in this chunk.
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date': upload_date,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'categories': video_categories,
            'subtitles': video_subtitles,
            'automatic_captions': automatic_captions,
            'duration': video_duration,
            'age_limit': 18 if age_gate else 0,
            'annotations': video_annotations,
            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
            'start_time': start_time,
            'end_time': end_time,
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    # Extractor for YouTube playlist pages (including mixes and the various
    # legacy playlist URL shapes).
    # NOTE(review): this chunk is truncated — pieces of _VALID_URL, the test
    # dict braces, and some method bodies are missing below.
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                        \? (?:.*?&)*? (?:p|a|list)=
                     (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                     # Top tracks, they can also include dots
                     ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    # Playlist entries carry an explicit index= parameter in their hrefs.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
    IE_NAME = 'youtube:playlist'
    # Test fixtures (dict/list braces truncated in this chunk):
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'playlist_count': 3,
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        'playlist_count': 0,
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'playlist_count': 95,
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        'playlist_mincount': 26,
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        'playlist_mincount': 799,
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'playlist_count': 2,
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        'playlist_mincout': 21,

    def _real_initialize(self):
        # NOTE(review): body truncated in this chunk — presumably performs
        # login (self._login()); confirm against the full file.

    def _extract_mix(self, playlist_id):
        """Extract a YouTube mix (auto-generated radio playlist)."""
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        # NOTE(review): `title_span = (` opener truncated here.
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        # Collect the video ids listed in the mix sidebar, de-duplicated in
        # first-seen order.
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
               href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular (non-mix) playlist, following "Load more" pages."""
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # Surface YouTube's inline alert messages as errors/warnings.
        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                self.report_warning('Youtube gives an alert message: ' + match)

        # Extract the video ids from the playlist pages
        # NOTE(review): the nested `def _entries():` header is truncated; the
        # loop below is the body of that generator.
            more_widget_html = content_html = page
            for page_num in itertools.count(1):
                matches = re.finditer(self._VIDEO_RE, content_html)
                # We remove the duplicates and the link with index 0
                # (it's not the first video of the playlist)
                new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
                for vid_id in new_ids:
                    yield self.url_result(vid_id, 'Youtube', video_id=vid_id)

                mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)

                more = self._download_json(
                    'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                    'Downloading page #%s' % page_num,
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                if not content_html.strip():
                    # Some webpages show a "Load more" button but they don't
                more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',

        return self.playlist_result(_entries(), playlist_id, playlist_title)

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
                # NOTE(review): `else:` header truncated before this line.
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
class YoutubeChannelIE(InfoExtractor):
    # Extractor for /channel/<id> pages. NOTE(review): truncated chunk —
    # several initialisation lines and a nested generator header are missing.
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    IE_NAME = 'youtube:channel'
    # Test fixtures (braces truncated):
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

    def extract_videos_from_page(page):
        """Yield (video_id, video_title) pairs found on a channel page.

        NOTE(review): a decorator (likely @staticmethod) and the
        ids_in_page/titles_in_page initialisation are truncated above/inside.
        """
        for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            # Duplicate ids keep their first slot; a later match may fill in
            # a missing title.
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
        return zip(ids_in_page, titles_in_page)

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        url = self._TEMPLATE_URL % channel_id

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = self._html_search_meta(
            'channelId', channel_page, 'channel id', default=None)
        if not channel_playlist_id:
            channel_playlist_id = self._search_regex(
                r'data-channel-external-id="([^"]+)"',
                channel_page, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # UCxxxx -> UUxxxx is the channel's "uploads" playlist.
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                channel-header-autogenerated-label|
                yt-channel-title-autogenerated
            )[^"]*"''', channel_page) is not None

        # Auto-generated channels are single-page (guard line truncated):
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        # NOTE(review): the nested `def _entries():` header is truncated; the
        # pagination loop below is the body of that generator.
            more_widget_html = content_html = channel_page
            for pagenum in itertools.count(1):
                for video_id, video_title in self.extract_videos_from_page(content_html):
                    yield self.url_result(
                        video_id, 'Youtube', video_id=video_id,
                        video_title=video_title)

                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                more = self._download_json(
                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
                    'Downloading page #%s' % (pagenum + 1),
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                more_widget_html = more['load_more_widget_html']

        return self.playlist_result(_entries(), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    # Extractor for /user/<name> pages and the "ytuser:" keyword; reuses the
    # channel extraction logic with a different URL template.
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
    IE_NAME = 'youtube:user'
    # Test fixtures (braces truncated):
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
            'title': 'TheLinuxFoundation',
        'url': 'ytuser:phihag',
        'only_matching': True,

    # NOTE(review): a decorator (likely @classmethod) is truncated above.
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractor, the regex would is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            # NOTE(review): `return False` line truncated here.
        return super(YoutubeUserIE, cls).suitable(url)
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    # Search extractor for the "ytsearch" keyword; pages through the AJAX
    # results endpoint.
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query parameters merged into each results request; subclasses
    # override this (see YoutubeSearchDateIE).
    _EXTRA_QUERY_ARGS = {}

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        # NOTE(review): initialisation of the result list and `limit` is
        # truncated above this loop.
        for pagenum in itertools.count(1):
            # NOTE(review): the `url_query = {` opener is truncated here.
                'search_query': query.encode('utf-8'),
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop when a page yields nothing new or we have enough results
            # (break/truncation lines cut off in this chunk).
            if not new_videos or len(videos) > limit:

        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor that returns the newest uploads first.

    Behaves exactly like YoutubeSearchIE except for the search keyword
    ("ytsearchdate") and the extra sort parameter sent to YouTube.
    """
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
class YoutubeSearchURLIE(InfoExtractor):
    # Extractor for pasted /results?search_query=... URLs; scrapes the result
    # page directly instead of going through the search API.
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    # Test fixture (braces truncated):
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
            'title': 'youtube-dl test video',

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        part_codes = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
        # NOTE(review): the `entries = []` init is truncated above this loop.
        for part_code in part_codes:
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            part_url = compat_urlparse.urljoin(
                'https://www.youtube.com/', part_url_snippet)
            # NOTE(review): `entries.append({` opener truncated here.
                'title': part_title,

        # NOTE(review): `return {` opener and remaining playlist keys truncated.
            '_type': 'playlist',
class YoutubeShowIE(InfoExtractor):
    # Extractor for /show/<name> pages: one playlist per season.
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    # Test fixture (braces truncated):
        'url': 'http://www.youtube.com/show/airdisasters',
        'playlist_mincount': 3,
            'id': 'airdisasters',
            'title': 'Air Disasters',

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
        # NOTE(review): `entries = [self.url_result(` opener truncated here.
            'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
            for season in m_seasons
        title = self._og_search_title(webpage, fatal=False)

        # NOTE(review): `return {` opener and remaining keys truncated.
            '_type': 'playlist',
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are only available to a signed-in user.
    _LOGIN_REQUIRED = True

    # NOTE(review): the `@property` / `def IE_NAME(self):` header is
    # truncated above this line.
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # NOTE(review): body truncated in this chunk — presumably performs
        # login (self._login()); confirm against the full file.

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # NOTE(review): the `ids = []` initialisation is truncated here.
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
            # NOTE(review): break-on-empty and `ids.extend(...)` lines truncated.

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            # NOTE(review): break-on-no-more-pages guard truncated.

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the signed-in user's "Watch Later" list.

    Accepts the feed URL, the WL playlist URL, or the ":ytwatchlater"
    shorthand, and delegates to the generic playlist extraction with the
    fixed playlist id 'WL'.
    """
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # The inherited playlist tests do not apply to this special list.
    _TESTS = []

    def _real_extract(self, url):
        # The watch-later list is just the special playlist 'WL'.
        return self._extract_playlist('WL')
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the signed-in user's favourite videos.

    Resolves the favourites page to its backing playlist id and hands the
    actual extraction off to YoutubePlaylistIE.
    """
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds a link to its underlying playlist; pull
        # the list id out of that link and delegate to the playlist extractor.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the signed-in user's recommended videos."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the signed-in user's subscriptions feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the signed-in user's watch history.

    Matches https://www.youtube.com/feed/history and the ":ythistory"
    shorthand.
    """
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Fixed: the pattern was a plain (non-raw) string containing `\.`,
    # which is an invalid escape sequence (DeprecationWarning on modern
    # CPython) and inconsistent with every sibling extractor. The raw
    # string yields the identical regex, so matching behavior is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catch-all for watch URLs missing the v= video id (typically an unquoted
    # "&" in the shell) so the user gets a helpful error message.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
            (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
                annotation_id=annotation_[^&]+|
            attribution_link\?a=[^&]+
    # Test fixtures (braces truncated):
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,

    def _real_extract(self, url):
        # Always an error: explain the most likely cause (unquoted URL).
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
1970 class YoutubeTruncatedIDIE(InfoExtractor):
1971 IE_NAME = 'youtube:truncated_id'
1972 IE_DESC = False # Do not list
1973 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
1976 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
1977 'only_matching': True,
1980 def _real_extract(self, url):
1981 video_id = self._match_id(url)
1982 raise ExtractorError(
1983 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),