3 from __future__ import unicode_literals
13 from .common import InfoExtractor, SearchInfoExtractor
14 from ..jsinterp import JSInterpreter
15 from ..swfinterp import SWFInterpreter
16 from ..compat import (
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
22 compat_urllib_parse_urlparse,
23 compat_urllib_request,
31 get_element_by_attribute,
# Base class shared by the YouTube extractors: holds login endpoints and the
# credential machinery used by _login()/_real_initialize().
# NOTE(review): this dump bakes original line numbers into each line, strips
# indentation and omits many lines; all code below is kept byte-identical.
46 class YoutubeBaseInfoExtractor(InfoExtractor):
47 """Provide base functions for Youtube extractors"""
# Google account sign-in endpoint fetched and POSTed to by _login().
48 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# Endpoint the two-factor (TOTP) code form is submitted to.
49 _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
# Machine name looked up in ~/.netrc for stored credentials.
50 _NETRC_MACHINE = 'youtube'
51 # If True it will raise an error if no login info is provided
52 _LOGIN_REQUIRED = False
# Force the YouTube UI language to English via the PREF cookie so later page
# scrapes see stable English markup.
# NOTE(review): fragmentary dump - the line issuing the cookie call
# (presumably self._set_cookie(...)) is missing between lines 54 and 56;
# confirm against the full file.
54 def _set_language(self):
56 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
57 # YouTube sets the expire time to about two months
58 expire_time=time.time() + 2 * 30 * 24 * 3600)
# Map a list of video ids to url_result() entries dispatched to the Youtube
# extractor. NOTE(review): fragmentary dump - the surrounding list/return
# wrapper lines are missing; only the per-id expression is visible.
60 def _ids_to_results(self, ids):
62 self.url_result(vid_id, 'Youtube', video_id=vid_id)
# _login(): attempt Google-account sign-in, including the two-factor (TOTP)
# flow. NOTE(review): the `def _login(self):` line and the docstring opener
# are missing from this dump; the lines below are docstring and body
# fragments, kept byte-identical.
67 Attempt to log in to YouTube.
68 True is returned if successful or skipped.
69 False is returned if login failed.
71 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
73 (username, password) = self._get_login_info()
74 # No authentication to be performed
# NOTE(review): the `if username is None:` guard / early-return lines are
# missing here - the _LOGIN_REQUIRED raise presumably sits inside it.
76 if self._LOGIN_REQUIRED:
77 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
# Fetch the login page non-fatally so a network failure yields False
# instead of aborting extraction.
80 login_page = self._download_webpage(
81 self._LOGIN_URL, None,
82 note='Downloading login page',
83 errnote='unable to fetch login page', fatal=False)
84 if login_page is False:
# GALX is a CSRF-style token scraped from the login form and echoed back.
87 galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
88 login_page, 'Login GALX parameter')
# Fragment of the first login form dict (login_form_strs); many fields
# (Email, Passwd, GALX, ...) are missing from this dump.
92 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
97 'PersistentCookie': 'yes',
99 'bgresponse': 'js_disabled',
100 'checkConnection': '',
101 'checkedDomains': 'youtube',
108 'service': 'youtube',
113 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
115 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
116 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
118 req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
119 login_results = self._download_webpage(
121 note='Logging in', errnote='unable to log in', fatal=False)
122 if login_results is False:
# Detect the "application-specific password" error page.
125 if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
126 raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
# --- Two-factor flow: triggered when the second-factor form is present ---
129 # TODO add SMS and phone call support - these require making a request and then prompting the user
131 if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
132 tfa_code = self._get_tfa_info()
# NOTE(review): the `if tfa_code is None:` guard around these warnings is
# missing from this dump.
135 self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
136 self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
139 # Unlike the first login form, secTok and timeStmp are both required for the TFA form
141 match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
143 self._downloader.report_warning('Failed to get secTok - did the page structure change?')
144 secTok = match.group(1)
145 match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
147 self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
148 timeStmp = match.group(1)
# Fragment of the TFA form dict (tfa_form_strs); opening/closing lines
# and some fields are missing from this dump.
151 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
153 'smsUserPin': tfa_code,
154 'smsVerifyPin': 'Verify',
156 'PersistentCookie': 'yes',
157 'checkConnection': '',
158 'checkedDomains': 'youtube',
161 'timeStmp': timeStmp,
162 'service': 'youtube',
# Same UTF-8-before-urlencode dance as for the first form (Python 2 quirk).
165 tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
166 tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
168 tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
169 tfa_results = self._download_webpage(
171 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
173 if tfa_results is False:
# Second-factor form still present -> the code was rejected/expired.
176 if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
177 self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
179 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
180 self._downloader.report_warning('unable to log in - did the page structure change?')
182 if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
183 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
# Non-TFA path: the login form reappearing means the credentials were bad.
186 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
187 self._downloader.report_warning('unable to log in: bad username or password')
# One-time setup hook: runs _login() before any extraction.
# NOTE(review): fragmentary dump - the early `return`, the
# `self._set_language()` call and the failure handling after `_login()` are
# missing between/after the visible lines.
191 def _real_initialize(self):
192 if self._downloader is None:
195 if not self._login():
# YoutubeIE: the main YouTube single-video extractor.
# NOTE(review): this dump bakes original line numbers into each line and
# omits many lines (e.g. the `_formats = {` and `_TESTS = [` openers and
# various dict/test entries); everything below is kept byte-identical.
199 class YoutubeIE(YoutubeBaseInfoExtractor):
200 IE_DESC = 'YouTube.com'
# Verbose regex matching watch/embed/short/alias URLs; the 11-char video id
# is captured by the `([0-9A-Za-z_-]{11})` group near the end.
201 _VALID_URL = r"""(?x)^
203 (?:https?://|//) # http(s):// or protocol-independent URL
204 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
205 (?:www\.)?deturl\.com/www\.youtube\.com/|
206 (?:www\.)?pwnyoutube\.com/|
207 (?:www\.)?yourepeat\.com/|
208 tube\.majestyc\.net/|
209 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
210 (?:.*?\#/)? # handle anchor (#/) redirect urls
211 (?: # the various things that can precede the ID:
212 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
213 |(?: # or the v= param in all its forms
214 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
215 (?:\?|\#!?) # the params delimiter ? or # or #!
216 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
220 |youtu\.be/ # just youtu.be/xxxx
221 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
223 )? # all until now is optional -> you can pass the naked ID
224 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
225 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
226 (?(1).+)? # if we found the ID, everything can follow
# Regex extracting the `next_url` query parameter from redirect URLs.
228 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
# itag -> static format metadata (fragment of the `_formats` table; the
# opening `_formats = {` line is missing from this dump).
230 '5': {'ext': 'flv', 'width': 400, 'height': 240},
231 '6': {'ext': 'flv', 'width': 450, 'height': 270},
232 '13': {'ext': '3gp'},
233 '17': {'ext': '3gp', 'width': 176, 'height': 144},
234 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
235 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
236 '34': {'ext': 'flv', 'width': 640, 'height': 360},
237 '35': {'ext': 'flv', 'width': 854, 'height': 480},
238 '36': {'ext': '3gp', 'width': 320, 'height': 240},
239 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
240 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
241 '43': {'ext': 'webm', 'width': 640, 'height': 360},
242 '44': {'ext': 'webm', 'width': 854, 'height': 480},
243 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
244 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
245 '59': {'ext': 'mp4', 'width': 854, 'height': 480},
246 '78': {'ext': 'mp4', 'width': 854, 'height': 480},
# 3D formats (negative preference demotes them in format selection).
250 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
251 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
252 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
253 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
254 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
255 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
256 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
258 # Apple HTTP Live Streaming
259 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
260 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
261 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
262 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
263 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
264 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
# NOTE(review): itag 151 height 72 looks suspicious (other sources list 72p
# for this tiny live format) - verify before "fixing".
265 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
# DASH mp4 video-only formats.
268 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
269 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
270 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
271 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
272 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
273 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
274 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
275 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
276 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
277 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
278 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
# DASH m4a audio-only formats.
281 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
282 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
283 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
# DASH webm video-only formats (vp8/vp9).
286 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
287 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
288 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
289 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
290 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
291 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
292 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
293 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
294 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
295 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
296 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
297 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
298 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
299 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
300 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
301 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
302 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
303 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
304 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
305 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
306 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
# DASH webm audio-only formats.
309 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
310 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
312 # Dash webm audio with opus inside
313 '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
314 '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
315 '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
# Pseudo-format marking RTMP streams.
318 '_rtmp': {'protocol': 'rtmp'},
# Test fixtures (fragment of `_TESTS`; the opening `_TESTS = [` and many
# dict-delimiter lines are missing from this dump).
324 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
328 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
329 'uploader': 'Philipp Hagemeister',
330 'uploader_id': 'phihag',
331 'upload_date': '20121002',
332 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
333 'categories': ['Science & Technology'],
334 'tags': ['youtube-dl'],
336 'dislike_count': int,
342 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
343 'note': 'Test generic use_cipher_signature video (#897)',
347 'upload_date': '20120506',
348 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
349 'description': 'md5:782e8651347686cba06e58f71ab51773',
350 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
351 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
352 'iconic ep', 'iconic', 'love', 'it'],
353 'uploader': 'Icona Pop',
354 'uploader_id': 'IconaPop',
358 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
359 'note': 'Test VEVO video with age protection (#956)',
363 'upload_date': '20130703',
364 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
365 'description': 'md5:64249768eec3bc4276236606ea996373',
366 'uploader': 'justintimberlakeVEVO',
367 'uploader_id': 'justintimberlakeVEVO',
371 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
372 'note': 'Embed-only video (#1746)',
376 'upload_date': '20120608',
377 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
378 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
379 'uploader': 'SET India',
380 'uploader_id': 'setindia'
384 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
385 'note': '256k DASH audio (format 141) via DASH manifest',
389 'upload_date': '20121002',
390 'uploader_id': '8KVIDEO',
392 'uploader': '8KVIDEO',
393 'title': 'UHDTV TEST 8K VIDEO.mp4'
396 'youtube_include_dash_manifest': True,
400 # DASH manifest with encrypted signature
402 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
406 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
407 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
408 'uploader': 'AfrojackVEVO',
409 'uploader_id': 'AfrojackVEVO',
410 'upload_date': '20131011',
413 'youtube_include_dash_manifest': True,
417 # JS player signature function name containing $
419 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
423 'title': 'Taylor Swift - Shake It Off',
424 'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
425 'uploader': 'TaylorSwiftVEVO',
426 'uploader_id': 'TaylorSwiftVEVO',
427 'upload_date': '20140818',
430 'youtube_include_dash_manifest': True,
436 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
440 'upload_date': '20100909',
441 'uploader': 'The Amazing Atheist',
442 'uploader_id': 'TheAmazingAtheist',
443 'title': 'Burning Everyone\'s Koran',
444 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
447 # Normal age-gate video (No vevo, embed allowed)
449 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
453 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
454 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
455 'uploader': 'The Witcher',
456 'uploader_id': 'WitcherGame',
457 'upload_date': '20140605',
460 # Age-gate video with encrypted signature
462 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
466 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
467 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
468 'uploader': 'LloydVEVO',
469 'uploader_id': 'LloydVEVO',
470 'upload_date': '20110629',
473 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
475 'url': '__2ABJjxzNo',
479 'upload_date': '20100430',
480 'uploader_id': 'deadmau5',
481 'description': 'md5:12c56784b8032162bb936a5f76d55360',
482 'uploader': 'deadmau5',
483 'title': 'Deadmau5 - Some Chords (HD)',
485 'expected_warnings': [
486 'DASH manifest missing',
489 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
491 'url': 'lqQg6PlCWgI',
495 'upload_date': '20120731',
496 'uploader_id': 'olympic',
497 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
498 'uploader': 'Olympics',
499 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
502 'skip_download': 'requires avconv',
507 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
511 'stretched_ratio': 16 / 9.,
512 'upload_date': '20110310',
513 'uploader_id': 'AllenMeow',
514 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
516 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
519 # url_encoded_fmt_stream_map is empty string
521 'url': 'qEJwOuvDf7I',
525 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
527 'upload_date': '20150404',
528 'uploader_id': 'spbelect',
529 'uploader': 'Наблюдатели Петербурга',
532 'skip_download': 'requires avconv',
535 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
537 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
541 'title': 'md5:7b81415841e02ecd4313668cde88737a',
542 'description': 'md5:116377fd2963b81ec4ce64b542173306',
543 'upload_date': '20150625',
544 'uploader_id': 'dorappi2000',
545 'uploader': 'dorappi2000',
546 'formats': 'mincount:33',
549 # DASH manifest with segment_list
551 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
552 'md5': '8ce563a1d667b599d21064e982ab9e31',
556 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
557 'uploader': 'Airtek',
558 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
559 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
560 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
563 'youtube_include_dash_manifest': True,
564 'format': '135', # bestvideo
568 # Multifeed videos (multiple cameras), URL is for Main Camera
569 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
572 'title': 'teamPGP: Rocket League Noob Stream',
573 'description': 'md5:dc7872fb300e143831327f1bae3af010',
579 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
580 'description': 'md5:dc7872fb300e143831327f1bae3af010',
581 'upload_date': '20150721',
582 'uploader': 'Beer Games Beer',
583 'uploader_id': 'beergamesbeer',
589 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
590 'description': 'md5:dc7872fb300e143831327f1bae3af010',
591 'upload_date': '20150721',
592 'uploader': 'Beer Games Beer',
593 'uploader_id': 'beergamesbeer',
599 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
600 'description': 'md5:dc7872fb300e143831327f1bae3af010',
601 'upload_date': '20150721',
602 'uploader': 'Beer Games Beer',
603 'uploader_id': 'beergamesbeer',
609 'title': 'teamPGP: Rocket League Noob Stream (zim)',
610 'description': 'md5:dc7872fb300e143831327f1bae3af010',
611 'upload_date': '20150721',
612 'uploader': 'Beer Games Beer',
613 'uploader_id': 'beergamesbeer',
617 'skip_download': True,
def __init__(self, *args, **kwargs):
    """Create the extractor and set up an empty per-instance cache of
    signature-decryption functions, keyed by (player_url, signature id)."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    self._player_cache = dict()
def report_video_info_webpage_download(self, video_id):
    """Announce on screen that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Announce on screen that video information extraction has started."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Announce on screen that the requested format is not available.

    (The original docstring said "Report extracted video URL", which was a
    copy-paste leftover; the message below is what is actually reported.)
    """
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
def report_rtmp_download(self):
    """Announce on screen that the download will use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
def _signature_cache_id(self, example_sig):
    """Return a compact cache id for a signature: the dot-joined lengths of
    its dot-separated parts (e.g. 'abc.defg' -> '3.4')."""
    part_lengths = [len(part) for part in example_sig.split('.')]
    return '.'.join(compat_str(length) for length in part_lengths)
# Build (or load from the on-disk 'youtube-sigfuncs' cache) the function that
# decrypts an encrypted signature for the given player.
# NOTE(review): fragmentary dump - the re.match() assignment producing id_m,
# the download-note expression wrapper, the swf `code = urlh.read()` line and
# the final `return res` are missing.
646 def _extract_signature_function(self, video_id, player_url, example_sig):
648 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
651 raise ExtractorError('Cannot identify player %r' % player_url)
652 player_type = id_m.group('ext')
653 player_id = id_m.group('id')
655 # Read from filesystem cache
# Cache key includes the signature "shape" so different signature layouts
# get separate entries; basename assert guards against path traversal.
656 func_id = '%s_%s_%s' % (
657 player_type, player_id, self._signature_cache_id(example_sig))
658 assert os.path.basename(func_id) == func_id
660 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
661 if cache_spec is not None:
# Cached spec is a list of input indices: the decryption is replayed as a
# pure character permutation/selection.
662 return lambda s: ''.join(s[i] for i in cache_spec)
665 'Downloading player %s' % player_url
666 if self._downloader.params.get('verbose') else
667 'Downloading %s player %s' % (player_type, player_id)
669 if player_type == 'js':
670 code = self._download_webpage(
671 player_url, video_id,
673 errnote='Download of %s failed' % player_url)
674 res = self._parse_sig_js(code)
675 elif player_type == 'swf':
676 urlh = self._request_webpage(
677 player_url, video_id,
679 errnote='Download of %s failed' % player_url)
681 res = self._parse_sig_swf(code)
683 assert False, 'Invalid player type %r' % player_type
# Probe the extracted function with chr(0)..chr(n-1) so each output char's
# ordinal reveals which input index it came from.
685 test_string = ''.join(map(compat_chr, range(len(example_sig))))
686 cache_res = res(test_string)
687 cache_spec = [ord(c) for c in cache_res]
689 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
# Debug helper (--youtube-print-sig-code): render the probed signature
# permutation as human-readable Python slice expressions.
# NOTE(review): fragmentary dump - most of gen_sig_code's loop body
# (step detection, the final-element handling) is missing.
692 def _print_sig_code(self, func, example_sig):
693 def gen_sig_code(idxs):
# _genslice: format a contiguous index run as a Python slice expression.
694 def _genslice(start, end, step):
695 starts = '' if start == 0 else str(start)
696 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
697 steps = '' if step == 1 else (':%d' % step)
698 return 's[%s%s%s]' % (starts, ends, steps)
701 # Quelch pyflakes warnings - start will be set when step is set
702 start = '(Never used)'
703 for i, prev in zip(idxs[1:], idxs[:-1]):
707 yield _genslice(start, prev, step)
710 if i - prev in [-1, 1]:
719 yield _genslice(start, i, step)
# Probe func the same way _extract_signature_function does, then join the
# slice pieces into a copy-pasteable `if ...: return ...` snippet.
721 test_string = ''.join(map(compat_chr, range(len(example_sig))))
722 cache_res = func(test_string)
723 cache_spec = [ord(c) for c in cache_res]
724 expr_code = ' + '.join(gen_sig_code(cache_spec))
725 signature_id_tuple = '(%s)' % (
726 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
727 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
728 ' return %s\n') % (signature_id_tuple, expr_code)
729 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Locate and extract the signature-decryption routine from the HTML5
    player's JavaScript source.

    Returns a callable mapping an encrypted signature string to the
    decrypted signature.
    """
    func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')
    interpreter = JSInterpreter(jscode)
    sig_function = interpreter.extract_function(func_name)

    def decrypt(s):
        return sig_function([s])

    return decrypt
def _parse_sig_swf(self, file_contents):
    """Extract the signature decipher routine from a Flash (SWF) player.

    Returns a callable mapping an encrypted signature string to the
    decrypted signature.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def decrypt(s):
        return decipher([s])

    return decrypt
# NOTE(review): fragmentary dump - the `try:` opener, the closing paren of
# the _extract_signature_function call and the `return func(s)` line are
# missing between the visible lines.
747 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
748 """Turn the encrypted s field into a working signature"""
750 if player_url is None:
751 raise ExtractorError('Cannot decrypt signature without player_url')
# Protocol-relative player URLs are normalized to https.
753 if player_url.startswith('//'):
754 player_url = 'https:' + player_url
# Memoize per (player_url, signature shape) in self._player_cache.
756 player_id = (player_url, self._signature_cache_id(s))
757 if player_id not in self._player_cache:
758 func = self._extract_signature_function(
759 video_id, player_url, s
761 self._player_cache[player_id] = func
762 func = self._player_cache[player_id]
763 if self._downloader.params.get('youtube_print_sig_code'):
764 self._print_sig_code(func, s)
# Any failure is wrapped with a traceback so users can report it.
766 except Exception as e:
767 tb = traceback.format_exc()
768 raise ExtractorError(
769 'Signature extraction failed: ' + tb, cause=e)
# Fetch the list of manually-created subtitle tracks via the timedtext API
# and build a {lang: [format dicts]} mapping.
# NOTE(review): fragmentary dump - the `try:` opener, the early return after
# the warning, parts of the params dict and the final return are missing.
771 def _get_subtitles(self, video_id, webpage):
773 subs_doc = self._download_xml(
774 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
775 video_id, note=False)
776 except ExtractorError as err:
# Best-effort: a failed listing only warns, it does not abort extraction.
777 self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
781 for track in subs_doc.findall('track'):
782 lang = track.attrib['lang_code']
# First track per language wins; later duplicates are skipped.
783 if lang in sub_lang_list:
786 for ext in ['sbv', 'vtt', 'srt']:
787 params = compat_urllib_parse.urlencode({
791 'name': track.attrib['name'].encode('utf-8'),
794 'url': 'https://www.youtube.com/api/timedtext?' + params,
797 sub_lang_list[lang] = sub_formats
798 if not sub_lang_list:
799 self._downloader.report_warning('video doesn\'t have subtitles')
# NOTE(review): fragmentary dump - the `try:` opener, several early returns,
# parts of the urlencode dicts and the final return are missing.
803 def _get_automatic_captions(self, video_id, webpage):
804 """We need the webpage for getting the captions url, pass it as an
805 argument to speed up the process."""
806 self.to_screen('%s: Looking for automatic captions' % video_id)
# The ttsurl/timestamp needed for the caption API live in ytplayer.config,
# embedded as JSON in the watch page.
807 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
808 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
810 self._downloader.report_warning(err_msg)
812 player_config = json.loads(mobj.group(1))
814 args = player_config['args']
815 caption_url = args['ttsurl']
816 timestamp = args['timestamp']
817 # We get the available subtitles
818 list_params = compat_urllib_parse.urlencode({
823 list_url = caption_url + '&' + list_params
824 caption_list = self._download_xml(list_url, video_id)
# The 'track' node describes the original (ASR) language; 'target' nodes
# are the languages it can be auto-translated into.
825 original_lang_node = caption_list.find('track')
826 if original_lang_node is None:
827 self._downloader.report_warning('Video doesn\'t have automatic captions')
829 original_lang = original_lang_node.attrib['lang_code']
830 caption_kind = original_lang_node.attrib.get('kind', '')
833 for lang_node in caption_list.findall('target'):
834 sub_lang = lang_node.attrib['lang_code']
836 for ext in ['sbv', 'vtt', 'srt']:
837 params = compat_urllib_parse.urlencode({
838 'lang': original_lang,
842 'kind': caption_kind,
845 'url': caption_url + '&' + params,
848 sub_lang_list[sub_lang] = sub_formats
850 # An extractor error can be raise by the download process if there are
851 # no automatic captions but there are subtitles
852 except (KeyError, ExtractorError):
853 self._downloader.report_warning(err_msg)
# Extract the 11-character video id from a URL using _VALID_URL (group 2 is
# the id group of that pattern).
# NOTE(review): fragmentary dump - the @classmethod decorator, the
# `if mobj is None:` guard and the `return video_id` line are missing.
857 def extract_id(cls, url):
858 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
860 raise ExtractorError('Invalid URL: %s' % url)
861 video_id = mobj.group(2)
# Download an m3u8 manifest and build an {itag: stream_url} map from its
# non-comment lines.
# NOTE(review): fragmentary dump - the `url_map = {}` initialization, the
# second argument line of the filter() call and the final return are missing.
864 def _extract_from_m3u8(self, manifest_url, video_id):
867 def _get_urls(_manifest):
868 lines = _manifest.split('\n')
# Keep only non-empty lines that are not '#' comments/tags.
869 urls = filter(lambda l: l and not l.startswith('#'),
872 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
873 formats_urls = _get_urls(manifest)
874 for format_url in formats_urls:
# The itag is encoded in the stream URL path as '.../itag/<n>/...'.
875 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
876 url_map[itag] = format_url
def _extract_annotations(self, video_id):
    """Download and return the raw annotations document for *video_id*."""
    annotations_url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')
# Download and parse a DASH MPD manifest into youtube-dl format dicts,
# decrypting any encrypted signature embedded in the manifest URL first.
# NOTE(review): fragmentary dump - the `s = mobj.group(1)` line, the fatal=
# kwarg line, `formats = []` init, several guards, the `f = {`/`f.update(`
# wrappers around the format dict, and the final `return formats` are missing.
883 def _parse_dash_manifest(
884 self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
# The manifest URL may carry '/s/<sig>' which must be decrypted and
# re-embedded as '/signature/<sig>'.
885 def decrypt_sig(mobj):
887 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
888 return '/signature/%s' % dec_s
889 dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
890 dash_doc = self._download_xml(
891 dash_manifest_url, video_id,
892 note='Downloading DASH manifest',
893 errnote='Could not download DASH manifest',
896 if dash_doc is False:
# Walk every AdaptationSet/Representation in the MPD namespace.
900 for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
901 mime_type = a.attrib.get('mimeType')
902 for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
903 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
906 if mime_type == 'text/vtt':
907 # TODO implement WebVTT downloading
909 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
910 segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
911 format_id = r.attrib['id']
912 video_url = url_el.text
# contentLength attribute uses a YouTube-specific XML namespace.
913 filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
915 'format_id': format_id,
917 'width': int_or_none(r.attrib.get('width')),
918 'height': int_or_none(r.attrib.get('height')),
# bandwidth is in bit/s; divided by 1000 to get kbit/s (tbr).
919 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
920 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
921 'filesize': filesize,
922 'fps': int_or_none(r.attrib.get('frameRate')),
# Segmented representations become http_dash_segments formats.
924 if segment_list is not None:
926 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
927 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
928 'protocol': 'http_dash_segments',
# Merge with an already-collected format of the same itag if present;
# otherwise enrich from the static _formats table.
931 existing_format = next(
933 if fo['format_id'] == format_id)
934 except StopIteration:
935 full_info = self._formats.get(format_id, {}).copy()
937 codecs = r.attrib.get('codecs')
# For audio-only/video-only entries the codecs attribute names the one
# present stream.
939 if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
940 full_info['vcodec'] = codecs
941 elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
942 full_info['acodec'] = codecs
943 formats.append(full_info)
945 existing_format.update(f)
947 self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
950 def _real_extract(self, url):
# Main video extraction entry point: downloads the watch page, resolves the
# stream maps / DASH manifests, decrypts signatures if needed and assembles
# the final info dict.
# NOTE(review): this chunk is an elided view - several original source lines
# are missing between the numbered lines below; comments describe only what
# is visible here.
951 url, smuggled_data = unsmuggle_url(url, {})
# Honour --prefer-insecure by building plain-http URLs where possible.
954 'http' if self._downloader.params.get('prefer_insecure', False)
# Pull start/end times from both the URL fragment and the query string
# (e.g. ?t=1m30s, #start=10&end=20).
959 parsed_url = compat_urllib_parse_urlparse(url)
960 for component in [parsed_url.fragment, parsed_url.query]:
961 query = compat_parse_qs(component)
962 if start_time is None and 't' in query:
963 start_time = parse_duration(query['t'][0])
964 if start_time is None and 'start' in query:
965 start_time = parse_duration(query['start'][0])
966 if end_time is None and 'end' in query:
967 end_time = parse_duration(query['end'][0])
969 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
970 mobj = re.search(self._NEXT_URL_RE, url)
972 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
973 video_id = self.extract_id(url)
# Rebuild a canonical watch URL; bpctr=9999999999 bypasses the
# content-rating interstitial.
976 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
977 video_webpage = self._download_webpage(url, video_id)
979 # Attempt to extract SWF player URL
980 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
982 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
# Collect every distinct DASH manifest URL encountered; their itags are
# unioned further below.
988 def add_dash_mpd(video_info):
989 dash_mpd = video_info.get('dashmpd')
990 if dash_mpd and dash_mpd[0] not in dash_mpds:
991 dash_mpds.append(dash_mpd[0])
# Age-gated videos: fetch video info through the embed player, which can be
# viewed without logging in.
996 if re.search(r'player-age-gate-content">', video_webpage) is not None:
998 # We simulate the access to the video from www.youtube.com/v/{video_id}
999 # this can be viewed without login into Youtube
1000 url = proto + '://www.youtube.com/embed/%s' % video_id
1001 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1002 data = compat_urllib_parse.urlencode({
1003 'video_id': video_id,
1004 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1005 'sts': self._search_regex(
1006 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1008 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1009 video_info_webpage = self._download_webpage(
1010 video_info_url, video_id,
1011 note='Refetching age-gated info webpage',
1012 errnote='unable to download video info webpage')
1013 video_info = compat_parse_qs(video_info_webpage)
1014 add_dash_mpd(video_info)
1018 # Try looking directly into the video webpage
1019 mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
1021 json_code = uppercase_escape(mobj.group(1))
1022 ytplayer_config = json.loads(json_code)
1023 args = ytplayer_config['args']
1024 if args.get('url_encoded_fmt_stream_map'):
1025 # Convert to the same format returned by compat_parse_qs
1026 video_info = dict((k, [v]) for k, v in args.items())
1027 add_dash_mpd(video_info)
# NOTE(review): 'livestream' is compared to the string '1' but
# 'live_playback' to the integer 1 - presumably intentional (different
# source encodings), but worth confirming.
1028 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1030 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1031 # We also try looking in get_video_info since it may contain different dashmpd
1032 # URL that points to a DASH manifest with possibly different itag set (some itags
1033 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1034 # manifest pointed by get_video_info's dashmpd).
1035 # The general idea is to take a union of itags of both DASH manifests (for example
1036 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1037 self.report_video_info_webpage_download(video_id)
1038 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1040 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1041 % (proto, video_id, el_type))
1042 video_info_webpage = self._download_webpage(
1044 video_id, note=False,
1045 errnote='unable to download video info webpage')
1046 get_video_info = compat_parse_qs(video_info_webpage)
# Ciphered-signature responses carry dashmpd URLs we cannot use directly.
1047 if get_video_info.get('use_cipher_signature') != ['True']:
1048 add_dash_mpd(get_video_info)
1050 video_info = get_video_info
1051 if 'token' in get_video_info:
# Without a 'token' the video is unplayable - surface YouTube's stated
# reason (including geo-restriction details) when available.
1053 if 'token' not in video_info:
1054 if 'reason' in video_info:
1055 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1056 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1058 raise ExtractorError('YouTube said: This video is available in %s only' % (
1059 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1061 raise ExtractorError(
1062 'YouTube said: %s' % video_info['reason'][0],
1063 expected=True, video_id=video_id)
1065 raise ExtractorError(
1066 '"token" parameter not in video info for unknown reason',
# -- title --
1070 if 'title' in video_info:
1071 video_title = video_info['title'][0]
1073 self._downloader.report_warning('Unable to extract video title')
# -- description: prefer the full page element, fall back to the meta tag --
1077 video_description = get_element_by_id("eow-description", video_webpage)
1078 if video_description:
# Rewrite YouTube redirect links to their plain targets before cleaning
# the HTML.
1079 video_description = re.sub(r'''(?x)
1081 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1083 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1084 class="yt-uix-redirect-link"\s*>
1087 ''', r'\1', video_description)
1088 video_description = clean_html(video_description)
1090 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1092 video_description = unescapeHTML(fd_mobj.group(1))
1094 video_description = ''
# Multi-camera live events: expose each feed as a separate playlist entry
# unless --no-playlist (or smuggled force_singlefeed) was given.
1096 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1097 if not self._downloader.params.get('noplaylist'):
1100 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
1101 for feed in multifeed_metadata_list.split(','):
1102 feed_data = compat_parse_qs(feed)
1104 '_type': 'url_transparent',
1105 'ie_key': 'Youtube',
1107 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1108 {'force_singlefeed': True}),
1109 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1111 feed_ids.append(feed_data['id'][0])
1113 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1114 % (', '.join(feed_ids), video_id))
1115 return self.playlist_result(entries, video_id, video_title, video_description)
1116 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
# -- view count --
1118 if 'view_count' in video_info:
1119 view_count = int(video_info['view_count'][0])
1123 # Check for "rental" videos
1124 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1125 raise ExtractorError('"rental" videos not supported')
1127 # Start extracting information
1128 self.report_information_extraction(video_id)
# -- uploader name / id --
1131 if 'author' not in video_info:
1132 raise ExtractorError('Unable to extract uploader name')
1133 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1136 video_uploader_id = None
1137 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1138 if mobj is not None:
1139 video_uploader_id = mobj.group(1)
1141 self._downloader.report_warning('unable to extract uploader nickname')
1144 # We try first to get a high quality image:
1145 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1146 video_webpage, re.DOTALL)
1147 if m_thumb is not None:
1148 video_thumbnail = m_thumb.group(1)
1149 elif 'thumbnail_url' not in video_info:
1150 self._downloader.report_warning('unable to extract video thumbnail')
1151 video_thumbnail = None
1152 else: # don't panic if we can't find it
1153 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
# -- upload date: meta tag first, then visible page text --
1156 upload_date = self._html_search_meta(
1157 'datePublished', video_webpage, 'upload date', default=None)
1159 upload_date = self._search_regex(
1160 [r'(?s)id="eow-date.*?>(.*?)</span>',
1161 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1162 video_webpage, 'upload date', default=None)
# Normalize separators/whitespace before handing off to unified_strdate.
1164 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1165 upload_date = unified_strdate(upload_date)
# -- category (single-element list, or None when absent) --
1167 m_cat_container = self._search_regex(
1168 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1169 video_webpage, 'categories', default=None)
1171 category = self._html_search_regex(
1172 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1174 video_categories = None if category is None else [category]
1176 video_categories = None
# -- tags from og:video:tag meta properties --
1179 unescapeHTML(m.group('content'))
1180 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
# Scrape like/dislike button counters from the watch page markup;
# returns None when the counter is hidden or missing.
1182 def _extract_count(count_name):
1183 return str_to_int(self._search_regex(
1184 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1185 % re.escape(count_name),
1186 video_webpage, count_name, default=None))
1188 like_count = _extract_count('like')
1189 dislike_count = _extract_count('dislike')
# -- subtitles / automatic captions --
1192 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1193 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
# -- duration (seconds) --
1195 if 'length_seconds' not in video_info:
1196 self._downloader.report_warning('unable to extract video duration')
1197 video_duration = None
1199 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
# -- annotations (only fetched when --write-annotations was given) --
1202 video_annotations = None
1203 if self._downloader.params.get('writeannotations', False):
1204 video_annotations = self._extract_annotations(video_id)
# Turn an itag -> URL map into youtube-dl format dicts, merging in the
# static per-itag metadata from self._formats.
1206 def _map_to_format_list(urlmap):
1208 for itag, video_real_url in urlmap.items():
1211 'url': video_real_url,
1212 'player_url': player_url,
1214 if itag in self._formats:
1215 dct.update(self._formats[itag])
# -- formats: rtmp / encoded stream maps (with signature decryption) / HLS --
1219 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1220 self.report_rtmp_download()
1222 'format_id': '_rtmp',
1224 'url': video_info['conn'][0],
1225 'player_url': player_url,
1227 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1228 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1229 if 'rtmpe%3Dyes' in encoded_url_map:
1230 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1232 for url_data_str in encoded_url_map.split(','):
1233 url_data = compat_parse_qs(url_data_str)
1234 if 'itag' not in url_data or 'url' not in url_data:
1236 format_id = url_data['itag'][0]
1237 url = url_data['url'][0]
# Plain signature ('sig') is appended directly; encrypted signature ('s')
# requires locating the JS/SWF player and running the decryption routine.
1239 if 'sig' in url_data:
1240 url += '&signature=' + url_data['sig'][0]
1241 elif 's' in url_data:
1242 encrypted_sig = url_data['s'][0]
1243 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1245 jsplayer_url_json = self._search_regex(
1247 embed_webpage if age_gate else video_webpage,
1248 'JS player URL (1)', default=None)
1249 if not jsplayer_url_json and not age_gate:
1250 # We need the embed website after all
1251 if embed_webpage is None:
1252 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1253 embed_webpage = self._download_webpage(
1254 embed_url, video_id, 'Downloading embed webpage')
1255 jsplayer_url_json = self._search_regex(
1256 ASSETS_RE, embed_webpage, 'JS player URL')
1258 player_url = json.loads(jsplayer_url_json)
1259 if player_url is None:
1260 player_url_json = self._search_regex(
1261 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1262 video_webpage, 'age gate player URL')
1263 player_url = json.loads(player_url_json)
# In verbose mode, report which player (flash or html5, plus version) the
# signature belongs to, to ease debugging of decryption failures.
1265 if self._downloader.params.get('verbose'):
1266 if player_url is None:
1267 player_version = 'unknown'
1268 player_desc = 'unknown'
1270 if player_url.endswith('swf'):
1271 player_version = self._search_regex(
1272 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1273 'flash player', fatal=False)
1274 player_desc = 'flash player %s' % player_version
1276 player_version = self._search_regex(
1277 r'html5player-([^/]+?)(?:/html5player)?\.js',
1279 'html5 player', fatal=False)
1280 player_desc = 'html5 player %s' % player_version
1282 parts_sizes = self._signature_cache_id(encrypted_sig)
1283 self.to_screen('{%s} signature length %s, %s' %
1284 (format_id, parts_sizes, player_desc))
1286 signature = self._decrypt_signature(
1287 encrypted_sig, video_id, player_url, age_gate)
1288 url += '&signature=' + signature
1289 if 'ratebypass' not in url:
1290 url += '&ratebypass=yes'
1291 url_map[format_id] = url
1292 formats = _map_to_format_list(url_map)
1293 elif video_info.get('hlsvp'):
1294 manifest_url = video_info['hlsvp'][0]
1295 url_map = self._extract_from_m3u8(manifest_url, video_id)
1296 formats = _map_to_format_list(url_map)
1298 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1300 # Look for the DASH manifest
1301 if self._downloader.params.get('youtube_include_dash_manifest', True):
1302 dash_mpd_fatal = True
1303 for dash_manifest_url in dash_mpds:
1306 for df in self._parse_dash_manifest(
1307 video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
1308 # Do not overwrite DASH format found in some previous DASH manifest
1309 if df['format_id'] not in dash_formats:
1310 dash_formats[df['format_id']] = df
1311 # Additional DASH manifests may end up in HTTP Error 403 therefore
1312 # allow them to fail without bug report message if we already have
1313 # some DASH manifest succeeded. This is temporary workaround to reduce
1314 # burst of bug reports until we figure out the reason and whether it
1315 # can be fixed at all.
1316 dash_mpd_fatal = False
1317 except (ExtractorError, KeyError) as e:
1318 self.report_warning(
1319 'Skipping DASH manifest: %r' % e, video_id)
1321 # Remove the formats we found through non-DASH, they
1322 # contain less info and it can be wrong, because we use
1323 # fixed values (for example the resolution). See
1324 # https://github.com/rg3/youtube-dl/issues/5774 for an
1326 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1327 formats.extend(dash_formats.values())
1329 # Check for malformed aspect ratio
1330 stretched_m = re.search(
1331 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
# Tag video formats (not audio-only) with the corrected aspect ratio.
1334 ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
1336 if f.get('vcodec') != 'none':
1337 f['stretched_ratio'] = ratio
1339 self._sort_formats(formats)
# Assemble and return the final info dict.
1343 'uploader': video_uploader,
1344 'uploader_id': video_uploader_id,
1345 'upload_date': upload_date,
1346 'title': video_title,
1347 'thumbnail': video_thumbnail,
1348 'description': video_description,
1349 'categories': video_categories,
1351 'subtitles': video_subtitles,
1352 'automatic_captions': automatic_captions,
1353 'duration': video_duration,
1354 'age_limit': 18 if age_gate else 0,
1355 'annotations': video_annotations,
1356 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1357 'view_count': view_count,
1358 'like_count': like_count,
1359 'dislike_count': dislike_count,
1360 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1363 'start_time': start_time,
1364 'end_time': end_time,
1368 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
# Extractor for YouTube playlists. Mixes (RD.../UL... ids) are generated on
# the fly from a seed video and get a dedicated extraction path.
1369 IE_DESC = 'YouTube.com playlists'
1370 _VALID_URL = r"""(?x)(?:
1375 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
1376 \? (?:.*?&)*? (?:p|a|list)=
1380 (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
1381 # Top tracks, they can also include dots
1386 ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
1388 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
1389 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
1390 IE_NAME = 'youtube:playlist'
1392 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1394 'title': 'ytdl test PL',
1395 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1397 'playlist_count': 3,
1399 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1401 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1402 'title': 'YDL_Empty_List',
1404 'playlist_count': 0,
1406 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
1407 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1409 'title': '29C3: Not my department',
1410 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1412 'playlist_count': 95,
1414 'note': 'issue #673',
1415 'url': 'PLBB231211A4F62143',
1417 'title': '[OLD]Team Fortress 2 (Class-based LP)',
1418 'id': 'PLBB231211A4F62143',
1420 'playlist_mincount': 26,
1422 'note': 'Large playlist',
1423 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
1425 'title': 'Uploads from Cauchemar',
1426 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
1428 'playlist_mincount': 799,
1430 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1432 'title': 'YDL_safe_search',
1433 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1435 'playlist_count': 2,
1438 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1439 'playlist_count': 4,
1442 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1445 'note': 'Embedded SWF player',
1446 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
1447 'playlist_count': 4,
1450 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
1453 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
1454 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
1456 'title': 'Uploads from Interstellar Movie',
1457 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
# NOTE(review): 'playlist_mincout' below looks like a typo for
# 'playlist_mincount' - as written the test framework would silently
# ignore this expectation.
1459 'playlist_mincout': 21,
# Log in (if credentials are available) before extracting, since private
# playlists require authentication.
1462 def _real_initialize(self):
1465 def _extract_mix(self, playlist_id):
1466 # The mixes are generated from a single video
1467 # the id of the playlist is just 'RD' + video_id
1468 url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
1469 webpage = self._download_webpage(
1470 url, playlist_id, 'Downloading Youtube mix')
# Try several title markups, newest first.
1471 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
1473 search_title('playlist-title') or
1474 search_title('title long-title') or
1475 search_title('title'))
1476 title = clean_html(title_span)
# Collect the mix's video ids from the sidebar links, preserving order.
1477 ids = orderedSet(re.findall(
1478 r'''(?xs)data-video-username=".*?".*?
1479 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
1481 url_results = self._ids_to_results(ids)
1483 return self.playlist_result(url_results, playlist_id, title)
# Regular (non-mix) playlists: page through the playlist view, following the
# "Load more" widget until it disappears or returns no new content.
1485 def _extract_playlist(self, playlist_id):
1486 url = self._TEMPLATE_URL % playlist_id
1487 page = self._download_webpage(url, playlist_id)
# Surface YouTube alert boxes (private/nonexistent playlist, bad params).
1489 for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
1490 match = match.strip()
1491 # Check if the playlist exists or is private
1492 if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
1493 raise ExtractorError(
1494 'The playlist doesn\'t exist or is private, use --username or '
1495 '--netrc to access it.',
1497 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
1498 raise ExtractorError(
1499 'Invalid parameters. Maybe URL is incorrect.',
1501 elif re.match(r'[^<]*Choose your language[^<]*', match):
1504 self.report_warning('Youtube gives an alert message: ' + match)
1506 # Extract the video ids from the playlist pages
1508 more_widget_html = content_html = page
1509 for page_num in itertools.count(1):
1510 matches = re.finditer(self._VIDEO_RE, content_html)
1511 # We remove the duplicates and the link with index 0
1512 # (it's not the first video of the playlist)
1513 new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
1514 for vid_id in new_ids:
1515 yield self.url_result(vid_id, 'Youtube', video_id=vid_id)
1517 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
1521 more = self._download_json(
1522 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
1523 'Downloading page #%s' % page_num,
1524 transform_source=uppercase_escape)
1525 content_html = more['content_html']
1526 if not content_html.strip():
1527 # Some webpages show a "Load more" button but they don't
1530 more_widget_html = more['load_more_widget_html']
1532 playlist_title = self._html_search_regex(
1533 r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
1536 return self.playlist_result(_entries(), playlist_id, playlist_title)
1538 def _real_extract(self, url):
1539 # Extract playlist id
1540 mobj = re.match(self._VALID_URL, url)
1542 raise ExtractorError('Invalid URL: %s' % url)
1543 playlist_id = mobj.group(1) or mobj.group(2)
1545 # Check if it's a video-specific URL
1546 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
1547 if 'v' in query_dict:
1548 video_id = query_dict['v'][0]
1549 if self._downloader.params.get('noplaylist'):
1550 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1551 return self.url_result(video_id, 'Youtube', video_id=video_id)
1553 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
# Mixes (RD...) and "uploads like this" lists (UL...) need the mix path.
1555 if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
1556 # Mixes require a custom extraction process
1557 return self._extract_mix(playlist_id)
1559 return self._extract_playlist(playlist_id)
1562 class YoutubeChannelIE(InfoExtractor):
# Extractor for YouTube channel pages. Prefers redirecting to the channel's
# auto-generated uploads playlist (UU + channel id suffix), because paging
# through /videos is capped by YouTube; falls back to per-page extraction.
1563 IE_DESC = 'YouTube.com channels'
1564 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
1565 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
1566 IE_NAME = 'youtube:channel'
1568 'note': 'paginated channel',
1569 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
1570 'playlist_mincount': 91,
1572 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
# Parse (video_id, title) pairs out of channel-page HTML, de-duplicating ids
# and back-filling a missing title if a later occurrence provides one.
# NOTE(review): the decorator line is elided from this view - presumably a
# @staticmethod, since there is no self parameter; confirm in full source.
1577 def extract_videos_from_page(page):
1580 for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
1581 video_id = mobj.group('id')
1582 video_title = unescapeHTML(mobj.group('title'))
1584 idx = ids_in_page.index(video_id)
1585 if video_title and not titles_in_page[idx]:
1586 titles_in_page[idx] = video_title
1588 ids_in_page.append(video_id)
1589 titles_in_page.append(video_title)
1590 return zip(ids_in_page, titles_in_page)
1592 def _real_extract(self, url):
1593 channel_id = self._match_id(url)
1595 url = self._TEMPLATE_URL % channel_id
1597 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
1598 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
1599 # otherwise fallback on channel by page extraction
1600 channel_page = self._download_webpage(
1601 url + '?view=57', channel_id,
1602 'Downloading channel page', fatal=False)
1603 channel_playlist_id = self._html_search_meta(
1604 'channelId', channel_page, 'channel id', default=None)
1605 if not channel_playlist_id:
1606 channel_playlist_id = self._search_regex(
1607 r'data-channel-external-id="([^"]+)"',
1608 channel_page, 'channel id', default=None)
# UC<suffix> channel ids map to UU<suffix> "uploads" playlist ids.
1609 if channel_playlist_id and channel_playlist_id.startswith('UC'):
1610 playlist_id = 'UU' + channel_playlist_id[2:]
1611 return self.url_result(
1612 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
# Fallback: page through the channel's /videos listing directly.
1614 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
1615 autogenerated = re.search(r'''(?x)
1617 channel-header-autogenerated-label|
1618 yt-channel-title-autogenerated
1619 )[^"]*"''', channel_page) is not None
1622 # The videos are contained in a single page
1623 # the ajax pages can't be used, they are empty
1626 video_id, 'Youtube', video_id=video_id,
1627 video_title=video_title)
1628 for video_id, video_title in self.extract_videos_from_page(channel_page)]
1629 return self.playlist_result(entries, channel_id)
# Non-autogenerated channels: follow the "Load more" widget page by page.
1632 more_widget_html = content_html = channel_page
1633 for pagenum in itertools.count(1):
1635 for video_id, video_title in self.extract_videos_from_page(content_html):
1636 yield self.url_result(
1637 video_id, 'Youtube', video_id=video_id,
1638 video_title=video_title)
1641 r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
1646 more = self._download_json(
1647 'https://youtube.com/%s' % mobj.group('more'), channel_id,
1648 'Downloading page #%s' % (pagenum + 1),
1649 transform_source=uppercase_escape)
1650 content_html = more['content_html']
1651 more_widget_html = more['load_more_widget_html']
1653 return self.playlist_result(_entries(), channel_id)
1656 class YoutubeUserIE(YoutubeChannelIE):
# Extractor for user pages and the "ytuser:" keyword; inherits the whole
# channel extraction logic and only swaps the template URL.
1657 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
1658 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
1659 _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
1660 IE_NAME = 'youtube:user'
1663 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
1664 'playlist_mincount': 320,
1666 'title': 'TheLinuxFoundation',
1669 'url': 'ytuser:phihag',
1670 'only_matching': True,
# NOTE(review): the @classmethod decorator for suitable() is elided from
# this view; it takes cls, so it is presumably a classmethod.
1674 def suitable(cls, url):
1675 # Don't return True if the url can be extracted with other youtube
1676 # extractors: this regex is too permissive and it would match.
1677 other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
1678 if any(ie.suitable(url) for ie in other_ies):
1681 return super(YoutubeUserIE, cls).suitable(url)
1684 class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
# Search extractor for the "ytsearch" keyword; pages through the AJAX
# search-results endpoint until enough videos are collected.
1685 IE_DESC = 'YouTube.com searches'
1686 # there doesn't appear to be a real limit, for example if you search for
1687 # 'python' you get more than 8.000.000 results
1688 _MAX_RESULTS = float('inf')
1689 IE_NAME = 'youtube:search'
1690 _SEARCH_KEY = 'ytsearch'
# Extra query parameters merged into each results request; subclasses
# (e.g. the by-date variant) override this.
1691 _EXTRA_QUERY_ARGS = {}
1694 def _get_n_results(self, query, n):
1695 """Get a specified number of results for a query"""
1700 for pagenum in itertools.count(1):
1702 'search_query': query.encode('utf-8'),
1706 url_query.update(self._EXTRA_QUERY_ARGS)
1707 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
1708 data = self._download_json(
1709 result_url, video_id='query "%s"' % query,
1710 note='Downloading page %s' % pagenum,
1711 errnote='Unable to download API page')
1712 html_content = data[1]['body']['content']
# The results page embeds a "search-message" element when nothing matched.
1714 if 'class="search-message' in html_content:
1715 raise ExtractorError(
1716 '[youtube] No video results', expected=True)
1718 new_videos = self._ids_to_results(orderedSet(re.findall(
1719 r'href="/watch\?v=(.{11})', html_content)))
1720 videos += new_videos
# Stop when a page yields nothing new or we have gathered enough results.
1721 if not new_videos or len(videos) > limit:
1726 return self.playlist_result(videos, query)
1729 class YoutubeSearchDateIE(YoutubeSearchIE):
# "ytsearchdate" variant: identical to YoutubeSearchIE except results are
# ordered newest-first via the search_sort query argument.
1730 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
1731 _SEARCH_KEY = 'ytsearchdate'
1732 IE_DESC = 'YouTube.com searches, newest videos first'
1733 _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
1736 class YoutubeSearchURLIE(InfoExtractor):
# Extractor for already-formed /results?search_query=... URLs: scrapes the
# first results page and returns it as a playlist.
1737 IE_DESC = 'YouTube.com search URLs'
1738 IE_NAME = 'youtube:search_url'
1739 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
1741 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
1742 'playlist_mincount': 5,
1744 'title': 'youtube-dl test video',
1748 def _real_extract(self, url):
1749 mobj = re.match(self._VALID_URL, url)
1750 query = compat_urllib_parse_unquote_plus(mobj.group('query'))
1752 webpage = self._download_webpage(url, query)
# Narrow down to the result list, then split it into per-item snippets.
1753 result_code = self._search_regex(
1754 r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
1756 part_codes = re.findall(
1757 r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
1759 for part_code in part_codes:
1760 part_title = self._html_search_regex(
1761 [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
1762 part_url_snippet = self._html_search_regex(
1763 r'(?s)href="([^"]+)"', part_code, 'item URL')
1764 part_url = compat_urlparse.urljoin(
1765 'https://www.youtube.com/', part_url_snippet)
1769 'title': part_title,
# Return everything as one playlist titled with the search query.
1773 '_type': 'playlist',
1779 class YoutubeShowIE(InfoExtractor):
# Extractor for /show/ pages: each season of the show is a separate
# playlist, returned together as one playlist of playlists.
1780 IE_DESC = 'YouTube.com (multi-season) shows'
1781 _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
1782 IE_NAME = 'youtube:show'
1784 'url': 'http://www.youtube.com/show/airdisasters',
1785 'playlist_mincount': 3,
1787 'id': 'airdisasters',
1788 'title': 'Air Disasters',
1792 def _real_extract(self, url):
1793 mobj = re.match(self._VALID_URL, url)
1794 playlist_id = mobj.group('id')
1795 webpage = self._download_webpage(
1796 url, playlist_id, 'Downloading show webpage')
1797 # There's one playlist for each season of the show
1798 m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
1799 self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
# Delegate each season to the playlist extractor.
1802 'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
1803 for season in m_seasons
1805 title = self._og_search_title(webpage, fatal=False)
1808 '_type': 'playlist',
1815 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
1817 Base class for feed extractors
1818 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
# Feeds (watch history, subscriptions, recommended, ...) are personal, so
# login is mandatory.
1820 _LOGIN_REQUIRED = True
# IE_NAME is derived from the subclass's feed name.
1824 return 'youtube:%s' % self._FEED_NAME
1826 def _real_initialize(self):
1829 def _real_extract(self, url):
1830 page = self._download_webpage(
1831 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
1833 # The extraction process is the same as for playlists, but the regex
1834 # for the video ids doesn't contain an index
1836 more_widget_html = content_html = page
1837 for page_num in itertools.count(1):
1838 matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
1840 # 'recommended' feed has infinite 'load more' and each new portion spins
1841 # the same videos in (sometimes) slightly different order, so we'll check
1842 # for unicity and break when portion has no new videos
1843 new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
1849 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
1853 more = self._download_json(
1854 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
1855 'Downloading page #%s' % page_num,
1856 transform_source=uppercase_escape)
1857 content_html = more['content_html']
1858 more_widget_html = more['load_more_widget_html']
1860 return self.playlist_result(
1861 self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
1864 class YoutubeWatchLaterIE(YoutubePlaylistIE):
# The "Watch Later" list is just the special playlist 'WL', so reuse the
# playlist extractor's machinery directly.
1865 IE_NAME = 'youtube:watchlater'
1866 IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
1867 _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
1869 _TESTS = [] # override PlaylistIE tests
1871 def _real_extract(self, url):
1872 return self._extract_playlist('WL')
1875 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
# Favourites are exposed as a regular playlist whose id has to be scraped
# from the logged-in my_favorites page, then delegated to YoutubePlaylistIE.
1876 IE_NAME = 'youtube:favorites'
1877 IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
1878 _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
1879 _LOGIN_REQUIRED = True
1881 def _real_extract(self, url):
1882 webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
1883 playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
1884 return self.url_result(playlist_id, 'YoutubePlaylist')
1887 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
# Feed extractor for the personal "recommended" feed (":ytrec").
1888 IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
1889 _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1890 _FEED_NAME = 'recommended'
1891 _PLAYLIST_TITLE = 'Youtube Recommended videos'
1894 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
# Feed extractor for the personal subscriptions feed (":ytsubs").
1895 IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
1896 _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1897 _FEED_NAME = 'subscriptions'
1898 _PLAYLIST_TITLE = 'Youtube Subscriptions'
1901 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
# Feed extractor for the personal watch-history feed (":ythistory").
1902 IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
# NOTE(review): unlike the sibling classes, _VALID_URL here is not a raw
# string; the '\.' escapes still work but raise DeprecationWarning on
# modern Python - consider adding the r prefix.
1903 _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
1904 _FEED_NAME = 'history'
1905 _PLAYLIST_TITLE = 'Youtube History'
1908 class YoutubeTruncatedURLIE(InfoExtractor):
# Catch-all for watch URLs whose v= parameter was lost (typically because
# the shell ate an unquoted '&'); always fails with a helpful message.
1909 IE_NAME = 'youtube:truncated_url'
1910 IE_DESC = False # Do not list
1911 _VALID_URL = r'''(?x)
1913 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
1916 annotation_id=annotation_[^&]+|
1921 attribution_link\?a=[^&]+
1927 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
1928 'only_matching': True,
1930 'url': 'http://www.youtube.com/watch?',
1931 'only_matching': True,
1933 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
1934 'only_matching': True,
1936 'url': 'https://www.youtube.com/watch?feature=foo',
1937 'only_matching': True,
1939 'url': 'https://www.youtube.com/watch?hl=en-GB',
1940 'only_matching': True,
# Always raises: these URLs can never be extracted, so explain the likely
# shell-quoting mistake to the user instead.
1943 def _real_extract(self, url):
1944 raise ExtractorError(
1945 'Did you forget to quote the URL? Remember that & is a meta '
1946 'character in most shells, so you want to put the URL in quotes, '
1948 '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
1949 ' or simply youtube-dl BaW_jenozKc .',
1953 class YoutubeTruncatedIDIE(InfoExtractor):
1954 IE_NAME = 'youtube:truncated_id'
1955 IE_DESC = False # Do not list
1956 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
1959 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
1960 'only_matching': True,
1963 def _real_extract(self, url):
1964 video_id = self._match_id(url)
1965 raise ExtractorError(
1966 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),