]> gitweb @ CieloNegro.org - youtube-dl.git/blob - youtube_dl/extractor/youtube.py
07a457f4d7d56355fe677e07a7d27171ea73f2ec
[youtube-dl.git] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import xml.etree.ElementTree
15 import zlib
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
19 from ..utils import (
20     compat_chr,
21     compat_http_client,
22     compat_parse_qs,
23     compat_urllib_error,
24     compat_urllib_parse,
25     compat_urllib_request,
26     compat_urlparse,
27     compat_str,
28
29     clean_html,
30     get_cachedir,
31     get_element_by_id,
32     ExtractorError,
33     unescapeHTML,
34     unified_strdate,
35     orderedSet,
36     write_json_file,
37 )
38
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request the language page so YouTube serves English content.

        Returns True on success, False (after a warning) on network errors.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in to YouTube with the configured credentials.

        Returns True on successful login, False when no credentials are
        configured or the attempt fails (a warning is reported then).
        Raises ExtractorError if _LOGIN_REQUIRED is set but no login info
        is available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # GALX is an anti-forgery token embedded in the login form; it must
        # be echoed back in the POST below.
        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # The login form being served again means the credentials were
            # rejected.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Confirm the age-verification interstitial.

        Raises ExtractorError if the confirmation request fails.
        """
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        # Encode the POST body to bytes before sending, like _login does
        # above: on Python 3 the request data must be bytes, not str
        # (on Python 2 bytes == str, so behavior there is unchanged).
        age_data = compat_urllib_parse.urlencode(age_form).encode('ascii')
        request = compat_urllib_request.Request(self._AGE_URL, age_data)
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        # Language first, then login, then age confirmation; each step
        # short-circuits the later ones on failure (except _confirm_age,
        # which raises on its own).
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
139
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    """Extractor for individual YouTube videos (youtube.com, youtu.be,
    youtube-nocookie.com, googleapis hosts)."""
    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Captures the target of a next_url query parameter (used on redirect
    # URLs such as the age-verification page).
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same itags as above, but at equal quality the free (webm) formats are
    # ranked ahead of the non-free (mp4) ones.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container name -> itags available in that container, best first.
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # itag -> file extension of the downloaded stream.
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '160': 'mp4',

        # Dash mp4 audio
        '139': 'm4a',
        '140': 'm4a',
        '141': 'm4a',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # itag -> displayable size/quality note.  Values mix 'HEIGHTxWIDTH'
    # pixel pairs, quality labels ('720p'), audio bitrates ('128k'), and
    # '???' where unknown.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # itag -> extra annotation for streams that are not plain progressive
    # video+audio (3D or DASH-only streams).
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }

    IE_NAME = u'youtube'
    # Self-test fixtures run by the test suite: URL, expected filename and
    # expected metadata for each case.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:5b292926389560516e384ac437c0ec07",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
            u"file":  u"yZIXLfi8CZQ.mp4",
            u"note": u"Embed-only video (#1746)",
            u"info_dict": {
                u"upload_date": u"20120608",
                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
                u"uploader": u"SET India",
                u"uploader_id": u"setindia"
            }
        },
    ]
379
380
381     @classmethod
382     def suitable(cls, url):
383         """Receives a URL and returns True if suitable for this IE."""
384         if YoutubePlaylistIE.suitable(url): return False
385         return re.match(cls._VALID_URL, url) is not None
386
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance cache for player signature functions; presumably
        # populated during signature extraction (usage not visible in this
        # chunk — verify against _extract_signature_function's callers).
        self._player_cache = {}
390
    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self.to_screen(u'%s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen(u'%s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen(u'%s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available."""
        self.to_screen(u'%s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen(u'RTMP download detected')
410
411     def _extract_signature_function(self, video_id, player_url, slen):
412         id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
413                         player_url)
414         player_type = id_m.group('ext')
415         player_id = id_m.group('id')
416
417         # Read from filesystem cache
418         func_id = '%s_%s_%d' % (player_type, player_id, slen)
419         assert os.path.basename(func_id) == func_id
420         cache_dir = get_cachedir(self._downloader.params)
421
422         cache_enabled = cache_dir is not None
423         if cache_enabled:
424             cache_fn = os.path.join(os.path.expanduser(cache_dir),
425                                     u'youtube-sigfuncs',
426                                     func_id + '.json')
427             try:
428                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
429                     cache_spec = json.load(cachef)
430                 return lambda s: u''.join(s[i] for i in cache_spec)
431             except IOError:
432                 pass  # No cache available
433
434         if player_type == 'js':
435             code = self._download_webpage(
436                 player_url, video_id,
437                 note=u'Downloading %s player %s' % (player_type, player_id),
438                 errnote=u'Download of %s failed' % player_url)
439             res = self._parse_sig_js(code)
440         elif player_type == 'swf':
441             urlh = self._request_webpage(
442                 player_url, video_id,
443                 note=u'Downloading %s player %s' % (player_type, player_id),
444                 errnote=u'Download of %s failed' % player_url)
445             code = urlh.read()
446             res = self._parse_sig_swf(code)
447         else:
448             assert False, 'Invalid player type %r' % player_type
449
450         if cache_enabled:
451             try:
452                 test_string = u''.join(map(compat_chr, range(slen)))
453                 cache_res = res(test_string)
454                 cache_spec = [ord(c) for c in cache_res]
455                 try:
456                     os.makedirs(os.path.dirname(cache_fn))
457                 except OSError as ose:
458                     if ose.errno != errno.EEXIST:
459                         raise
460                 write_json_file(cache_spec, cache_fn)
461             except Exception:
462                 tb = traceback.format_exc()
463                 self._downloader.report_warning(
464                     u'Writing cache to %r failed: %s' % (cache_fn, tb))
465
466         return res
467
468     def _print_sig_code(self, func, slen):
469         def gen_sig_code(idxs):
470             def _genslice(start, end, step):
471                 starts = u'' if start == 0 else str(start)
472                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
473                 steps = u'' if step == 1 else (u':%d' % step)
474                 return u's[%s%s%s]' % (starts, ends, steps)
475
476             step = None
477             start = '(Never used)'  # Quelch pyflakes warnings - start will be
478                                     # set as soon as step is set
479             for i, prev in zip(idxs[1:], idxs[:-1]):
480                 if step is not None:
481                     if i - prev == step:
482                         continue
483                     yield _genslice(start, prev, step)
484                     step = None
485                     continue
486                 if i - prev in [-1, 1]:
487                     step = i - prev
488                     start = prev
489                     continue
490                 else:
491                     yield u's[%d]' % prev
492             if step is None:
493                 yield u's[%d]' % i
494             else:
495                 yield _genslice(start, i, step)
496
497         test_string = u''.join(map(compat_chr, range(slen)))
498         cache_res = func(test_string)
499         cache_spec = [ord(c) for c in cache_res]
500         expr_code = u' + '.join(gen_sig_code(cache_spec))
501         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
502         self.to_screen(u'Extracted signature function:\n' + code)
503
504     def _parse_sig_js(self, jscode):
505         funcname = self._search_regex(
506             r'signature=([a-zA-Z]+)', jscode,
507             u'Initial JS player signature function name')
508
509         functions = {}
510
511         def argidx(varname):
512             return string.lowercase.index(varname)
513
514         def interpret_statement(stmt, local_vars, allow_recursion=20):
515             if allow_recursion < 0:
516                 raise ExtractorError(u'Recursion limit reached')
517
518             if stmt.startswith(u'var '):
519                 stmt = stmt[len(u'var '):]
520             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
521                              r'=(?P<expr>.*)$', stmt)
522             if ass_m:
523                 if ass_m.groupdict().get('index'):
524                     def assign(val):
525                         lvar = local_vars[ass_m.group('out')]
526                         idx = interpret_expression(ass_m.group('index'),
527                                                    local_vars, allow_recursion)
528                         assert isinstance(idx, int)
529                         lvar[idx] = val
530                         return val
531                     expr = ass_m.group('expr')
532                 else:
533                     def assign(val):
534                         local_vars[ass_m.group('out')] = val
535                         return val
536                     expr = ass_m.group('expr')
537             elif stmt.startswith(u'return '):
538                 assign = lambda v: v
539                 expr = stmt[len(u'return '):]
540             else:
541                 raise ExtractorError(
542                     u'Cannot determine left side of statement in %r' % stmt)
543
544             v = interpret_expression(expr, local_vars, allow_recursion)
545             return assign(v)
546
547         def interpret_expression(expr, local_vars, allow_recursion):
548             if expr.isdigit():
549                 return int(expr)
550
551             if expr.isalpha():
552                 return local_vars[expr]
553
554             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
555             if m:
556                 member = m.group('member')
557                 val = local_vars[m.group('in')]
558                 if member == 'split("")':
559                     return list(val)
560                 if member == 'join("")':
561                     return u''.join(val)
562                 if member == 'length':
563                     return len(val)
564                 if member == 'reverse()':
565                     return val[::-1]
566                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
567                 if slice_m:
568                     idx = interpret_expression(
569                         slice_m.group('idx'), local_vars, allow_recursion-1)
570                     return val[idx:]
571
572             m = re.match(
573                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
574             if m:
575                 val = local_vars[m.group('in')]
576                 idx = interpret_expression(m.group('idx'), local_vars,
577                                            allow_recursion-1)
578                 return val[idx]
579
580             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
581             if m:
582                 a = interpret_expression(m.group('a'),
583                                          local_vars, allow_recursion)
584                 b = interpret_expression(m.group('b'),
585                                          local_vars, allow_recursion)
586                 return a % b
587
588             m = re.match(
589                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
590             if m:
591                 fname = m.group('func')
592                 if fname not in functions:
593                     functions[fname] = extract_function(fname)
594                 argvals = [int(v) if v.isdigit() else local_vars[v]
595                            for v in m.group('args').split(',')]
596                 return functions[fname](argvals)
597             raise ExtractorError(u'Unsupported JS expression %r' % expr)
598
599         def extract_function(funcname):
600             func_m = re.search(
601                 r'function ' + re.escape(funcname) +
602                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
603                 jscode)
604             argnames = func_m.group('args').split(',')
605
606             def resf(args):
607                 local_vars = dict(zip(argnames, args))
608                 for stmt in func_m.group('code').split(';'):
609                     res = interpret_statement(stmt, local_vars)
610                 return res
611             return resf
612
613         initial_function = extract_function(funcname)
614         return lambda s: initial_function([s])
615
616     def _parse_sig_swf(self, file_contents):
617         if file_contents[1:3] != b'WS':
618             raise ExtractorError(
619                 u'Not an SWF file; header is %r' % file_contents[:3])
620         if file_contents[:1] == b'C':
621             content = zlib.decompress(file_contents[8:])
622         else:
623             raise NotImplementedError(u'Unsupported compression format %r' %
624                                       file_contents[:1])
625
626         def extract_tags(content):
627             pos = 0
628             while pos < len(content):
629                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
630                 pos += 2
631                 tag_code = header16 >> 6
632                 tag_len = header16 & 0x3f
633                 if tag_len == 0x3f:
634                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
635                     pos += 4
636                 assert pos+tag_len <= len(content)
637                 yield (tag_code, content[pos:pos+tag_len])
638                 pos += tag_len
639
640         code_tag = next(tag
641                         for tag_code, tag in extract_tags(content)
642                         if tag_code == 82)
643         p = code_tag.index(b'\0', 4) + 1
644         code_reader = io.BytesIO(code_tag[p:])
645
646         # Parse ABC (AVM2 ByteCode)
647         def read_int(reader=None):
648             if reader is None:
649                 reader = code_reader
650             res = 0
651             shift = 0
652             for _ in range(5):
653                 buf = reader.read(1)
654                 assert len(buf) == 1
655                 b = struct.unpack('<B', buf)[0]
656                 res = res | ((b & 0x7f) << shift)
657                 if b & 0x80 == 0:
658                     break
659                 shift += 7
660             return res
661
662         def u30(reader=None):
663             res = read_int(reader)
664             assert res & 0xf0000000 == 0
665             return res
666         u32 = read_int
667
668         def s32(reader=None):
669             v = read_int(reader)
670             if v & 0x80000000 != 0:
671                 v = - ((v ^ 0xffffffff) + 1)
672             return v
673
674         def read_string(reader=None):
675             if reader is None:
676                 reader = code_reader
677             slen = u30(reader)
678             resb = reader.read(slen)
679             assert len(resb) == slen
680             return resb.decode('utf-8')
681
682         def read_bytes(count, reader=None):
683             if reader is None:
684                 reader = code_reader
685             resb = reader.read(count)
686             assert len(resb) == count
687             return resb
688
689         def read_byte(reader=None):
690             resb = read_bytes(1, reader=reader)
691             res = struct.unpack('<B', resb)[0]
692             return res
693
694         # minor_version + major_version
695         read_bytes(2 + 2)
696
697         # Constant pool
698         int_count = u30()
699         for _c in range(1, int_count):
700             s32()
701         uint_count = u30()
702         for _c in range(1, uint_count):
703             u32()
704         double_count = u30()
705         read_bytes((double_count-1) * 8)
706         string_count = u30()
707         constant_strings = [u'']
708         for _c in range(1, string_count):
709             s = read_string()
710             constant_strings.append(s)
711         namespace_count = u30()
712         for _c in range(1, namespace_count):
713             read_bytes(1)  # kind
714             u30()  # name
715         ns_set_count = u30()
716         for _c in range(1, ns_set_count):
717             count = u30()
718             for _c2 in range(count):
719                 u30()
720         multiname_count = u30()
721         MULTINAME_SIZES = {
722             0x07: 2,  # QName
723             0x0d: 2,  # QNameA
724             0x0f: 1,  # RTQName
725             0x10: 1,  # RTQNameA
726             0x11: 0,  # RTQNameL
727             0x12: 0,  # RTQNameLA
728             0x09: 2,  # Multiname
729             0x0e: 2,  # MultinameA
730             0x1b: 1,  # MultinameL
731             0x1c: 1,  # MultinameLA
732         }
733         multinames = [u'']
734         for _c in range(1, multiname_count):
735             kind = u30()
736             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
737             if kind == 0x07:
738                 u30()  # namespace_idx
739                 name_idx = u30()
740                 multinames.append(constant_strings[name_idx])
741             else:
742                 multinames.append('[MULTINAME kind: %d]' % kind)
743                 for _c2 in range(MULTINAME_SIZES[kind]):
744                     u30()
745
746         # Methods
747         method_count = u30()
748         MethodInfo = collections.namedtuple(
749             'MethodInfo',
750             ['NEED_ARGUMENTS', 'NEED_REST'])
751         method_infos = []
752         for method_id in range(method_count):
753             param_count = u30()
754             u30()  # return type
755             for _ in range(param_count):
756                 u30()  # param type
757             u30()  # name index (always 0 for youtube)
758             flags = read_byte()
759             if flags & 0x08 != 0:
760                 # Options present
761                 option_count = u30()
762                 for c in range(option_count):
763                     u30()  # val
764                     read_bytes(1)  # kind
765             if flags & 0x80 != 0:
766                 # Param names present
767                 for _ in range(param_count):
768                     u30()  # param name
769             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
770             method_infos.append(mi)
771
772         # Metadata
773         metadata_count = u30()
774         for _c in range(metadata_count):
775             u30()  # name
776             item_count = u30()
777             for _c2 in range(item_count):
778                 u30()  # key
779                 u30()  # value
780
        def parse_traits_info():
            """Parse one trait entry of the ABC traits table.

            Consumes the trait's bytes from the shared reader and returns a
            dict of the methods it declares; for Method/Getter/Setter traits
            this maps trait name -> method index.  Slot/Const/Class traits are
            skipped (their fields are read and discarded).
            """
            trait_name_idx = u30()
            # Low nibble is the trait kind, high nibble holds attribute flags.
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                # NOTE(review): this inserts idx -> name, the reverse of the
                # Method case above (name -> idx); looks inverted — confirm
                # against the callers that build method_names/method_idxs.
                methods[function_idx] = multinames[trait_name_idx]
            else:
                raise ExtractorError(u'Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                # Metadata indices are read only to advance the stream.
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods
813
814         # Classes
815         TARGET_CLASSNAME = u'SignatureDecipher'
816         searched_idx = multinames.index(TARGET_CLASSNAME)
817         searched_class_id = None
818         class_count = u30()
819         for class_id in range(class_count):
820             name_idx = u30()
821             if name_idx == searched_idx:
822                 # We found the class we're looking for!
823                 searched_class_id = class_id
824             u30()  # super_name idx
825             flags = read_byte()
826             if flags & 0x08 != 0:  # Protected namespace is present
827                 u30()  # protected_ns_idx
828             intrf_count = u30()
829             for _c2 in range(intrf_count):
830                 u30()
831             u30()  # iinit
832             trait_count = u30()
833             for _c2 in range(trait_count):
834                 parse_traits_info()
835
836         if searched_class_id is None:
837             raise ExtractorError(u'Target class %r not found' %
838                                  TARGET_CLASSNAME)
839
840         method_names = {}
841         method_idxs = {}
842         for class_id in range(class_count):
843             u30()  # cinit
844             trait_count = u30()
845             for _c2 in range(trait_count):
846                 trait_methods = parse_traits_info()
847                 if class_id == searched_class_id:
848                     method_names.update(trait_methods.items())
849                     method_idxs.update(dict(
850                         (idx, name)
851                         for name, idx in trait_methods.items()))
852
853         # Scripts
854         script_count = u30()
855         for _c in range(script_count):
856             u30()  # init
857             trait_count = u30()
858             for _c2 in range(trait_count):
859                 parse_traits_info()
860
861         # Method bodies
862         method_body_count = u30()
863         Method = collections.namedtuple('Method', ['code', 'local_count'])
864         methods = {}
865         for _c in range(method_body_count):
866             method_idx = u30()
867             u30()  # max_stack
868             local_count = u30()
869             u30()  # init_scope_depth
870             u30()  # max_scope_depth
871             code_length = u30()
872             code = read_bytes(code_length)
873             if method_idx in method_idxs:
874                 m = Method(code, local_count)
875                 methods[method_idxs[method_idx]] = m
876             exception_count = u30()
877             for _c2 in range(exception_count):
878                 u30()  # from
879                 u30()  # to
880                 u30()  # target
881                 u30()  # exc_type
882                 u30()  # var_name
883             trait_count = u30()
884             for _c2 in range(trait_count):
885                 parse_traits_info()
886
887         assert p + code_reader.tell() == len(code_tag)
888         assert len(methods) == len(method_idxs)
889
890         method_pyfunctions = {}
891
        def extract_function(func_name):
            """Turn the AVM2 method named *func_name* into a Python callable.

            The returned callable takes a list of arguments and interprets the
            method's bytecode with the small opcode subset implemented below.
            Results are memoized in method_pyfunctions, which also lets
            compiled functions call each other (see callproperty) and supports
            recursion via findpropstrict.
            Raises ExtractorError when the method body is unknown and
            NotImplementedError on an unsupported opcode or property.
            """
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

            def resfunc(args):
                """Interpret m.code with *args* as the method's arguments."""
                # Register 0 is the receiver ("this"), then the arguments,
                # then the method's declared locals.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                stack = []
                coder = io.BytesIO(m.code)
                while True:
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36:  # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                        stack.append(v)
                    elif opcode == 44:  # pushstring
                        idx = u30(coder)
                        stack.append(constant_strings[idx])
                    elif opcode == 48:  # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                        stack.pop()
                    elif opcode == 70:  # callproperty
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        # Arguments were pushed left-to-right, so pop them in
                        # reverse to restore call order.
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                            if args[0] == u'':
                                # ActionScript split('') yields one-char
                                # elements, like list() in Python.
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                            res = obj[args[0]:]
                            stack.append(res)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                            stack.append(res)
                        elif mname in method_pyfunctions:
                            # Call into another previously compiled method.
                            stack.append(method_pyfunctions[mname](args))
                        else:
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                                % (mname, obj))
                    elif opcode == 72:  # returnvalue
                        res = stack.pop()
                        return res
                    elif opcode == 79:  # callpropvoid
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'reverse':
                            # In-place, no value pushed back (void call).
                            assert isinstance(obj, list)
                            obj.reverse()
                        else:
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                                % (mname, obj))
                    elif opcode == 93:  # findpropstrict
                        # Resolves a global function; compile it on demand.
                        index = u30(coder)
                        mname = multinames[index]
                        res = extract_function(mname)
                        stack.append(res)
                    elif opcode == 97:  # setproperty
                        index = u30(coder)
                        value = stack.pop()
                        idx = stack.pop()
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                        obj[idx] = value
                    elif opcode == 98:  # getlocal
                        index = u30(coder)
                        stack.append(registers[index])
                    elif opcode == 99:  # setlocal
                        index = u30(coder)
                        value = stack.pop()
                        registers[index] = value
                    elif opcode == 102:  # getproperty
                        index = u30(coder)
                        pname = multinames[index]
                        if pname == u'length':
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else:  # Assume attribute access
                            idx = stack.pop()
                            assert isinstance(idx, int)
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128:  # coerce
                        # Type coercion is a no-op here; skip the type index.
                        u30(coder)
                    elif opcode == 133:  # coerce_s
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164:  # modulo
                        value2 = stack.pop()
                        value1 = stack.pop()
                        res = value1 % value2
                        stack.append(res)
                    elif opcode == 208:  # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209:  # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210:  # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211:  # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214:  # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215:  # setlocal_3
                        registers[3] = stack.pop()
                    else:
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            method_pyfunctions[func_name] = resfunc
            return resfunc
1026
1027         initial_function = extract_function(u'decipher')
1028         return lambda s: initial_function([s])
1029
1030     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1031         """Turn the encrypted s field into a working signature"""
1032
1033         if player_url is not None:
1034             if player_url.startswith(u'//'):
1035                 player_url = u'https:' + player_url
1036             try:
1037                 player_id = (player_url, len(s))
1038                 if player_id not in self._player_cache:
1039                     func = self._extract_signature_function(
1040                         video_id, player_url, len(s)
1041                     )
1042                     self._player_cache[player_id] = func
1043                 func = self._player_cache[player_id]
1044                 if self._downloader.params.get('youtube_print_sig_code'):
1045                     self._print_sig_code(func, len(s))
1046                 return func(s)
1047             except Exception:
1048                 tb = traceback.format_exc()
1049                 self._downloader.report_warning(
1050                     u'Automatic signature extraction failed: ' + tb)
1051
1052             self._downloader.report_warning(
1053                 u'Warning: Falling back to static signature algorithm')
1054
1055         return self._static_decrypt_signature(
1056             s, video_id, player_url, age_gate)
1057
1058     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1059         if age_gate:
1060             # The videos with age protection use another player, so the
1061             # algorithms can be different.
1062             if len(s) == 86:
1063                 return s[2:63] + s[82] + s[64:82] + s[63]
1064
1065         if len(s) == 93:
1066             return s[86:29:-1] + s[88] + s[28:5:-1]
1067         elif len(s) == 92:
1068             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1069         elif len(s) == 91:
1070             return s[84:27:-1] + s[86] + s[26:5:-1]
1071         elif len(s) == 90:
1072             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1073         elif len(s) == 89:
1074             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1075         elif len(s) == 88:
1076             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1077         elif len(s) == 87:
1078             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1079         elif len(s) == 86:
1080             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1081         elif len(s) == 85:
1082             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1083         elif len(s) == 84:
1084             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1085         elif len(s) == 83:
1086             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1087         elif len(s) == 82:
1088             return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1089         elif len(s) == 81:
1090             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1091         elif len(s) == 80:
1092             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1093         elif len(s) == 79:
1094             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1095
1096         else:
1097             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1098
1099     def _get_available_subtitles(self, video_id, webpage):
1100         try:
1101             sub_list = self._download_webpage(
1102                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1103                 video_id, note=False)
1104         except ExtractorError as err:
1105             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1106             return {}
1107         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1108
1109         sub_lang_list = {}
1110         for l in lang_list:
1111             lang = l[1]
1112             params = compat_urllib_parse.urlencode({
1113                 'lang': lang,
1114                 'v': video_id,
1115                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
1116                 'name': l[0].encode('utf-8'),
1117             })
1118             url = u'http://www.youtube.com/api/timedtext?' + params
1119             sub_lang_list[lang] = url
1120         if not sub_lang_list:
1121             self._downloader.report_warning(u'video doesn\'t have subtitles')
1122             return {}
1123         return sub_lang_list
1124
    def _get_available_automatic_caption(self, video_id, webpage):
        """Return a {lang_code: caption_url} dict of automatic (ASR) captions.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.  Warns and returns {} whenever the
        player config, the ttsurl, or an ASR track cannot be found.
        """
        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption url lives in the inline ytplayer.config JSON blob.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            list_page = self._download_webpage(list_url, video_id)
            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
            # The first <track> must be the auto-generated (ASR) one;
            # otherwise the video has no automatic captions.
            original_lang_node = caption_list.find('track')
            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']

            # One translated-caption url per <target> language, all derived
            # from the ASR track's language.
            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': sub_format,
                    'ts': timestamp,
                    'kind': 'asr',
                })
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An extractor error can be raised by the download process if there
        # are no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1172
1173     def _print_formats(self, formats):
1174         print('Available formats:')
1175         for x in formats:
1176             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1177                                         self._video_dimensions.get(x, '???'),
1178                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1179
1180     def _extract_id(self, url):
1181         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1182         if mobj is None:
1183             raise ExtractorError(u'Invalid URL: %s' % url)
1184         video_id = mobj.group(2)
1185         return video_id
1186
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.

        Honors the 'format', 'format_limit', 'prefer_free_formats' and
        'listformats' downloader params; returns None after printing when
        'listformats' is set, and raises ExtractorError when no known or no
        requested format is available.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        # format_limit caps quality: keep only formats at or below the limit
        # (the preference lists are ordered best-first).
        if format_limit is not None and format_limit in available_formats:
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimeted sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
                if rf in self._video_formats_map:
                    # rf is a container name; scan its itags best-first.
                    # The for/else falls through to the next requested format
                    # when none of this container's itags is available.
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        continue
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
1235
1236     def _extract_from_m3u8(self, manifest_url, video_id):
1237         url_map = {}
1238         def _get_urls(_manifest):
1239             lines = _manifest.split('\n')
1240             urls = filter(lambda l: l and not l.startswith('#'),
1241                             lines)
1242             return urls
1243         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1244         formats_urls = _get_urls(manifest)
1245         for format_url in formats_urls:
1246             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1247             url_map[itag] = format_url
1248         return url_map
1249
1250     def _extract_annotations(self, video_id):
1251         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1252         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1253
1254     def _real_extract(self, url):
1255         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1256         mobj = re.search(self._NEXT_URL_RE, url)
1257         if mobj:
1258             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1259         video_id = self._extract_id(url)
1260
1261         # Get video webpage
1262         self.report_video_webpage_download(video_id)
1263         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1264         request = compat_urllib_request.Request(url)
1265         try:
1266             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1267         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1268             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1269
1270         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1271
1272         # Attempt to extract SWF player URL
1273         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1274         if mobj is not None:
1275             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1276         else:
1277             player_url = None
1278
1279         # Get video info
1280         self.report_video_info_webpage_download(video_id)
1281         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1282             self.report_age_confirmation()
1283             age_gate = True
1284             # We simulate the access to the video from www.youtube.com/v/{video_id}
1285             # this can be viewed without login into Youtube
1286             data = compat_urllib_parse.urlencode({'video_id': video_id,
1287                                                   'el': 'player_embedded',
1288                                                   'gl': 'US',
1289                                                   'hl': 'en',
1290                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1291                                                   'asv': 3,
1292                                                   'sts':'1588',
1293                                                   })
1294             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1295             video_info_webpage = self._download_webpage(video_info_url, video_id,
1296                                     note=False,
1297                                     errnote='unable to download video info webpage')
1298             video_info = compat_parse_qs(video_info_webpage)
1299         else:
1300             age_gate = False
1301             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1302                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1303                         % (video_id, el_type))
1304                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1305                                         note=False,
1306                                         errnote='unable to download video info webpage')
1307                 video_info = compat_parse_qs(video_info_webpage)
1308                 if 'token' in video_info:
1309                     break
1310         if 'token' not in video_info:
1311             if 'reason' in video_info:
1312                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1313             else:
1314                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1315
1316         if 'view_count' in video_info:
1317             view_count = int(video_info['view_count'][0])
1318         else:
1319             view_count = None
1320
1321         # Check for "rental" videos
1322         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1323             raise ExtractorError(u'"rental" videos not supported')
1324
1325         # Start extracting information
1326         self.report_information_extraction(video_id)
1327
1328         # uploader
1329         if 'author' not in video_info:
1330             raise ExtractorError(u'Unable to extract uploader name')
1331         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1332
1333         # uploader_id
1334         video_uploader_id = None
1335         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1336         if mobj is not None:
1337             video_uploader_id = mobj.group(1)
1338         else:
1339             self._downloader.report_warning(u'unable to extract uploader nickname')
1340
1341         # title
1342         if 'title' in video_info:
1343             video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1344         else:
1345             self._downloader.report_warning(u'Unable to extract video title')
1346             video_title = u'_'
1347
1348         # thumbnail image
1349         # We try first to get a high quality image:
1350         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1351                             video_webpage, re.DOTALL)
1352         if m_thumb is not None:
1353             video_thumbnail = m_thumb.group(1)
1354         elif 'thumbnail_url' not in video_info:
1355             self._downloader.report_warning(u'unable to extract video thumbnail')
1356             video_thumbnail = None
1357         else:   # don't panic if we can't find it
1358             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1359
1360         # upload date
1361         upload_date = None
1362         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1363         if mobj is not None:
1364             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1365             upload_date = unified_strdate(upload_date)
1366
1367         # description
1368         video_description = get_element_by_id("eow-description", video_webpage)
1369         if video_description:
1370             video_description = clean_html(video_description)
1371         else:
1372             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1373             if fd_mobj:
1374                 video_description = unescapeHTML(fd_mobj.group(1))
1375             else:
1376                 video_description = u''
1377
1378         # subtitles
1379         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1380
1381         if self._downloader.params.get('listsubtitles', False):
1382             self._list_available_subtitles(video_id, video_webpage)
1383             return
1384
1385         if 'length_seconds' not in video_info:
1386             self._downloader.report_warning(u'unable to extract video duration')
1387             video_duration = ''
1388         else:
1389             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1390
1391         # annotations
1392         video_annotations = None
1393         if self._downloader.params.get('writeannotations', False):
1394                 video_annotations = self._extract_annotations(video_id)
1395
1396         # Decide which formats to download
1397
1398         try:
1399             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1400             if not mobj:
1401                 raise ValueError('Could not find vevo ID')
1402             info = json.loads(mobj.group(1))
1403             args = info['args']
1404             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1405             # this signatures are encrypted
1406             if 'url_encoded_fmt_stream_map' not in args:
1407                 raise ValueError(u'No stream_map present')  # caught below
1408             re_signature = re.compile(r'[&,]s=')
1409             m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
1410             if m_s is not None:
1411                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1412                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1413             m_s = re_signature.search(args.get('adaptive_fmts', u''))
1414             if m_s is not None:
1415                 if 'adaptive_fmts' in video_info:
1416                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
1417                 else:
1418                     video_info['adaptive_fmts'] = [args['adaptive_fmts']]
1419         except ValueError:
1420             pass
1421
1422         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1423             self.report_rtmp_download()
1424             video_url_list = [(None, video_info['conn'][0])]
1425         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
1426             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
1427             if 'rtmpe%3Dyes' in encoded_url_map:
1428                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1429             url_map = {}
1430             for url_data_str in encoded_url_map.split(','):
1431                 url_data = compat_parse_qs(url_data_str)
1432                 if 'itag' in url_data and 'url' in url_data:
1433                     url = url_data['url'][0]
1434                     if 'sig' in url_data:
1435                         url += '&signature=' + url_data['sig'][0]
1436                     elif 's' in url_data:
1437                         encrypted_sig = url_data['s'][0]
1438                         if self._downloader.params.get('verbose'):
1439                             if age_gate:
1440                                 if player_url is None:
1441                                     player_version = 'unknown'
1442                                 else:
1443                                     player_version = self._search_regex(
1444                                         r'-(.+)\.swf$', player_url,
1445                                         u'flash player', fatal=False)
1446                                 player_desc = 'flash player %s' % player_version
1447                             else:
1448                                 player_version = self._search_regex(
1449                                     r'html5player-(.+?)\.js', video_webpage,
1450                                     'html5 player', fatal=False)
1451                                 player_desc = u'html5 player %s' % player_version
1452
1453                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1454                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1455                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1456
1457                         if not age_gate:
1458                             jsplayer_url_json = self._search_regex(
1459                                 r'"assets":.+?"js":\s*("[^"]+")',
1460                                 video_webpage, u'JS player URL')
1461                             player_url = json.loads(jsplayer_url_json)
1462
1463                         signature = self._decrypt_signature(
1464                             encrypted_sig, video_id, player_url, age_gate)
1465                         url += '&signature=' + signature
1466                     if 'ratebypass' not in url:
1467                         url += '&ratebypass=yes'
1468                     url_map[url_data['itag'][0]] = url
1469             video_url_list = self._get_video_url_list(url_map)
1470             if not video_url_list:
1471                 return
1472         elif video_info.get('hlsvp'):
1473             manifest_url = video_info['hlsvp'][0]
1474             url_map = self._extract_from_m3u8(manifest_url, video_id)
1475             video_url_list = self._get_video_url_list(url_map)
1476             if not video_url_list:
1477                 return
1478
1479         else:
1480             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1481
1482         results = []
1483         for itag, video_real_url in video_url_list:
1484             # Extension
1485             video_extension = self._video_extensions.get(itag, 'flv')
1486
1487             video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
1488                                               self._video_dimensions.get(itag, '???'),
1489                                               ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
1490
1491             results.append({
1492                 'id':       video_id,
1493                 'url':      video_real_url,
1494                 'uploader': video_uploader,
1495                 'uploader_id': video_uploader_id,
1496                 'upload_date':  upload_date,
1497                 'title':    video_title,
1498                 'ext':      video_extension,
1499                 'format':   video_format,
1500                 'format_id': itag,
1501                 'thumbnail':    video_thumbnail,
1502                 'description':  video_description,
1503                 'player_url':   player_url,
1504                 'subtitles':    video_subtitles,
1505                 'duration':     video_duration,
1506                 'age_limit':    18 if age_gate else 0,
1507                 'annotations':  video_annotations,
1508                 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
1509                 'view_count': view_count,
1510             })
1511         return results
1512
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extract every video id from a YouTube playlist page and return them
    as a playlist result of per-video url_results."""
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written with whitespace and comments, so it must be
        # matched with re.VERBOSE (the default suitable() would not do that).
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                # Fix: playlist_id already carries its own prefix (PL/EC/UU/FL
                # per _VALID_URL), so the old 'playlist PL%s' message printed a
                # duplicated or wrong prefix. Print the id as-is.
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            url = self._TEMPLATE_URL % (playlist_id, page_num)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            # The ids are duplicated on the page, orderedSet keeps the first
            # occurrence of each while preserving playlist order.
            new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
            ids.extend(new_ids)

            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                break

        playlist_title = self._og_search_title(page)

        url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                       for vid_id in ids]
        return self.playlist_result(url_results, playlist_id, playlist_title)
1577
1578
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from *page*, deduplicated, in order."""
        found = []
        for match in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            candidate = match.group(1)
            if candidate not in found:
                found.append(candidate)
        return found

    def _real_extract(self, url):
        """Collect all video ids of a channel and return them as a playlist."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = mobj.group(1)

        # Fetch the channel landing page first to detect autogenerated
        # channels: those list everything on one page and their ajax
        # pages come back empty.
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        autogenerated = re.search(r'channel-header-autogenerated-label', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Walk the json-based channel_ajax listing page by page
            video_ids = []
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                page = json.loads(page)
                video_ids.extend(self.extract_videos_from_page(page['content_html']))
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                       for video_id in video_ids]
        return self.playlist_result(url_entries, channel_id)
1633
1634
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # The regex above is very permissive; defer to any other youtube
        # extractor that also matches this URL.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Page through the GData API and return all of the user's uploads."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        username = mobj.group(1)

        # The GData API caps each response at _GDATA_PAGE_SIZE (50) entries,
        # so keep requesting pages until one comes back short or empty.
        video_ids = []
        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # Each entry id looks like '.../<video_id>'; keep the last segment
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A page that is not "full" must be the last one — no need
            # to query again.
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        url_results = [
            self.url_result(video_id, 'Youtube', video_id=video_id)
            for video_id in video_ids]
        return self.playlist_result(url_results, playlist_title=username)
1700
1701
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        video_ids = []
        limit = n
        pagenum = 0

        # The API serves 50 results per page; keep fetching until we have
        # enough ids or the reported total is exhausted.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            video_ids.extend(video['id'] for video in api_response['items'])
            # Tighten the limit once the API tells us the real total
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # Trim any overshoot from the last page
        del video_ids[n:]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1744
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as the parent, but the gdata 'orderby=published' parameter
    # makes the API return the newest uploads first.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = u'YouTube.com searches, newest videos first'
1749
class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Return one playlist url_result per season of the show."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is exposed as its own playlist link
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
                for season in season_matches]
1763
1764
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Personal feeds (e.g. watch later) need a different ajax action
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        entries = []
        # The step argument is available only in 2.7 or higher
        for page_index in itertools.count(0):
            info = self._download_webpage(self._FEED_TEMPLATE % (page_index * self._PAGING_STEP),
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % page_index)
            info = json.loads(info)
            id_matches = re.finditer(r'"/watch\?v=(.*?)["&]', info['feed_html'])
            for video_id in orderedSet(m.group(1) for m in id_matches):
                entries.append(self.url_result(video_id, 'Youtube', video_id=video_id))
            # A null 'paging' token marks the last page of the feed
            if info['paging'] is None:
                break
        return self.playlist_result(entries, playlist_title=self._PLAYLIST_TITLE)
1808
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed of the logged-in user's channel subscriptions
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
1814
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed of videos YouTube recommends to the logged-in user
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1820
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    # The watch-later list is per-user, so it uses the personal feed action
    # (see _PERSONAL_FEED handling in the base class)
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _PAGING_STEP = 100
    _PERSONAL_FEED = True
1828
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Resolve the favourites page to its backing playlist id and
        delegate extraction to the playlist IE."""
        favourites_page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
1839
1840
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'

    def _real_extract(self, url):
        """Matched URLs have lost their video id (typically an unquoted shell
        command ate the '&v=...' part); abort with a helpful hint."""
        hint = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like  youtube-dl '
            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
            u' (or simply  youtube-dl BaW_jenozKc  ).')
        raise ExtractorError(hint, expected=True)