14 import xml.etree.ElementTree
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
25 compat_urllib_request,
39 class YoutubeBaseInfoExtractor(InfoExtractor):
40 """Provide base functions for Youtube extractors"""
41 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
42 _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
43 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
44 _NETRC_MACHINE = 'youtube'
45 # If True it will raise an error if no login info is provided
46 _LOGIN_REQUIRED = False
def report_lang(self):
    """Announce on screen that the extractor is switching the site language."""
    self.to_screen(u'Setting language')
52 def _set_language(self):
53 request = compat_urllib_request.Request(self._LANG_URL)
56 compat_urllib_request.urlopen(request).read()
57 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
58 self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
63 (username, password) = self._get_login_info()
64 # No authentication to be performed
66 if self._LOGIN_REQUIRED:
67 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
70 request = compat_urllib_request.Request(self._LOGIN_URL)
72 login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
73 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
74 self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
77 galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
78 login_page, u'Login GALX parameter')
82 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
86 u'PersistentCookie': u'yes',
88 u'bgresponse': u'js_disabled',
89 u'checkConnection': u'',
90 u'checkedDomains': u'youtube',
95 u'signIn': u'Sign in',
97 u'service': u'youtube',
101 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
103 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
104 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
105 request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
108 login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
109 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
110 self._downloader.report_warning(u'unable to log in: bad username or password')
112 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
113 self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
117 def _confirm_age(self):
120 'action_confirm': 'Confirm',
122 request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
124 self.report_age_confirmation()
125 compat_urllib_request.urlopen(request).read().decode('utf-8')
126 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
127 raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
130 def _real_initialize(self):
131 if self._downloader is None:
133 if not self._set_language():
135 if not self._login():
140 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
141 IE_DESC = u'YouTube.com'
144 (?:https?://)? # http(s):// (optional)
145 (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
146 tube\.majestyc\.net/|
147 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
148 (?:.*?\#/)? # handle anchor (#/) redirect urls
149 (?: # the various things that can precede the ID:
150 (?:(?:v|embed|e)/) # v/ or embed/ or e/
151 |(?: # or the v= param in all its forms
152 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
153 (?:\?|\#!?) # the params delimiter ? or # or #!
154 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
158 |youtu\.be/ # just youtu.be/xxxx
160 )? # all until now is optional -> you can pass the naked ID
161 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
162 (?(1).+)? # if we found the ID, everything can follow
164 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
165 # Listed in order of quality
166 _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
167 # Apple HTTP Live Streaming
168 '96', '95', '94', '93', '92', '132', '151',
170 '85', '84', '102', '83', '101', '82', '100',
172 '138', '137', '248', '136', '247', '135', '246',
173 '245', '244', '134', '243', '133', '242', '160',
175 '141', '172', '140', '171', '139',
177 _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
178 # Apple HTTP Live Streaming
179 '96', '95', '94', '93', '92', '132', '151',
181 '85', '102', '84', '101', '83', '100', '82',
183 '138', '248', '137', '247', '136', '246', '245',
184 '244', '135', '243', '134', '242', '133', '160',
186 '172', '141', '171', '140', '139',
188 _video_formats_map = {
189 'flv': ['35', '34', '6', '5'],
190 '3gp': ['36', '17', '13'],
191 'mp4': ['38', '37', '22', '18'],
192 'webm': ['46', '45', '44', '43'],
194 _video_extensions = {
216 # Apple HTTP Live Streaming
250 _video_dimensions = {
332 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
333 u"file": u"BaW_jenozKc.mp4",
335 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
336 u"uploader": u"Philipp Hagemeister",
337 u"uploader_id": u"phihag",
338 u"upload_date": u"20121002",
339 u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
343 u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
344 u"file": u"1ltcDfZMA3U.mp4",
345 u"note": u"Test VEVO video (#897)",
347 u"upload_date": u"20070518",
348 u"title": u"Maps - It Will Find You",
349 u"description": u"Music video by Maps performing It Will Find You.",
350 u"uploader": u"MuteUSA",
351 u"uploader_id": u"MuteUSA"
355 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
356 u"file": u"UxxajLWwzqY.mp4",
357 u"note": u"Test generic use_cipher_signature video (#897)",
359 u"upload_date": u"20120506",
360 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
361 u"description": u"md5:5b292926389560516e384ac437c0ec07",
362 u"uploader": u"Icona Pop",
363 u"uploader_id": u"IconaPop"
367 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
368 u"file": u"07FYdnEawAQ.mp4",
369 u"note": u"Test VEVO video with age protection (#956)",
371 u"upload_date": u"20130703",
372 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
373 u"description": u"md5:64249768eec3bc4276236606ea996373",
374 u"uploader": u"justintimberlakeVEVO",
375 u"uploader_id": u"justintimberlakeVEVO"
def suitable(cls, url):
    """Return True when this IE (and not the playlist IE) should handle *url*."""
    # Playlist URLs are deliberately delegated to YoutubePlaylistIE.
    if YoutubePlaylistIE.suitable(url):
        return False
    return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def __init__(self, *args, **kwargs):
    """Set up the extractor and an empty per-player signature-function cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature_length) -> decryption function.
    self._player_cache = {}
def report_video_webpage_download(self, video_id):
    """Announce the download of the watch page for *video_id*."""
    message = u'%s: Downloading video webpage' % video_id
    self.to_screen(message)
def report_video_info_webpage_download(self, video_id):
    """Announce the download of the get_video_info page for *video_id*."""
    message = u'%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Announce that metadata extraction for *video_id* has started."""
    message = u'%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Announce that the requested itag *format* is not offered for *video_id*."""
    message = u'%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
def report_rtmp_download(self):
    """Announce that the download will go over the RTMP protocol."""
    self.to_screen(u'RTMP download detected')
411 def _extract_signature_function(self, video_id, player_url, slen):
412 id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
414 player_type = id_m.group('ext')
415 player_id = id_m.group('id')
417 # Read from filesystem cache
418 func_id = '%s_%s_%d' % (player_type, player_id, slen)
419 assert os.path.basename(func_id) == func_id
420 cache_dir = get_cachedir(self._downloader.params)
422 cache_enabled = cache_dir is not None
424 cache_fn = os.path.join(os.path.expanduser(cache_dir),
428 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
429 cache_spec = json.load(cachef)
430 return lambda s: u''.join(s[i] for i in cache_spec)
432 pass # No cache available
434 if player_type == 'js':
435 code = self._download_webpage(
436 player_url, video_id,
437 note=u'Downloading %s player %s' % (player_type, player_id),
438 errnote=u'Download of %s failed' % player_url)
439 res = self._parse_sig_js(code)
440 elif player_type == 'swf':
441 urlh = self._request_webpage(
442 player_url, video_id,
443 note=u'Downloading %s player %s' % (player_type, player_id),
444 errnote=u'Download of %s failed' % player_url)
446 res = self._parse_sig_swf(code)
448 assert False, 'Invalid player type %r' % player_type
452 test_string = u''.join(map(compat_chr, range(slen)))
453 cache_res = res(test_string)
454 cache_spec = [ord(c) for c in cache_res]
456 os.makedirs(os.path.dirname(cache_fn))
457 except OSError as ose:
458 if ose.errno != errno.EEXIST:
460 write_json_file(cache_spec, cache_fn)
462 tb = traceback.format_exc()
463 self._downloader.report_warning(
464 u'Writing cache to %r failed: %s' % (cache_fn, tb))
468 def _print_sig_code(self, func, slen):
469 def gen_sig_code(idxs):
470 def _genslice(start, end, step):
471 starts = u'' if start == 0 else str(start)
472 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
473 steps = u'' if step == 1 else (u':%d' % step)
474 return u's[%s%s%s]' % (starts, ends, steps)
477 start = '(Never used)' # Quelch pyflakes warnings - start will be
478 # set as soon as step is set
479 for i, prev in zip(idxs[1:], idxs[:-1]):
483 yield _genslice(start, prev, step)
486 if i - prev in [-1, 1]:
491 yield u's[%d]' % prev
495 yield _genslice(start, i, step)
497 test_string = u''.join(map(compat_chr, range(slen)))
498 cache_res = func(test_string)
499 cache_spec = [ord(c) for c in cache_res]
500 expr_code = u' + '.join(gen_sig_code(cache_spec))
501 code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
502 self.to_screen(u'Extracted signature function:\n' + code)
504 def _parse_sig_js(self, jscode):
505 funcname = self._search_regex(
506 r'signature=([a-zA-Z]+)', jscode,
507 u'Initial JS player signature function name')
512 return string.lowercase.index(varname)
514 def interpret_statement(stmt, local_vars, allow_recursion=20):
515 if allow_recursion < 0:
516 raise ExtractorError(u'Recursion limit reached')
518 if stmt.startswith(u'var '):
519 stmt = stmt[len(u'var '):]
520 ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
521 r'=(?P<expr>.*)$', stmt)
523 if ass_m.groupdict().get('index'):
525 lvar = local_vars[ass_m.group('out')]
526 idx = interpret_expression(ass_m.group('index'),
527 local_vars, allow_recursion)
528 assert isinstance(idx, int)
531 expr = ass_m.group('expr')
534 local_vars[ass_m.group('out')] = val
536 expr = ass_m.group('expr')
537 elif stmt.startswith(u'return '):
539 expr = stmt[len(u'return '):]
541 raise ExtractorError(
542 u'Cannot determine left side of statement in %r' % stmt)
544 v = interpret_expression(expr, local_vars, allow_recursion)
547 def interpret_expression(expr, local_vars, allow_recursion):
552 return local_vars[expr]
554 m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
556 member = m.group('member')
557 val = local_vars[m.group('in')]
558 if member == 'split("")':
560 if member == 'join("")':
562 if member == 'length':
564 if member == 'reverse()':
566 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
568 idx = interpret_expression(
569 slice_m.group('idx'), local_vars, allow_recursion-1)
573 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
575 val = local_vars[m.group('in')]
576 idx = interpret_expression(m.group('idx'), local_vars,
580 m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
582 a = interpret_expression(m.group('a'),
583 local_vars, allow_recursion)
584 b = interpret_expression(m.group('b'),
585 local_vars, allow_recursion)
589 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
591 fname = m.group('func')
592 if fname not in functions:
593 functions[fname] = extract_function(fname)
594 argvals = [int(v) if v.isdigit() else local_vars[v]
595 for v in m.group('args').split(',')]
596 return functions[fname](argvals)
597 raise ExtractorError(u'Unsupported JS expression %r' % expr)
599 def extract_function(funcname):
601 r'function ' + re.escape(funcname) +
602 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
604 argnames = func_m.group('args').split(',')
607 local_vars = dict(zip(argnames, args))
608 for stmt in func_m.group('code').split(';'):
609 res = interpret_statement(stmt, local_vars)
613 initial_function = extract_function(funcname)
614 return lambda s: initial_function([s])
616 def _parse_sig_swf(self, file_contents):
617 if file_contents[1:3] != b'WS':
618 raise ExtractorError(
619 u'Not an SWF file; header is %r' % file_contents[:3])
620 if file_contents[:1] == b'C':
621 content = zlib.decompress(file_contents[8:])
623 raise NotImplementedError(u'Unsupported compression format %r' %
626 def extract_tags(content):
628 while pos < len(content):
629 header16 = struct.unpack('<H', content[pos:pos+2])[0]
631 tag_code = header16 >> 6
632 tag_len = header16 & 0x3f
634 tag_len = struct.unpack('<I', content[pos:pos+4])[0]
636 assert pos+tag_len <= len(content)
637 yield (tag_code, content[pos:pos+tag_len])
641 for tag_code, tag in extract_tags(content)
643 p = code_tag.index(b'\0', 4) + 1
644 code_reader = io.BytesIO(code_tag[p:])
646 # Parse ABC (AVM2 ByteCode)
647 def read_int(reader=None):
655 b = struct.unpack('<B', buf)[0]
656 res = res | ((b & 0x7f) << shift)
662 def u30(reader=None):
663 res = read_int(reader)
664 assert res & 0xf0000000 == 0
668 def s32(reader=None):
670 if v & 0x80000000 != 0:
671 v = - ((v ^ 0xffffffff) + 1)
674 def read_string(reader=None):
678 resb = reader.read(slen)
679 assert len(resb) == slen
680 return resb.decode('utf-8')
682 def read_bytes(count, reader=None):
685 resb = reader.read(count)
686 assert len(resb) == count
689 def read_byte(reader=None):
690 resb = read_bytes(1, reader=reader)
691 res = struct.unpack('<B', resb)[0]
694 # minor_version + major_version
699 for _c in range(1, int_count):
702 for _c in range(1, uint_count):
705 read_bytes((double_count-1) * 8)
707 constant_strings = [u'']
708 for _c in range(1, string_count):
710 constant_strings.append(s)
711 namespace_count = u30()
712 for _c in range(1, namespace_count):
716 for _c in range(1, ns_set_count):
718 for _c2 in range(count):
720 multiname_count = u30()
729 0x0e: 2, # MultinameA
730 0x1b: 1, # MultinameL
731 0x1c: 1, # MultinameLA
734 for _c in range(1, multiname_count):
736 assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
738 u30() # namespace_idx
740 multinames.append(constant_strings[name_idx])
742 multinames.append('[MULTINAME kind: %d]' % kind)
743 for _c2 in range(MULTINAME_SIZES[kind]):
748 MethodInfo = collections.namedtuple(
750 ['NEED_ARGUMENTS', 'NEED_REST'])
752 for method_id in range(method_count):
755 for _ in range(param_count):
757 u30() # name index (always 0 for youtube)
759 if flags & 0x08 != 0:
762 for c in range(option_count):
765 if flags & 0x80 != 0:
766 # Param names present
767 for _ in range(param_count):
769 mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
770 method_infos.append(mi)
773 metadata_count = u30()
774 for _c in range(metadata_count):
777 for _c2 in range(item_count):
781 def parse_traits_info():
782 trait_name_idx = u30()
783 kind_full = read_byte()
784 kind = kind_full & 0x0f
785 attrs = kind_full >> 4
787 if kind in [0x00, 0x06]: # Slot or Const
789 u30() # type_name_idx
793 elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
796 methods[multinames[trait_name_idx]] = method_idx
797 elif kind == 0x04: # Class
800 elif kind == 0x05: # Function
803 methods[function_idx] = multinames[trait_name_idx]
805 raise ExtractorError(u'Unsupported trait kind %d' % kind)
807 if attrs & 0x4 != 0: # Metadata present
808 metadata_count = u30()
809 for _c3 in range(metadata_count):
810 u30() # metadata index
815 TARGET_CLASSNAME = u'SignatureDecipher'
816 searched_idx = multinames.index(TARGET_CLASSNAME)
817 searched_class_id = None
819 for class_id in range(class_count):
821 if name_idx == searched_idx:
822 # We found the class we're looking for!
823 searched_class_id = class_id
824 u30() # super_name idx
826 if flags & 0x08 != 0: # Protected namespace is present
827 u30() # protected_ns_idx
829 for _c2 in range(intrf_count):
833 for _c2 in range(trait_count):
836 if searched_class_id is None:
837 raise ExtractorError(u'Target class %r not found' %
842 for class_id in range(class_count):
845 for _c2 in range(trait_count):
846 trait_methods = parse_traits_info()
847 if class_id == searched_class_id:
848 method_names.update(trait_methods.items())
849 method_idxs.update(dict(
851 for name, idx in trait_methods.items()))
855 for _c in range(script_count):
858 for _c2 in range(trait_count):
862 method_body_count = u30()
863 Method = collections.namedtuple('Method', ['code', 'local_count'])
865 for _c in range(method_body_count):
869 u30() # init_scope_depth
870 u30() # max_scope_depth
872 code = read_bytes(code_length)
873 if method_idx in method_idxs:
874 m = Method(code, local_count)
875 methods[method_idxs[method_idx]] = m
876 exception_count = u30()
877 for _c2 in range(exception_count):
884 for _c2 in range(trait_count):
887 assert p + code_reader.tell() == len(code_tag)
888 assert len(methods) == len(method_idxs)
890 method_pyfunctions = {}
892 def extract_function(func_name):
893 if func_name in method_pyfunctions:
894 return method_pyfunctions[func_name]
895 if func_name not in methods:
896 raise ExtractorError(u'Cannot find function %r' % func_name)
897 m = methods[func_name]
900 registers = ['(this)'] + list(args) + [None] * m.local_count
902 coder = io.BytesIO(m.code)
904 opcode = struct.unpack('!B', coder.read(1))[0]
905 if opcode == 36: # pushbyte
906 v = struct.unpack('!B', coder.read(1))[0]
908 elif opcode == 44: # pushstring
910 stack.append(constant_strings[idx])
911 elif opcode == 48: # pushscope
912 # We don't implement the scope register, so we'll just
913 # ignore the popped value
915 elif opcode == 70: # callproperty
917 mname = multinames[index]
918 arg_count = u30(coder)
919 args = list(reversed(
920 [stack.pop() for _ in range(arg_count)]))
922 if mname == u'split':
923 assert len(args) == 1
924 assert isinstance(args[0], compat_str)
925 assert isinstance(obj, compat_str)
929 res = obj.split(args[0])
931 elif mname == u'slice':
932 assert len(args) == 1
933 assert isinstance(args[0], int)
934 assert isinstance(obj, list)
937 elif mname == u'join':
938 assert len(args) == 1
939 assert isinstance(args[0], compat_str)
940 assert isinstance(obj, list)
941 res = args[0].join(obj)
943 elif mname in method_pyfunctions:
944 stack.append(method_pyfunctions[mname](args))
946 raise NotImplementedError(
947 u'Unsupported property %r on %r'
949 elif opcode == 72: # returnvalue
952 elif opcode == 79: # callpropvoid
954 mname = multinames[index]
955 arg_count = u30(coder)
956 args = list(reversed(
957 [stack.pop() for _ in range(arg_count)]))
959 if mname == u'reverse':
960 assert isinstance(obj, list)
963 raise NotImplementedError(
964 u'Unsupported (void) property %r on %r'
966 elif opcode == 93: # findpropstrict
968 mname = multinames[index]
969 res = extract_function(mname)
971 elif opcode == 97: # setproperty
976 assert isinstance(obj, list)
977 assert isinstance(idx, int)
979 elif opcode == 98: # getlocal
981 stack.append(registers[index])
982 elif opcode == 99: # setlocal
985 registers[index] = value
986 elif opcode == 102: # getproperty
988 pname = multinames[index]
989 if pname == u'length':
991 assert isinstance(obj, list)
992 stack.append(len(obj))
993 else: # Assume attribute access
995 assert isinstance(idx, int)
997 assert isinstance(obj, list)
998 stack.append(obj[idx])
999 elif opcode == 128: # coerce
1001 elif opcode == 133: # coerce_s
1002 assert isinstance(stack[-1], (type(None), compat_str))
1003 elif opcode == 164: # modulo
1004 value2 = stack.pop()
1005 value1 = stack.pop()
1006 res = value1 % value2
1008 elif opcode == 208: # getlocal_0
1009 stack.append(registers[0])
1010 elif opcode == 209: # getlocal_1
1011 stack.append(registers[1])
1012 elif opcode == 210: # getlocal_2
1013 stack.append(registers[2])
1014 elif opcode == 211: # getlocal_3
1015 stack.append(registers[3])
1016 elif opcode == 214: # setlocal_2
1017 registers[2] = stack.pop()
1018 elif opcode == 215: # setlocal_3
1019 registers[3] = stack.pop()
1021 raise NotImplementedError(
1022 u'Unsupported opcode %d' % opcode)
1024 method_pyfunctions[func_name] = resfunc
1027 initial_function = extract_function(u'decipher')
1028 return lambda s: initial_function([s])
1030 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1031 """Turn the encrypted s field into a working signature"""
1033 if player_url is not None:
1035 player_id = (player_url, len(s))
1036 if player_id not in self._player_cache:
1037 func = self._extract_signature_function(
1038 video_id, player_url, len(s)
1040 self._player_cache[player_id] = func
1041 func = self._player_cache[player_id]
1042 if self._downloader.params.get('youtube_print_sig_code'):
1043 self._print_sig_code(func, len(s))
1046 tb = traceback.format_exc()
1047 self._downloader.report_warning(
1048 u'Automatic signature extraction failed: ' + tb)
1050 self._downloader.report_warning(
1051 u'Warning: Falling back to static signature algorithm')
1053 return self._static_decrypt_signature(
1054 s, video_id, player_url, age_gate)
1056 def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1058 # The videos with age protection use another player, so the
1059 # algorithms can be different.
1061 return s[2:63] + s[82] + s[64:82] + s[63]
1064 return s[86:29:-1] + s[88] + s[28:5:-1]
1066 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1068 return s[84:27:-1] + s[86] + s[26:5:-1]
1070 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1072 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1074 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1076 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1078 return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1080 return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1082 return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1084 return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1086 return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1088 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1090 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1092 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1095 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1097 def _get_available_subtitles(self, video_id):
1099 sub_list = self._download_webpage(
1100 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1101 video_id, note=False)
1102 except ExtractorError as err:
1103 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1105 lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1110 params = compat_urllib_parse.urlencode({
1113 'fmt': self._downloader.params.get('subtitlesformat'),
1116 url = u'http://www.youtube.com/api/timedtext?' + params
1117 sub_lang_list[lang] = url
1118 if not sub_lang_list:
1119 self._downloader.report_warning(u'video doesn\'t have subtitles')
1121 return sub_lang_list
1123 def _get_available_automatic_caption(self, video_id, webpage):
1124 """We need the webpage for getting the captions url, pass it as an
1125 argument to speed up the process."""
1126 sub_format = self._downloader.params.get('subtitlesformat')
1127 self.to_screen(u'%s: Looking for automatic captions' % video_id)
1128 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1129 err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1131 self._downloader.report_warning(err_msg)
1133 player_config = json.loads(mobj.group(1))
1135 args = player_config[u'args']
1136 caption_url = args[u'ttsurl']
1137 timestamp = args[u'timestamp']
1138 # We get the available subtitles
1139 list_params = compat_urllib_parse.urlencode({
1144 list_url = caption_url + '&' + list_params
1145 list_page = self._download_webpage(list_url, video_id)
1146 caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1147 original_lang_node = caption_list.find('track')
1148 if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
1149 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1151 original_lang = original_lang_node.attrib['lang_code']
1154 for lang_node in caption_list.findall('target'):
1155 sub_lang = lang_node.attrib['lang_code']
1156 params = compat_urllib_parse.urlencode({
1157 'lang': original_lang,
1163 sub_lang_list[sub_lang] = caption_url + '&' + params
1164 return sub_lang_list
1165 # An extractor error can be raise by the download process if there are
1166 # no automatic captions but there are subtitles
1167 except (KeyError, ExtractorError):
1168 self._downloader.report_warning(err_msg)
1171 def _print_formats(self, formats):
1172 print('Available formats:')
1174 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1175 self._video_dimensions.get(x, '???'),
1176 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1178 def _extract_id(self, url):
1179 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1181 raise ExtractorError(u'Invalid URL: %s' % url)
1182 video_id = mobj.group(2)
1185 def _get_video_url_list(self, url_map):
1187 Transform a dictionary in the format {itag:url} to a list of (itag, url)
1188 with the requested formats.
1190 req_format = self._downloader.params.get('format', None)
1191 format_limit = self._downloader.params.get('format_limit', None)
1192 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1193 if format_limit is not None and format_limit in available_formats:
1194 format_list = available_formats[available_formats.index(format_limit):]
1196 format_list = available_formats
1197 existing_formats = [x for x in format_list if x in url_map]
1198 if len(existing_formats) == 0:
1199 raise ExtractorError(u'no known formats available for video')
1200 if self._downloader.params.get('listformats', None):
1201 self._print_formats(existing_formats)
1203 if req_format is None or req_format == 'best':
1204 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1205 elif req_format == 'worst':
1206 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1207 elif req_format in ('-1', 'all'):
1208 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1210 # Specific formats. We pick the first in a slash-delimeted sequence.
1211 # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1212 # available in the specified format. For example,
1213 # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1214 # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1215 # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1216 req_formats = req_format.split('/')
1217 video_url_list = None
1218 for rf in req_formats:
1220 video_url_list = [(rf, url_map[rf])]
1222 if rf in self._video_formats_map:
1223 for srf in self._video_formats_map[rf]:
1225 video_url_list = [(srf, url_map[srf])]
1230 if video_url_list is None:
1231 raise ExtractorError(u'requested format not available')
1232 return video_url_list
1234 def _extract_from_m3u8(self, manifest_url, video_id):
1236 def _get_urls(_manifest):
1237 lines = _manifest.split('\n')
1238 urls = filter(lambda l: l and not l.startswith('#'),
1241 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1242 formats_urls = _get_urls(manifest)
1243 for format_url in formats_urls:
1244 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1245 url_map[itag] = format_url
1248 def _extract_annotations(self, video_id):
1249 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1250 return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1252 def _real_extract(self, url):
1253 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1254 mobj = re.search(self._NEXT_URL_RE, url)
1256 url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1257 video_id = self._extract_id(url)
1260 self.report_video_webpage_download(video_id)
1261 url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1262 request = compat_urllib_request.Request(url)
1264 video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1265 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1266 raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1268 video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1270 # Attempt to extract SWF player URL
1271 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1272 if mobj is not None:
1273 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1278 self.report_video_info_webpage_download(video_id)
1279 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1280 self.report_age_confirmation()
1282 # We simulate the access to the video from www.youtube.com/v/{video_id}
1283 # this can be viewed without login into Youtube
1284 data = compat_urllib_parse.urlencode({'video_id': video_id,
1288 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1292 video_info_url = 'https://www.youtube.com/get_video_info?' + data
1293 video_info_webpage = self._download_webpage(video_info_url, video_id,
1295 errnote='unable to download video info webpage')
1296 video_info = compat_parse_qs(video_info_webpage)
1299 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1300 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1301 % (video_id, el_type))
1302 video_info_webpage = self._download_webpage(video_info_url, video_id,
1304 errnote='unable to download video info webpage')
1305 video_info = compat_parse_qs(video_info_webpage)
1306 if 'token' in video_info:
1308 if 'token' not in video_info:
1309 if 'reason' in video_info:
1310 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1312 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1314 # Check for "rental" videos
1315 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1316 raise ExtractorError(u'"rental" videos not supported')
1318 # Start extracting information
1319 self.report_information_extraction(video_id)
1322 if 'author' not in video_info:
1323 raise ExtractorError(u'Unable to extract uploader name')
1324 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1327 video_uploader_id = None
1328 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1329 if mobj is not None:
1330 video_uploader_id = mobj.group(1)
1332 self._downloader.report_warning(u'unable to extract uploader nickname')
1335 if 'title' in video_info:
1336 video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1338 self._downloader.report_warning(u'Unable to extract video title')
1342 # We try first to get a high quality image:
1343 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1344 video_webpage, re.DOTALL)
1345 if m_thumb is not None:
1346 video_thumbnail = m_thumb.group(1)
1347 elif 'thumbnail_url' not in video_info:
1348 self._downloader.report_warning(u'unable to extract video thumbnail')
1349 video_thumbnail = None
1350 else: # don't panic if we can't find it
1351 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1355 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1356 if mobj is not None:
1357 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1358 upload_date = unified_strdate(upload_date)
1361 video_description = get_element_by_id("eow-description", video_webpage)
1362 if video_description:
1363 video_description = clean_html(video_description)
1365 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1367 video_description = unescapeHTML(fd_mobj.group(1))
1369 video_description = u''
1372 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1374 if self._downloader.params.get('listsubtitles', False):
1375 self._list_available_subtitles(video_id, video_webpage)
1378 if 'length_seconds' not in video_info:
1379 self._downloader.report_warning(u'unable to extract video duration')
1382 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1385 video_annotations = None
1386 if self._downloader.params.get('writeannotations', False):
1387 video_annotations = self._extract_annotations(video_id)
1389 # Decide which formats to download
1392 mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1394 raise ValueError('Could not find vevo ID')
1395 info = json.loads(mobj.group(1))
1397 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1398 # this signatures are encrypted
1399 if 'url_encoded_fmt_stream_map' not in args:
1400 raise ValueError(u'No stream_map present') # caught below
1401 re_signature = re.compile(r'[&,]s=')
1402 m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
1404 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1405 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1406 m_s = re_signature.search(args.get('adaptive_fmts', u''))
1408 if 'adaptive_fmts' in video_info:
1409 video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
1411 video_info['adaptive_fmts'] = [args['adaptive_fmts']]
1415 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1416 self.report_rtmp_download()
1417 video_url_list = [(None, video_info['conn'][0])]
1418 elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
1419 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
1420 if 'rtmpe%3Dyes' in encoded_url_map:
1421 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1423 for url_data_str in encoded_url_map.split(','):
1424 url_data = compat_parse_qs(url_data_str)
1425 if 'itag' in url_data and 'url' in url_data:
1426 url = url_data['url'][0]
1427 if 'sig' in url_data:
1428 url += '&signature=' + url_data['sig'][0]
1429 elif 's' in url_data:
1430 encrypted_sig = url_data['s'][0]
1431 if self._downloader.params.get('verbose'):
1433 if player_url is None:
1434 player_version = 'unknown'
1436 player_version = self._search_regex(
1437 r'-(.+)\.swf$', player_url,
1438 u'flash player', fatal=False)
1439 player_desc = 'flash player %s' % player_version
1441 player_version = self._search_regex(
1442 r'html5player-(.+?)\.js', video_webpage,
1443 'html5 player', fatal=False)
1444 player_desc = u'html5 player %s' % player_version
1446 parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1447 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1448 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1451 jsplayer_url_json = self._search_regex(
1452 r'"assets":.+?"js":\s*("[^"]+")',
1453 video_webpage, u'JS player URL')
1454 player_url = json.loads(jsplayer_url_json)
1456 signature = self._decrypt_signature(
1457 encrypted_sig, video_id, player_url, age_gate)
1458 url += '&signature=' + signature
1459 if 'ratebypass' not in url:
1460 url += '&ratebypass=yes'
1461 url_map[url_data['itag'][0]] = url
1462 video_url_list = self._get_video_url_list(url_map)
1463 if not video_url_list:
1465 elif video_info.get('hlsvp'):
1466 manifest_url = video_info['hlsvp'][0]
1467 url_map = self._extract_from_m3u8(manifest_url, video_id)
1468 video_url_list = self._get_video_url_list(url_map)
1469 if not video_url_list:
1473 raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1476 for itag, video_real_url in video_url_list:
1478 video_extension = self._video_extensions.get(itag, 'flv')
1480 video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
1481 self._video_dimensions.get(itag, '???'),
1482 ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
1486 'url': video_real_url,
1487 'uploader': video_uploader,
1488 'uploader_id': video_uploader_id,
1489 'upload_date': upload_date,
1490 'title': video_title,
1491 'ext': video_extension,
1492 'format': video_format,
1494 'thumbnail': video_thumbnail,
1495 'description': video_description,
1496 'player_url': player_url,
1497 'subtitles': video_subtitles,
1498 'duration': video_duration,
1499 'age_limit': 18 if age_gate else 0,
1500 'annotations': video_annotations
class YoutubePlaylistIE(InfoExtractor):
    """Extractor for YouTube playlists (PL/EC/UU/FL ids and playlist-style URLs).

    Pages through the gdata playlist API, collects every video URL in
    playlist order and returns a single playlist result.
    """
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written with whitespace and comments, so VERBOSE is required.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id: group(1) for playlist-style URLs, group(2) for bare ids.
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
            else:
                # FIX: the captured id already carries its PL/EC/UU/FL prefix, so
                # do not prepend a hard-coded 'PL' (it was wrong for EC/UU/FL lists).
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        # Download playlist videos from API as (position, url) pairs.
        videos = []

        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The gdata API rejects start-index values of 1000 and above.
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    videos.append((
                        index,
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    ))

        # Sort by playlist position and drop the index again.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    """Extractor for all videos of a YouTube channel."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker in the HTML/JSON that signals further pages are available.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from *page*, deduplicated, in first-seen order."""
        ids_in_page = []
        # FIX: track seen ids in a set; `not in list` made this O(n^2) on big channels.
        seen = set()
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            video_id = mobj.group(1)
            if video_id not in seen:
                seen.add(video_id)
                ids_in_page.append(video_id)
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        video_ids = []
        pagenum = 1

        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)

                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                # Stop when the widget no longer advertises further pages.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        # Avoid shadowing the builtin `id` while building the watch URLs.
        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    """Extractor for a user's uploaded videos (user URLs or the "ytuser:" keyword)."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors; our regex is too permissive and it would match them too.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.
        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            # FIX: an absent 'feed' key used to raise a bare KeyError; report it
            # with the same explicit error YoutubePlaylistIE uses.
            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._GDATA_PAGE_SIZE
                break

            # Extract video identifiers (last path component of each entry id)
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor for the "ytsearch" keyword, backed by the gdata search API."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        video_ids = []
        pagenum = 0
        limit = n

        # The API serves 50 results per page; keep fetching until we have
        # enough ids or the result set is exhausted.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            # FIX: idiomatic `not in` instead of `not 'items' in ...`.
            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            # FIX: comprehension instead of list(genexp); don't shadow builtin `id`.
            video_ids += [video['id'] for video in api_response['items']]

            # Never ask for more results than the API reports to exist.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extractor for multi-season YouTube shows; yields one playlist per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        season_results = []
        for season_match in season_matches:
            season_url = 'https://www.youtube.com' + season_match.group(1)
            season_results.append(self.url_result(season_url, 'YoutubePlaylist'))
        return season_results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-account, so credentials are mandatory.
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """Feed URL template with a single %s placeholder for the paging offset."""
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        # Derived from the subclass's feed name, e.g. u'youtube:subscriptions'.
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i * self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # orderedSet keeps first-seen order while dropping duplicates.
            ids = orderedSet(m.group(1) for m in m_ids)
            # FIX: loop variable renamed so it no longer shadows the builtin `id`.
            feed_entries.extend(self.url_result(video_id, 'Youtube') for video_id in ids)
            # A null 'paging' token marks the last page of the feed.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's subscriptions feed."""
    # FIX: added the missing space before "(requires", consistent with the
    # sibling feed extractors' IE_DESC strings.
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's recommended-videos feed."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed name plugged into the base class's _FEED_TEMPLATE and IE_NAME.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's "Watch Later" list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    # Feed name plugged into the base class's _FEED_TEMPLATE and IE_NAME.
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # The watch-later list is per-account: use the personal-feed ajax action.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourite videos."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # Favourites are only visible to their owner, so credentials are mandatory.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of its backing playlist; scrape it
        # and delegate the actual extraction to YoutubePlaylistIE.
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
1836 class YoutubeTruncatedURLIE(InfoExtractor):
1837 IE_NAME = 'youtube:truncated_url'
1838 IE_DESC = False # Do not list
1839 _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'
1841 def _real_extract(self, url):
1842 raise ExtractorError(
1843 u'Did you forget to quote the URL? Remember that & is a meta '
1844 u'character in most shells, so you want to put the URL in quotes, '
1846 u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
1847 u' (or simply youtube-dl BaW_jenozKc ).',