youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 import collections
   4 import errno
   5 import itertools
   6 import io
   7 import json
   8 import operator
   9 import os.path
  10 import re
  11 import socket
  12 import string
  13 import struct
  14 import traceback
  15 import zlib
  16
  17 from .common import InfoExtractor, SearchInfoExtractor
  18 from .subtitles import SubtitlesInfoExtractor
  19 from ..utils import (
  20     compat_chr,
  21     compat_http_client,
  22     compat_parse_qs,
  23     compat_urllib_error,
  24     compat_urllib_parse,
  25     compat_urllib_request,
  26     compat_str,
  27
  28     clean_html,
  29     get_element_by_id,
  30     ExtractorError,
  31     unescapeHTML,
  32     unified_strdate,
  33     orderedSet,
  34     write_json_file,
  35 )
  36
  37 class YoutubeBaseInfoExtractor(InfoExtractor):
  38     """Provide base functions for Youtube extractors"""
  39     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  40     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
  41     _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
  42     _NETRC_MACHINE = 'youtube'
  43     # If True it will raise an error if no login info is provided
  44     _LOGIN_REQUIRED = False
  45
  46     def report_lang(self):
  47         """Report attempt to set language."""
  48         self.to_screen(u'Setting language')
  49
  50     def _set_language(self):
  51         request = compat_urllib_request.Request(self._LANG_URL)
  52         try:
  53             self.report_lang()
  54             compat_urllib_request.urlopen(request).read()
  55         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  56             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
  57             return False
  58         return True
  59
  60     def _login(self):
  61         (username, password) = self._get_login_info()
  62         # No authentication to be performed
  63         if username is None:
  64             if self._LOGIN_REQUIRED:
  65                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  66             return False
  67
  68         request = compat_urllib_request.Request(self._LOGIN_URL)
  69         try:
  70             login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
  71         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  72             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
  73             return False
  74
  75         galx = None
  76         dsh = None
  77         match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
  78         if match:
  79           galx = match.group(1)
  80         match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
  81         if match:
  82           dsh = match.group(1)
  83
  84         # Log in
  85         login_form_strs = {
  86                 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  87                 u'Email': username,
  88                 u'GALX': galx,
  89                 u'Passwd': password,
  90                 u'PersistentCookie': u'yes',
  91                 u'_utf8': u'霱',
  92                 u'bgresponse': u'js_disabled',
  93                 u'checkConnection': u'',
  94                 u'checkedDomains': u'youtube',
  95                 u'dnConn': u'',
  96                 u'dsh': dsh,
  97                 u'pstMsg': u'0',
  98                 u'rmShown': u'1',
  99                 u'secTok': u'',
 100                 u'signIn': u'Sign in',
 101                 u'timeStmp': u'',
 102                 u'service': u'youtube',
 103                 u'uilel': u'3',
 104                 u'hl': u'en_US',
 105         }
 106         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
 107         # chokes on unicode
 108         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
 109         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 110         request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 111         try:
 112             self.report_login()
 113             login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
 114             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 115                 self._downloader.report_warning(u'unable to log in: bad username or password')
 116                 return False
 117         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 118             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
 119             return False
 120         return True
 121
 122     def _confirm_age(self):
 123         age_form = {
 124                 'next_url':     '/',
 125                 'action_confirm':   'Confirm',
 126                 }
 127         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
 128         try:
 129             self.report_age_confirmation()
 130             compat_urllib_request.urlopen(request).read().decode('utf-8')
 131         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 132             raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
 133         return True
 134
 135     def _real_initialize(self):
 136         if self._downloader is None:
 137             return
 138         if not self._set_language():
 139             return
 140         if not self._login():
 141             return
 142         self._confirm_age()
 143
 144
 145 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 146     IE_DESC = u'YouTube.com'
 147     _VALID_URL = r"""^
 148                      (
 149                          (?:https?://)?                                       # http(s):// (optional)
 150                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
 151                             tube\.majestyc\.net/|
 152                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 153                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 154                          (?:                                                  # the various things that can precede the ID:
 155                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
 156                              |(?:                                             # or the v= param in all its forms
 157                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 158                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 159                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
 160                                  v=
 161                              )
 162                          ))
 163                          |youtu\.be/                                          # just youtu.be/xxxx
 164                          )
 165                      )?                                                       # all until now is optional -> you can pass the naked ID
 166                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 167                      (?(1).+)?                                                # if we found the ID, everything can follow
 168                      $"""
 169     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 170     # Listed in order of quality
 171     _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
 172                           # Apple HTTP Live Streaming
 173                           '96', '95', '94', '93', '92', '132', '151',
 174                           # 3D
 175                           '85', '84', '102', '83', '101', '82', '100',
 176                           # Dash video
 177                           '138', '137', '248', '136', '247', '135', '246',
 178                           '245', '244', '134', '243', '133', '242', '160',
 179                           # Dash audio
 180                           '141', '172', '140', '171', '139',
 181                           ]
 182     _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
 183                                       # Apple HTTP Live Streaming
 184                                       '96', '95', '94', '93', '92', '132', '151',
 185                                       # 3D
 186                                       '85', '102', '84', '101', '83', '100', '82',
 187                                       # Dash video
 188                                       '138', '248', '137', '247', '136', '246', '245',
 189                                       '244', '135', '243', '134', '242', '133', '160',
 190                                       # Dash audio
 191                                       '172', '141', '171', '140', '139',
 192                                       ]
 193     _video_formats_map = {
 194         'flv': ['35', '34', '6', '5'],
 195         '3gp': ['36', '17', '13'],
 196         'mp4': ['38', '37', '22', '18'],
 197         'webm': ['46', '45', '44', '43'],
 198     }
 199     _video_extensions = {
 200         '13': '3gp',
 201         '17': '3gp',
 202         '18': 'mp4',
 203         '22': 'mp4',
 204         '36': '3gp',
 205         '37': 'mp4',
 206         '38': 'mp4',
 207         '43': 'webm',
 208         '44': 'webm',
 209         '45': 'webm',
 210         '46': 'webm',
 211
 212         # 3d videos
 213         '82': 'mp4',
 214         '83': 'mp4',
 215         '84': 'mp4',
 216         '85': 'mp4',
 217         '100': 'webm',
 218         '101': 'webm',
 219         '102': 'webm',
 220
 221         # Apple HTTP Live Streaming
 222         '92': 'mp4',
 223         '93': 'mp4',
 224         '94': 'mp4',
 225         '95': 'mp4',
 226         '96': 'mp4',
 227         '132': 'mp4',
 228         '151': 'mp4',
 229
 230         # Dash mp4
 231         '133': 'mp4',
 232         '134': 'mp4',
 233         '135': 'mp4',
 234         '136': 'mp4',
 235         '137': 'mp4',
 236         '138': 'mp4',
 237         '139': 'mp4',
 238         '140': 'mp4',
 239         '141': 'mp4',
 240         '160': 'mp4',
 241
 242         # Dash webm
 243         '171': 'webm',
 244         '172': 'webm',
 245         '242': 'webm',
 246         '243': 'webm',
 247         '244': 'webm',
 248         '245': 'webm',
 249         '246': 'webm',
 250         '247': 'webm',
 251         '248': 'webm',
 252     }
 253     _video_dimensions = {
 254         '5': '240x400',
 255         '6': '???',
 256         '13': '???',
 257         '17': '144x176',
 258         '18': '360x640',
 259         '22': '720x1280',
 260         '34': '360x640',
 261         '35': '480x854',
 262         '36': '240x320',
 263         '37': '1080x1920',
 264         '38': '3072x4096',
 265         '43': '360x640',
 266         '44': '480x854',
 267         '45': '720x1280',
 268         '46': '1080x1920',
 269         '82': '360p',
 270         '83': '480p',
 271         '84': '720p',
 272         '85': '1080p',
 273         '92': '240p',
 274         '93': '360p',
 275         '94': '480p',
 276         '95': '720p',
 277         '96': '1080p',
 278         '100': '360p',
 279         '101': '480p',
 280         '102': '720p',
 281         '132': '240p',
 282         '151': '72p',
 283         '133': '240p',
 284         '134': '360p',
 285         '135': '480p',
 286         '136': '720p',
 287         '137': '1080p',
 288         '138': '>1080p',
 289         '139': '48k',
 290         '140': '128k',
 291         '141': '256k',
 292         '160': '192p',
 293         '171': '128k',
 294         '172': '256k',
 295         '242': '240p',
 296         '243': '360p',
 297         '244': '480p',
 298         '245': '480p',
 299         '246': '480p',
 300         '247': '720p',
 301         '248': '1080p',
 302     }
 303     _special_itags = {
 304         '82': '3D',
 305         '83': '3D',
 306         '84': '3D',
 307         '85': '3D',
 308         '100': '3D',
 309         '101': '3D',
 310         '102': '3D',
 311         '133': 'DASH Video',
 312         '134': 'DASH Video',
 313         '135': 'DASH Video',
 314         '136': 'DASH Video',
 315         '137': 'DASH Video',
 316         '138': 'DASH Video',
 317         '139': 'DASH Audio',
 318         '140': 'DASH Audio',
 319         '141': 'DASH Audio',
 320         '160': 'DASH Video',
 321         '171': 'DASH Audio',
 322         '172': 'DASH Audio',
 323         '242': 'DASH Video',
 324         '243': 'DASH Video',
 325         '244': 'DASH Video',
 326         '245': 'DASH Video',
 327         '246': 'DASH Video',
 328         '247': 'DASH Video',
 329         '248': 'DASH Video',
 330     }
 331
 332     IE_NAME = u'youtube'
 333     _TESTS = [
 334         {
 335             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
 336             u"file":  u"BaW_jenozKc.mp4",
 337             u"info_dict": {
 338                 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
 339                 u"uploader": u"Philipp Hagemeister",
 340                 u"uploader_id": u"phihag",
 341                 u"upload_date": u"20121002",
 342                 u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
 343             }
 344         },
 345         {
 346             u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
 347             u"file":  u"1ltcDfZMA3U.flv",
 348             u"note": u"Test VEVO video (#897)",
 349             u"info_dict": {
 350                 u"upload_date": u"20070518",
 351                 u"title": u"Maps - It Will Find You",
 352                 u"description": u"Music video by Maps performing It Will Find You.",
 353                 u"uploader": u"MuteUSA",
 354                 u"uploader_id": u"MuteUSA"
 355             }
 356         },
 357         {
 358             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
 359             u"file":  u"UxxajLWwzqY.mp4",
 360             u"note": u"Test generic use_cipher_signature video (#897)",
 361             u"info_dict": {
 362                 u"upload_date": u"20120506",
 363                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
 364                 u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
 365                 u"uploader": u"Icona Pop",
 366                 u"uploader_id": u"IconaPop"
 367             }
 368         },
 369         {
 370             u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
 371             u"file":  u"07FYdnEawAQ.mp4",
 372             u"note": u"Test VEVO video with age protection (#956)",
 373             u"info_dict": {
 374                 u"upload_date": u"20130703",
 375                 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
 376                 u"description": u"md5:64249768eec3bc4276236606ea996373",
 377                 u"uploader": u"justintimberlakeVEVO",
 378                 u"uploader_id": u"justintimberlakeVEVO"
 379             }
 380         },
 381         {
 382             u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
 383             u'file': u'TGi3HqYrWHE.mp4',
 384             u'note': u'm3u8 video',
 385             u'info_dict': {
 386                 u'title': u'Triathlon - Men - London 2012 Olympic Games',
 387                 u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
 388                 u'uploader': u'olympic',
 389                 u'upload_date': u'20120807',
 390                 u'uploader_id': u'olympic',
 391             },
 392             u'params': {
 393                 u'skip_download': True,
 394             },
 395         },
 396     ]
 397
 398
 399     @classmethod
 400     def suitable(cls, url):
 401         """Receives a URL and returns True if suitable for this IE."""
 402         if YoutubePlaylistIE.suitable(url): return False
 403         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 404
 405     def __init__(self, *args, **kwargs):
 406         super(YoutubeIE, self).__init__(*args, **kwargs)
 407         self._player_cache = {}
 408
 409     def report_video_webpage_download(self, video_id):
 410         """Report attempt to download video webpage."""
 411         self.to_screen(u'%s: Downloading video webpage' % video_id)
 412
 413     def report_video_info_webpage_download(self, video_id):
 414         """Report attempt to download video info webpage."""
 415         self.to_screen(u'%s: Downloading video info webpage' % video_id)
 416
 417     def report_information_extraction(self, video_id):
 418         """Report attempt to extract video information."""
 419         self.to_screen(u'%s: Extracting video information' % video_id)
 420
 421     def report_unavailable_format(self, video_id, format):
 422         """Report extracted video URL."""
 423         self.to_screen(u'%s: Format %s not available' % (video_id, format))
 424
 425     def report_rtmp_download(self):
 426         """Indicate the download will use the RTMP protocol."""
 427         self.to_screen(u'RTMP download detected')
 428
 429     def _extract_signature_function(self, video_id, player_url, slen):
 430         id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
 431                         player_url)
 432         player_type = id_m.group('ext')
 433         player_id = id_m.group('id')
 434
 435         # Read from filesystem cache
 436         func_id = '%s_%s_%d' % (player_type, player_id, slen)
 437         assert os.path.basename(func_id) == func_id
 438         cache_dir = self._downloader.params.get('cachedir',
 439                                                 u'~/.youtube-dl/cache')
 440
 441         if cache_dir != u'NONE':
 442             cache_fn = os.path.join(os.path.expanduser(cache_dir),
 443                                     u'youtube-sigfuncs',
 444                                     func_id + '.json')
 445             try:
 446                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
 447                     cache_spec = json.load(cachef)
 448                 return lambda s: u''.join(s[i] for i in cache_spec)
 449             except IOError:
 450                 pass  # No cache available
 451
 452         if player_type == 'js':
 453             code = self._download_webpage(
 454                 player_url, video_id,
 455                 note=u'Downloading %s player %s' % (player_type, player_id),
 456                 errnote=u'Download of %s failed' % player_url)
 457             res = self._parse_sig_js(code)
 458         elif player_type == 'swf':
 459             urlh = self._request_webpage(
 460                 player_url, video_id,
 461                 note=u'Downloading %s player %s' % (player_type, player_id),
 462                 errnote=u'Download of %s failed' % player_url)
 463             code = urlh.read()
 464             res = self._parse_sig_swf(code)
 465         else:
 466             assert False, 'Invalid player type %r' % player_type
 467
 468         if cache_dir is not False:
 469             try:
 470                 cache_res = res(map(compat_chr, range(slen)))
 471                 cache_spec = [ord(c) for c in cache_res]
 472                 try:
 473                     os.makedirs(os.path.dirname(cache_fn))
 474                 except OSError as ose:
 475                     if ose.errno != errno.EEXIST:
 476                         raise
 477                 write_json_file(cache_spec, cache_fn)
 478             except Exception as e:
 479                 tb = traceback.format_exc()
 480                 self._downloader.report_warning(
 481                     u'Writing cache to %r failed: %s' % (cache_fn, tb))
 482
 483         return res
 484
 485     def _print_sig_code(self, func, slen):
 486         def gen_sig_code(idxs):
 487             def _genslice(start, end, step):
 488                 starts = u'' if start == 0 else str(start)
 489                 ends = u':%d' % (end+step)
 490                 steps = u'' if step == 1 else (':%d' % step)
 491                 return u's[%s%s%s]' % (starts, ends, steps)
 492
 493             step = None
 494             for i, prev in zip(idxs[1:], idxs[:-1]):
 495                 if step is not None:
 496                     if i - prev == step:
 497                         continue
 498                     yield _genslice(start, prev, step)
 499                     step = None
 500                     continue
 501                 if i - prev in [-1, 1]:
 502                     step = i - prev
 503                     start = prev
 504                     continue
 505                 else:
 506                     yield u's[%d]' % prev
 507             if step is None:
 508                 yield u's[%d]' % i
 509             else:
 510                 yield _genslice(start, i, step)
 511
 512         cache_res = func(map(compat_chr, range(slen)))
 513         cache_spec = [ord(c) for c in cache_res]
 514         expr_code = u' + '.join(gen_sig_code(cache_spec))
 515         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
 516         self.to_screen(u'Extracted signature:\n' + code)
 517
 518     def _parse_sig_js(self, jscode):
 519         funcname = self._search_regex(
 520             r'signature=([a-zA-Z]+)', jscode,
 521             u'Initial JS player signature function name')
 522
 523         functions = {}
 524
 525         def argidx(varname):
 526             return string.lowercase.index(varname)
 527
 528         def interpret_statement(stmt, local_vars, allow_recursion=20):
 529             if allow_recursion < 0:
 530                 raise ExctractorError(u'Recursion limit reached')
 531
 532             if stmt.startswith(u'var '):
 533                 stmt = stmt[len(u'var '):]
 534             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
 535                              r'=(?P<expr>.*)$', stmt)
 536             if ass_m:
 537                 if ass_m.groupdict().get('index'):
 538                     def assign(val):
 539                         lvar = local_vars[ass_m.group('out')]
 540                         idx = interpret_expression(ass_m.group('index'),
 541                                                    local_vars, allow_recursion)
 542                         assert isinstance(idx, int)
 543                         lvar[idx] = val
 544                         return val
 545                     expr = ass_m.group('expr')
 546                 else:
 547                     def assign(val):
 548                         local_vars[ass_m.group('out')] = val
 549                         return val
 550                     expr = ass_m.group('expr')
 551             elif stmt.startswith(u'return '):
 552                 assign = lambda v: v
 553                 expr = stmt[len(u'return '):]
 554             else:
 555                 raise ExtractorError(
 556                     u'Cannot determine left side of statement in %r' % stmt)
 557
 558             v = interpret_expression(expr, local_vars, allow_recursion)
 559             return assign(v)
 560
 561         def interpret_expression(expr, local_vars, allow_recursion):
 562             if expr.isdigit():
 563                 return int(expr)
 564
 565             if expr.isalpha():
 566                 return local_vars[expr]
 567
 568             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
 569             if m:
 570                 member = m.group('member')
 571                 val = local_vars[m.group('in')]
 572                 if member == 'split("")':
 573                     return list(val)
 574                 if member == 'join("")':
 575                     return u''.join(val)
 576                 if member == 'length':
 577                     return len(val)
 578                 if member == 'reverse()':
 579                     return val[::-1]
 580                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
 581                 if slice_m:
 582                     idx = interpret_expression(
 583                         slice_m.group('idx'), local_vars, allow_recursion-1)
 584                     return val[idx:]
 585
 586             m = re.match(
 587                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
 588             if m:
 589                 val = local_vars[m.group('in')]
 590                 idx = interpret_expression(m.group('idx'), local_vars,
 591                                            allow_recursion-1)
 592                 return val[idx]
 593
 594             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
 595             if m:
 596                 a = interpret_expression(m.group('a'),
 597                                          local_vars, allow_recursion)
 598                 b = interpret_expression(m.group('b'),
 599                                          local_vars, allow_recursion)
 600                 return a % b
 601
 602             m = re.match(
 603                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
 604             if m:
 605                 fname = m.group('func')
 606                 if fname not in functions:
 607                     functions[fname] = extract_function(fname)
 608                 argvals = [int(v) if v.isdigit() else local_vars[v]
 609                            for v in m.group('args').split(',')]
 610                 return functions[fname](argvals)
 611             raise ExtractorError(u'Unsupported JS expression %r' % expr)
 612
 613         def extract_function(funcname):
 614             func_m = re.search(
 615                 r'function ' + re.escape(funcname) +
 616                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
 617                 jscode)
 618             argnames = func_m.group('args').split(',')
 619
 620             def resf(args):
 621                 local_vars = dict(zip(argnames, args))
 622                 for stmt in func_m.group('code').split(';'):
 623                     res = interpret_statement(stmt, local_vars)
 624                 return res
 625             return resf
 626
 627         initial_function = extract_function(funcname)
 628         return lambda s: initial_function([s])
 629
 630     def _parse_sig_swf(self, file_contents):
 631         if file_contents[1:3] != b'WS':
 632             raise ExtractorError(
 633                 u'Not an SWF file; header is %r' % file_contents[:3])
 634         if file_contents[:1] == b'C':
 635             content = zlib.decompress(file_contents[8:])
 636         else:
 637             raise NotImplementedError(u'Unsupported compression format %r' %
 638                                       file_contents[:1])
 639
 640         def extract_tags(content):
 641             pos = 0
 642             while pos < len(content):
 643                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
 644                 pos += 2
 645                 tag_code = header16 >> 6
 646                 tag_len = header16 & 0x3f
 647                 if tag_len == 0x3f:
 648                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
 649                     pos += 4
 650                 assert pos+tag_len <= len(content)
 651                 yield (tag_code, content[pos:pos+tag_len])
 652                 pos += tag_len
 653
 654         code_tag = next(tag
 655                         for tag_code, tag in extract_tags(content)
 656                         if tag_code == 82)
 657         p = code_tag.index(b'\0', 4) + 1
 658         code_reader = io.BytesIO(code_tag[p:])
 659
 660         # Parse ABC (AVM2 ByteCode)
 661         def read_int(reader=None):
 662             if reader is None:
 663                 reader = code_reader
 664             res = 0
 665             shift = 0
 666             for _ in range(5):
 667                 buf = reader.read(1)
 668                 assert len(buf) == 1
 669                 b = struct.unpack('<B', buf)[0]
 670                 res = res | ((b & 0x7f) << shift)
 671                 if b & 0x80 == 0:
 672                     break
 673                 shift += 7
 674             return res
 675
 676         def u30(reader=None):
 677             res = read_int(reader)
 678             assert res & 0xf0000000 == 0
 679             return res
 680         u32 = read_int
 681
 682         def s32(reader=None):
 683             v = read_int(reader)
 684             if v & 0x80000000 != 0:
 685                 v = - ((v ^ 0xffffffff) + 1)
 686             return v
 687
 688         def string(reader=None):
 689             if reader is None:
 690                 reader = code_reader
 691             slen = u30(reader)
 692             resb = reader.read(slen)
 693             assert len(resb) == slen
 694             return resb.decode('utf-8')
 695
 696         def read_bytes(count, reader=None):
 697             if reader is None:
 698                 reader = code_reader
 699             resb = reader.read(count)
 700             assert len(resb) == count
 701             return resb
 702
 703         def read_byte(reader=None):
 704             resb = read_bytes(1, reader=reader)
 705             res = struct.unpack('<B', resb)[0]
 706             return res
 707
 708         # minor_version + major_version
 709         _ = read_bytes(2 + 2)
 710
 711         # Constant pool
 712         int_count = u30()
 713         for _c in range(1, int_count):
 714             _ = s32()
 715         uint_count = u30()
 716         for _c in range(1, uint_count):
 717             _ = u32()
 718         double_count = u30()
 719         _ = read_bytes((double_count-1) * 8)
 720         string_count = u30()
 721         constant_strings = [u'']
 722         for _c in range(1, string_count):
 723             s = string()
 724             constant_strings.append(s)
 725         namespace_count = u30()
 726         for _c in range(1, namespace_count):
 727             _ = read_bytes(1)  # kind
 728             _ = u30()  # name
 729         ns_set_count = u30()
 730         for _c in range(1, ns_set_count):
 731             count = u30()
 732             for _c2 in range(count):
 733                 _ = u30()
 734         multiname_count = u30()
 735         MULTINAME_SIZES = {
 736             0x07: 2,  # QName
 737             0x0d: 2,  # QNameA
 738             0x0f: 1,  # RTQName
 739             0x10: 1,  # RTQNameA
 740             0x11: 0,  # RTQNameL
 741             0x12: 0,  # RTQNameLA
 742             0x09: 2,  # Multiname
 743             0x0e: 2,  # MultinameA
 744             0x1b: 1,  # MultinameL
 745             0x1c: 1,  # MultinameLA
 746         }
 747         multinames = [u'']
 748         for _c in range(1, multiname_count):
 749             kind = u30()
 750             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
 751             if kind == 0x07:
 752                 namespace_idx = u30()
 753                 name_idx = u30()
 754                 multinames.append(constant_strings[name_idx])
 755             else:
 756                 multinames.append('[MULTINAME kind: %d]' % kind)
 757                 for _c2 in range(MULTINAME_SIZES[kind]):
 758                     _ = u30()
 759
 760         # Methods
 761         method_count = u30()
 762         MethodInfo = collections.namedtuple(
 763             'MethodInfo',
 764             ['NEED_ARGUMENTS', 'NEED_REST'])
 765         method_infos = []
 766         for method_id in range(method_count):
 767             param_count = u30()
 768             _ = u30()  # return type
 769             for _ in range(param_count):
 770                 _ = u30()  # param type
 771             _ = u30()  # name index (always 0 for youtube)
 772             flags = read_byte()
 773             if flags & 0x08 != 0:
 774                 # Options present
 775                 option_count = u30()
 776                 for c in range(option_count):
 777                     _ = u30()  # val
 778                     _ = read_bytes(1)  # kind
 779             if flags & 0x80 != 0:
 780                 # Param names present
 781                 for _ in range(param_count):
 782                     _ = u30()  # param name
 783             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
 784             method_infos.append(mi)
 785
 786         # Metadata
 787         metadata_count = u30()
 788         for _c in range(metadata_count):
 789             _ = u30()  # name
 790             item_count = u30()
 791             for _c2 in range(item_count):
 792                 _ = u30()  # key
 793                 _ = u30()  # value
 794
 795         def parse_traits_info():
 796             trait_name_idx = u30()
 797             kind_full = read_byte()
 798             kind = kind_full & 0x0f
 799             attrs = kind_full >> 4
 800             methods = {}
 801             if kind in [0x00, 0x06]:  # Slot or Const
 802                 _ = u30()  # Slot id
 803                 type_name_idx = u30()
 804                 vindex = u30()
 805                 if vindex != 0:
 806                     _ = read_byte()  # vkind
 807             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
 808                 _ = u30()  # disp_id
 809                 method_idx = u30()
 810                 methods[multinames[trait_name_idx]] = method_idx
 811             elif kind == 0x04:  # Class
 812                 _ = u30()  # slot_id
 813                 _ = u30()  # classi
 814             elif kind == 0x05:  # Function
 815                 _ = u30()  # slot_id
 816                 function_idx = u30()
 817                 methods[function_idx] = multinames[trait_name_idx]
 818             else:
 819                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
 820
 821             if attrs & 0x4 != 0:  # Metadata present
 822                 metadata_count = u30()
 823                 for _c3 in range(metadata_count):
 824                     _ = u30()
 825
 826             return methods
 827
 828         # Classes
 829         TARGET_CLASSNAME = u'SignatureDecipher'
 830         searched_idx = multinames.index(TARGET_CLASSNAME)
 831         searched_class_id = None
 832         class_count = u30()
 833         for class_id in range(class_count):
 834             name_idx = u30()
 835             if name_idx == searched_idx:
 836                 # We found the class we're looking for!
 837                 searched_class_id = class_id
 838             _ = u30()  # super_name idx
 839             flags = read_byte()
 840             if flags & 0x08 != 0:  # Protected namespace is present
 841                 protected_ns_idx = u30()
 842             intrf_count = u30()
 843             for _c2 in range(intrf_count):
 844                 _ = u30()
 845             _ = u30()  # iinit
 846             trait_count = u30()
 847             for _c2 in range(trait_count):
 848                 _ = parse_traits_info()
 849
 850         if searched_class_id is None:
 851             raise ExtractorError(u'Target class %r not found' %
 852                                  TARGET_CLASSNAME)
 853
 854         method_names = {}
 855         method_idxs = {}
 856         for class_id in range(class_count):
 857             _ = u30()  # cinit
 858             trait_count = u30()
 859             for _c2 in range(trait_count):
 860                 trait_methods = parse_traits_info()
 861                 if class_id == searched_class_id:
 862                     method_names.update(trait_methods.items())
 863                     method_idxs.update(dict(
 864                         (idx, name)
 865                         for name, idx in trait_methods.items()))
 866
 867         # Scripts
 868         script_count = u30()
 869         for _c in range(script_count):
 870             _ = u30()  # init
 871             trait_count = u30()
 872             for _c2 in range(trait_count):
 873                 _ = parse_traits_info()
 874
 875         # Method bodies
 876         method_body_count = u30()
 877         Method = collections.namedtuple('Method', ['code', 'local_count'])
 878         methods = {}
 879         for _c in range(method_body_count):
 880             method_idx = u30()
 881             max_stack = u30()
 882             local_count = u30()
 883             init_scope_depth = u30()
 884             max_scope_depth = u30()
 885             code_length = u30()
 886             code = read_bytes(code_length)
 887             if method_idx in method_idxs:
 888                 m = Method(code, local_count)
 889                 methods[method_idxs[method_idx]] = m
 890             exception_count = u30()
 891             for _c2 in range(exception_count):
 892                 _ = u30()  # from
 893                 _ = u30()  # to
 894                 _ = u30()  # target
 895                 _ = u30()  # exc_type
 896                 _ = u30()  # var_name
 897             trait_count = u30()
 898             for _c2 in range(trait_count):
 899                 _ = parse_traits_info()
 900
 901         assert p + code_reader.tell() == len(code_tag)
 902         assert len(methods) == len(method_idxs)
 903
 904         method_pyfunctions = {}
 905
 906         def extract_function(func_name):
 907             if func_name in method_pyfunctions:
 908                 return method_pyfunctions[func_name]
 909             if func_name not in methods:
 910                 raise ExtractorError(u'Cannot find function %r' % func_name)
 911             m = methods[func_name]
 912
 913             def resfunc(args):
 914                 registers = ['(this)'] + list(args) + [None] * m.local_count
 915                 stack = []
 916                 coder = io.BytesIO(m.code)
 917                 while True:
 918                     opcode = struct.unpack('!B', coder.read(1))[0]
 919                     if opcode == 36:  # pushbyte
 920                         v = struct.unpack('!B', coder.read(1))[0]
 921                         stack.append(v)
 922                     elif opcode == 44:  # pushstring
 923                         idx = u30(coder)
 924                         stack.append(constant_strings[idx])
 925                     elif opcode == 48:  # pushscope
 926                         # We don't implement the scope register, so we'll just
 927                         # ignore the popped value
 928                         stack.pop()
 929                     elif opcode == 70:  # callproperty
 930                         index = u30(coder)
 931                         mname = multinames[index]
 932                         arg_count = u30(coder)
 933                         args = list(reversed(
 934                             [stack.pop() for _ in range(arg_count)]))
 935                         obj = stack.pop()
 936                         if mname == u'split':
 937                             assert len(args) == 1
 938                             assert isinstance(args[0], compat_str)
 939                             assert isinstance(obj, compat_str)
 940                             if args[0] == u'':
 941                                 res = list(obj)
 942                             else:
 943                                 res = obj.split(args[0])
 944                             stack.append(res)
 945                         elif mname == u'slice':
 946                             assert len(args) == 1
 947                             assert isinstance(args[0], int)
 948                             assert isinstance(obj, list)
 949                             res = obj[args[0]:]
 950                             stack.append(res)
 951                         elif mname == u'join':
 952                             assert len(args) == 1
 953                             assert isinstance(args[0], compat_str)
 954                             assert isinstance(obj, list)
 955                             res = args[0].join(obj)
 956                             stack.append(res)
 957                         elif mname in method_pyfunctions:
 958                             stack.append(method_pyfunctions[mname](args))
 959                         else:
 960                             raise NotImplementedError(
 961                                 u'Unsupported property %r on %r'
 962                                 % (mname, obj))
 963                     elif opcode == 72:  # returnvalue
 964                         res = stack.pop()
 965                         return res
 966                     elif opcode == 79:  # callpropvoid
 967                         index = u30(coder)
 968                         mname = multinames[index]
 969                         arg_count = u30(coder)
 970                         args = list(reversed(
 971                             [stack.pop() for _ in range(arg_count)]))
 972                         obj = stack.pop()
 973                         if mname == u'reverse':
 974                             assert isinstance(obj, list)
 975                             obj.reverse()
 976                         else:
 977                             raise NotImplementedError(
 978                                 u'Unsupported (void) property %r on %r'
 979                                 % (mname, obj))
 980                     elif opcode == 93:  # findpropstrict
 981                         index = u30(coder)
 982                         mname = multinames[index]
 983                         res = extract_function(mname)
 984                         stack.append(res)
 985                     elif opcode == 97:  # setproperty
 986                         index = u30(coder)
 987                         value = stack.pop()
 988                         idx = stack.pop()
 989                         obj = stack.pop()
 990                         assert isinstance(obj, list)
 991                         assert isinstance(idx, int)
 992                         obj[idx] = value
 993                     elif opcode == 98:  # getlocal
 994                         index = u30(coder)
 995                         stack.append(registers[index])
 996                     elif opcode == 99:  # setlocal
 997                         index = u30(coder)
 998                         value = stack.pop()
 999                         registers[index] = value
1000                     elif opcode == 102:  # getproperty
1001                         index = u30(coder)
1002                         pname = multinames[index]
1003                         if pname == u'length':
1004                             obj = stack.pop()
1005                             assert isinstance(obj, list)
1006                             stack.append(len(obj))
1007                         else:  # Assume attribute access
1008                             idx = stack.pop()
1009                             assert isinstance(idx, int)
1010                             obj = stack.pop()
1011                             assert isinstance(obj, list)
1012                             stack.append(obj[idx])
1013                     elif opcode == 128:  # coerce
1014                         _ = u30(coder)
1015                     elif opcode == 133:  # coerce_s
1016                         assert isinstance(stack[-1], (type(None), compat_str))
1017                     elif opcode == 164:  # modulo
1018                         value2 = stack.pop()
1019                         value1 = stack.pop()
1020                         res = value1 % value2
1021                         stack.append(res)
1022                     elif opcode == 208:  # getlocal_0
1023                         stack.append(registers[0])
1024                     elif opcode == 209:  # getlocal_1
1025                         stack.append(registers[1])
1026                     elif opcode == 210:  # getlocal_2
1027                         stack.append(registers[2])
1028                     elif opcode == 211:  # getlocal_3
1029                         stack.append(registers[3])
1030                     elif opcode == 214:  # setlocal_2
1031                         registers[2] = stack.pop()
1032                     elif opcode == 215:  # setlocal_3
1033                         registers[3] = stack.pop()
1034                     else:
1035                         raise NotImplementedError(
1036                             u'Unsupported opcode %d' % opcode)
1037
1038             method_pyfunctions[func_name] = resfunc
1039             return resfunc
1040
1041         initial_function = extract_function(u'decipher')
1042         return lambda s: initial_function([s])
1043
1044     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1045         """Turn the encrypted s field into a working signature"""
1046
1047         if player_url is not None:
1048             try:
1049                 if player_url not in self._player_cache:
1050                     func = self._extract_signature_function(
1051                         video_id, player_url, len(s)
1052                     )
1053                     self._player_cache[player_url] = func
1054                 func = self._player_cache[player_url]
1055                 if self._downloader.params.get('youtube_print_sig_code'):
1056                     self._print_sig_code(func, len(s))
1057                 return func(s)
1058             except Exception as e:
1059                 tb = traceback.format_exc()
1060                 self._downloader.report_warning(
1061                     u'Automatic signature extraction failed: ' + tb)
1062
1063         self._downloader.report_warning(
1064             u'Warning: Falling back to static signature algorithm')
1065         return self._static_decrypt_signature(
1066             s, video_id, player_url, age_gate)
1067
1068     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1069         if age_gate:
1070             # The videos with age protection use another player, so the
1071             # algorithms can be different.
1072             if len(s) == 86:
1073                 return s[2:63] + s[82] + s[64:82] + s[63]
1074
1075         if len(s) == 92:
1076             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1077         elif len(s) == 90:
1078             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1079         elif len(s) == 89:
1080             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1081         elif len(s) == 88:
1082             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1083         elif len(s) == 87:
1084             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1085         elif len(s) == 86:
1086             return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
1087         elif len(s) == 85:
1088             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1089         elif len(s) == 84:
1090             return s[81:36:-1] + s[0] + s[35:2:-1]
1091         elif len(s) == 83:
1092             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
1093         elif len(s) == 82:
1094             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1095         elif len(s) == 81:
1096             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1097         elif len(s) == 80:
1098             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1099         elif len(s) == 79:
1100             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1101
1102         else:
1103             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1104
1105     def _decrypt_signature_age_gate(self, s):
1106         # The videos with age protection use another player, so the algorithms
1107         # can be different.
1108         if len(s) == 86:
1109             return s[2:63] + s[82] + s[64:82] + s[63]
1110         else:
1111             # Fallback to the other algortihms
1112             return self._decrypt_signature(s)
1113
1114     def _get_available_subtitles(self, video_id):
1115         try:
1116             sub_list = self._download_webpage(
1117                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1118                 video_id, note=False)
1119         except ExtractorError as err:
1120             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1121             return {}
1122         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1123
1124         sub_lang_list = {}
1125         for l in lang_list:
1126             lang = l[1]
1127             params = compat_urllib_parse.urlencode({
1128                 'lang': lang,
1129                 'v': video_id,
1130                 'fmt': self._downloader.params.get('subtitlesformat'),
1131             })
1132             url = u'http://www.youtube.com/api/timedtext?' + params
1133             sub_lang_list[lang] = url
1134         if not sub_lang_list:
1135             self._downloader.report_warning(u'video doesn\'t have subtitles')
1136             return {}
1137         return sub_lang_list
1138
1139     def _get_available_automatic_caption(self, video_id, webpage):
1140         """We need the webpage for getting the captions url, pass it as an
1141            argument to speed up the process."""
1142         sub_format = self._downloader.params.get('subtitlesformat')
1143         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1144         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1145         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1146         if mobj is None:
1147             self._downloader.report_warning(err_msg)
1148             return {}
1149         player_config = json.loads(mobj.group(1))
1150         try:
1151             args = player_config[u'args']
1152             caption_url = args[u'ttsurl']
1153             timestamp = args[u'timestamp']
1154             # We get the available subtitles
1155             list_params = compat_urllib_parse.urlencode({
1156                 'type': 'list',
1157                 'tlangs': 1,
1158                 'asrs': 1,
1159             })
1160             list_url = caption_url + '&' + list_params
1161             list_page = self._download_webpage(list_url, video_id)
1162             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1163             original_lang_node = caption_list.find('track')
1164             if original_lang_node.attrib.get('kind') != 'asr' :
1165                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1166                 return {}
1167             original_lang = original_lang_node.attrib['lang_code']
1168
1169             sub_lang_list = {}
1170             for lang_node in caption_list.findall('target'):
1171                 sub_lang = lang_node.attrib['lang_code']
1172                 params = compat_urllib_parse.urlencode({
1173                     'lang': original_lang,
1174                     'tlang': sub_lang,
1175                     'fmt': sub_format,
1176                     'ts': timestamp,
1177                     'kind': 'asr',
1178                 })
1179                 sub_lang_list[sub_lang] = caption_url + '&' + params
1180             return sub_lang_list
1181         # An extractor error can be raise by the download process if there are
1182         # no automatic captions but there are subtitles
1183         except (KeyError, ExtractorError):
1184             self._downloader.report_warning(err_msg)
1185             return {}
1186
1187     def _print_formats(self, formats):
1188         print('Available formats:')
1189         for x in formats:
1190             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1191                                         self._video_dimensions.get(x, '???'),
1192                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1193
1194     def _extract_id(self, url):
1195         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1196         if mobj is None:
1197             raise ExtractorError(u'Invalid URL: %s' % url)
1198         video_id = mobj.group(2)
1199         return video_id
1200
1201     def _get_video_url_list(self, url_map):
1202         """
1203         Transform a dictionary in the format {itag:url} to a list of (itag, url)
1204         with the requested formats.
1205         """
1206         req_format = self._downloader.params.get('format', None)
1207         format_limit = self._downloader.params.get('format_limit', None)
1208         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1209         if format_limit is not None and format_limit in available_formats:
1210             format_list = available_formats[available_formats.index(format_limit):]
1211         else:
1212             format_list = available_formats
1213         existing_formats = [x for x in format_list if x in url_map]
1214         if len(existing_formats) == 0:
1215             raise ExtractorError(u'no known formats available for video')
1216         if self._downloader.params.get('listformats', None):
1217             self._print_formats(existing_formats)
1218             return
1219         if req_format is None or req_format == 'best':
1220             video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1221         elif req_format == 'worst':
1222             video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1223         elif req_format in ('-1', 'all'):
1224             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1225         else:
1226             # Specific formats. We pick the first in a slash-delimeted sequence.
1227             # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1228             # available in the specified format. For example,
1229             # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1230             # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1231             # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1232             req_formats = req_format.split('/')
1233             video_url_list = None
1234             for rf in req_formats:
1235                 if rf in url_map:
1236                     video_url_list = [(rf, url_map[rf])]
1237                     break
1238                 if rf in self._video_formats_map:
1239                     for srf in self._video_formats_map[rf]:
1240                         if srf in url_map:
1241                             video_url_list = [(srf, url_map[srf])]
1242                             break
1243                     else:
1244                         continue
1245                     break
1246             if video_url_list is None:
1247                 raise ExtractorError(u'requested format not available')
1248         return video_url_list
1249
1250     def _extract_from_m3u8(self, manifest_url, video_id):
1251         url_map = {}
1252         def _get_urls(_manifest):
1253             lines = _manifest.split('\n')
1254             urls = filter(lambda l: l and not l.startswith('#'),
1255                             lines)
1256             return urls
1257         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1258         formats_urls = _get_urls(manifest)
1259         for format_url in formats_urls:
1260             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1261             url_map[itag] = format_url
1262         return url_map
1263
1264     def _real_extract(self, url):
1265         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1266             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
1267
1268         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1269         mobj = re.search(self._NEXT_URL_RE, url)
1270         if mobj:
1271             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1272         video_id = self._extract_id(url)
1273
1274         # Get video webpage
1275         self.report_video_webpage_download(video_id)
1276         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1277         request = compat_urllib_request.Request(url)
1278         try:
1279             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1280         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1281             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1282
1283         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1284
1285         # Attempt to extract SWF player URL
1286         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1287         if mobj is not None:
1288             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1289         else:
1290             player_url = None
1291
1292         # Get video info
1293         self.report_video_info_webpage_download(video_id)
1294         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1295             self.report_age_confirmation()
1296             age_gate = True
1297             # We simulate the access to the video from www.youtube.com/v/{video_id}
1298             # this can be viewed without login into Youtube
1299             data = compat_urllib_parse.urlencode({'video_id': video_id,
1300                                                   'el': 'embedded',
1301                                                   'gl': 'US',
1302                                                   'hl': 'en',
1303                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1304                                                   'asv': 3,
1305                                                   'sts':'1588',
1306                                                   })
1307             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1308             video_info_webpage = self._download_webpage(video_info_url, video_id,
1309                                     note=False,
1310                                     errnote='unable to download video info webpage')
1311             video_info = compat_parse_qs(video_info_webpage)
1312         else:
1313             age_gate = False
1314             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1315                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1316                         % (video_id, el_type))
1317                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1318                                         note=False,
1319                                         errnote='unable to download video info webpage')
1320                 video_info = compat_parse_qs(video_info_webpage)
1321                 if 'token' in video_info:
1322                     break
1323         if 'token' not in video_info:
1324             if 'reason' in video_info:
1325                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1326             else:
1327                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1328
1329         # Check for "rental" videos
1330         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1331             raise ExtractorError(u'"rental" videos not supported')
1332
1333         # Start extracting information
1334         self.report_information_extraction(video_id)
1335
1336         # uploader
1337         if 'author' not in video_info:
1338             raise ExtractorError(u'Unable to extract uploader name')
1339         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1340
1341         # uploader_id
1342         video_uploader_id = None
1343         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1344         if mobj is not None:
1345             video_uploader_id = mobj.group(1)
1346         else:
1347             self._downloader.report_warning(u'unable to extract uploader nickname')
1348
1349         # title
1350         if 'title' not in video_info:
1351             raise ExtractorError(u'Unable to extract video title')
1352         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1353
1354         # thumbnail image
1355         # We try first to get a high quality image:
1356         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1357                             video_webpage, re.DOTALL)
1358         if m_thumb is not None:
1359             video_thumbnail = m_thumb.group(1)
1360         elif 'thumbnail_url' not in video_info:
1361             self._downloader.report_warning(u'unable to extract video thumbnail')
1362             video_thumbnail = ''
1363         else:   # don't panic if we can't find it
1364             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1365
1366         # upload date
1367         upload_date = None
1368         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1369         if mobj is not None:
1370             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1371             upload_date = unified_strdate(upload_date)
1372
1373         # description
1374         video_description = get_element_by_id("eow-description", video_webpage)
1375         if video_description:
1376             video_description = clean_html(video_description)
1377         else:
1378             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1379             if fd_mobj:
1380                 video_description = unescapeHTML(fd_mobj.group(1))
1381             else:
1382                 video_description = u''
1383
1384         # subtitles
1385         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1386
1387         if self._downloader.params.get('listsubtitles', False):
1388             self._list_available_subtitles(video_id, video_webpage)
1389             return
1390
1391         if 'length_seconds' not in video_info:
1392             self._downloader.report_warning(u'unable to extract video duration')
1393             video_duration = ''
1394         else:
1395             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1396
1397         # Decide which formats to download
1398
1399         try:
1400             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1401             if not mobj:
1402                 raise ValueError('Could not find vevo ID')
1403             info = json.loads(mobj.group(1))
1404             args = info['args']
1405             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1406             # this signatures are encrypted
1407             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1408             if m_s is not None:
1409                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1410                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1411             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1412             if m_s is not None:
1413                 if 'url_encoded_fmt_stream_map' in video_info:
1414                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1415                 else:
1416                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1417             elif 'adaptive_fmts' in video_info:
1418                 if 'url_encoded_fmt_stream_map' in video_info:
1419                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1420                 else:
1421                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1422         except ValueError:
1423             pass
1424
1425         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1426             self.report_rtmp_download()
1427             video_url_list = [(None, video_info['conn'][0])]
1428         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1429             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1430                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1431             url_map = {}
1432             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1433                 url_data = compat_parse_qs(url_data_str)
1434                 if 'itag' in url_data and 'url' in url_data:
1435                     url = url_data['url'][0]
1436                     if 'sig' in url_data:
1437                         url += '&signature=' + url_data['sig'][0]
1438                     elif 's' in url_data:
1439                         encrypted_sig = url_data['s'][0]
1440                         if self._downloader.params.get('verbose'):
1441                             if age_gate:
1442                                 player_version = self._search_regex(
1443                                     r'-(.+)\.swf$',
1444                                     player_url if player_url else None,
1445                                     'flash player', fatal=False)
1446                                 player_desc = 'flash player %s' % player_version
1447                             else:
1448                                 player_version = self._search_regex(
1449                                     r'html5player-(.+?)\.js', video_webpage,
1450                                     'html5 player', fatal=False)
1451                                 player_desc = u'html5 player %s' % player_version
1452
1453                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1454                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1455                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1456
1457                         if not age_gate:
1458                             jsplayer_url_json = self._search_regex(
1459                                 r'"assets":.+?"js":\s*("[^"]+")',
1460                                 video_webpage, u'JS player URL')
1461                             player_url = json.loads(jsplayer_url_json)
1462
1463                         signature = self._decrypt_signature(
1464                             encrypted_sig, video_id, player_url, age_gate)
1465                         url += '&signature=' + signature
1466                     if 'ratebypass' not in url:
1467                         url += '&ratebypass=yes'
1468                     url_map[url_data['itag'][0]] = url
1469             video_url_list = self._get_video_url_list(url_map)
1470             if not video_url_list:
1471                 return
1472         elif video_info.get('hlsvp'):
1473             manifest_url = video_info['hlsvp'][0]
1474             url_map = self._extract_from_m3u8(manifest_url, video_id)
1475             video_url_list = self._get_video_url_list(url_map)
1476             if not video_url_list:
1477                 return
1478
1479         else:
1480             raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
1481
1482         results = []
1483         for format_param, video_real_url in video_url_list:
1484             # Extension
1485             video_extension = self._video_extensions.get(format_param, 'flv')
1486
1487             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1488                                               self._video_dimensions.get(format_param, '???'),
1489                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1490
1491             results.append({
1492                 'id':       video_id,
1493                 'url':      video_real_url,
1494                 'uploader': video_uploader,
1495                 'uploader_id': video_uploader_id,
1496                 'upload_date':  upload_date,
1497                 'title':    video_title,
1498                 'ext':      video_extension,
1499                 'format':   video_format,
1500                 'thumbnail':    video_thumbnail,
1501                 'description':  video_description,
1502                 'player_url':   player_url,
1503                 'subtitles':    video_subtitles,
1504                 'duration':     video_duration
1505             })
1506         return results
1507
1508 class YoutubePlaylistIE(InfoExtractor):
1509     IE_DESC = u'YouTube.com playlists'
1510     _VALID_URL = r"""(?:
1511                         (?:https?://)?
1512                         (?:\w+\.)?
1513                         youtube\.com/
1514                         (?:
1515                            (?:course|view_play_list|my_playlists|artist|playlist|watch)
1516                            \? (?:.*?&)*? (?:p|a|list)=
1517                         |  p/
1518                         )
1519                         ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
1520                         .*
1521                      |
1522                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
1523                      )"""
1524     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
1525     _MAX_RESULTS = 50
1526     IE_NAME = u'youtube:playlist'
1527
1528     @classmethod
1529     def suitable(cls, url):
1530         """Receives a URL and returns True if suitable for this IE."""
1531         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
1532
1533     def _real_extract(self, url):
1534         # Extract playlist id
1535         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1536         if mobj is None:
1537             raise ExtractorError(u'Invalid URL: %s' % url)
1538
1539         # Download playlist videos from API
1540         playlist_id = mobj.group(1) or mobj.group(2)
1541         videos = []
1542
1543         for page_num in itertools.count(1):
1544             start_index = self._MAX_RESULTS * (page_num - 1) + 1
1545             if start_index >= 1000:
1546                 self._downloader.report_warning(u'Max number of results reached')
1547                 break
1548             url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
1549             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
1550
1551             try:
1552                 response = json.loads(page)
1553             except ValueError as err:
1554                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
1555
1556             if 'feed' not in response:
1557                 raise ExtractorError(u'Got a malformed response from YouTube API')
1558             playlist_title = response['feed']['title']['$t']
1559             if 'entry' not in response['feed']:
1560                 # Number of videos is a multiple of self._MAX_RESULTS
1561                 break
1562
1563             for entry in response['feed']['entry']:
1564                 index = entry['yt$position']['$t']
1565                 if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
1566                     videos.append((
1567                         index,
1568                         'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
1569                     ))
1570
1571         videos = [v[1] for v in sorted(videos)]
1572
1573         url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
1574         return [self.playlist_result(url_results, playlist_id, playlist_title)]
1575
1576
1577 class YoutubeChannelIE(InfoExtractor):
1578     IE_DESC = u'YouTube.com channels'
1579     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
1580     _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
1581     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
1582     _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
1583     IE_NAME = u'youtube:channel'
1584
1585     def extract_videos_from_page(self, page):
1586         ids_in_page = []
1587         for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
1588             if mobj.group(1) not in ids_in_page:
1589                 ids_in_page.append(mobj.group(1))
1590         return ids_in_page
1591
1592     def _real_extract(self, url):
1593         # Extract channel id
1594         mobj = re.match(self._VALID_URL, url)
1595         if mobj is None:
1596             raise ExtractorError(u'Invalid URL: %s' % url)
1597
1598         # Download channel page
1599         channel_id = mobj.group(1)
1600         video_ids = []
1601         pagenum = 1
1602
1603         url = self._TEMPLATE_URL % (channel_id, pagenum)
1604         page = self._download_webpage(url, channel_id,
1605                                       u'Downloading page #%s' % pagenum)
1606
1607         # Extract video identifiers
1608         ids_in_page = self.extract_videos_from_page(page)
1609         video_ids.extend(ids_in_page)
1610
1611         # Download any subsequent channel pages using the json-based channel_ajax query
1612         if self._MORE_PAGES_INDICATOR in page:
1613             for pagenum in itertools.count(1):
1614                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
1615                 page = self._download_webpage(url, channel_id,
1616                                               u'Downloading page #%s' % pagenum)
1617
1618                 page = json.loads(page)
1619
1620                 ids_in_page = self.extract_videos_from_page(page['content_html'])
1621                 video_ids.extend(ids_in_page)
1622
1623                 if self._MORE_PAGES_INDICATOR  not in page['load_more_widget_html']:
1624                     break
1625
1626         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
1627
1628         urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
1629         url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
1630         return [self.playlist_result(url_entries, channel_id)]
1631
1632
1633 class YoutubeUserIE(InfoExtractor):
1634     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
1635     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
1636     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1637     _GDATA_PAGE_SIZE = 50
1638     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
1639     IE_NAME = u'youtube:user'
1640
1641     @classmethod
1642     def suitable(cls, url):
1643         # Don't return True if the url can be extracted with other youtube
1644         # extractor, the regex would is too permissive and it would match.
1645         other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
1646         if any(ie.suitable(url) for ie in other_ies): return False
1647         else: return super(YoutubeUserIE, cls).suitable(url)
1648
1649     def _real_extract(self, url):
1650         # Extract username
1651         mobj = re.match(self._VALID_URL, url)
1652         if mobj is None:
1653             raise ExtractorError(u'Invalid URL: %s' % url)
1654
1655         username = mobj.group(1)
1656
1657         # Download video ids using YouTube Data API. Result size per
1658         # query is limited (currently to 50 videos) so we need to query
1659         # page by page until there are no video ids - it means we got
1660         # all of them.
1661
1662         video_ids = []
1663
1664         for pagenum in itertools.count(0):
1665             start_index = pagenum * self._GDATA_PAGE_SIZE + 1
1666
1667             gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
1668             page = self._download_webpage(gdata_url, username,
1669                                           u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
1670
1671             try:
1672                 response = json.loads(page)
1673             except ValueError as err:
1674                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
1675             if 'entry' not in response['feed']:
1676                 # Number of videos is a multiple of self._MAX_RESULTS
1677                 break
1678
1679             # Extract video identifiers
1680             ids_in_page = []
1681             for entry in response['feed']['entry']:
1682                 ids_in_page.append(entry['id']['$t'].split('/')[-1])
1683             video_ids.extend(ids_in_page)
1684
1685             # A little optimization - if current page is not
1686             # "full", ie. does not contain PAGE_SIZE video ids then
1687             # we can assume that this page is the last one - there
1688             # are no more ids on further pages - no need to query
1689             # again.
1690
1691             if len(ids_in_page) < self._GDATA_PAGE_SIZE:
1692                 break
1693
1694         urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
1695         url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
1696         return [self.playlist_result(url_results, playlist_title = username)]
1697
1698 class YoutubeSearchIE(SearchInfoExtractor):
1699     IE_DESC = u'YouTube.com searches'
1700     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
1701     _MAX_RESULTS = 1000
1702     IE_NAME = u'youtube:search'
1703     _SEARCH_KEY = 'ytsearch'
1704
1705     def report_download_page(self, query, pagenum):
1706         """Report attempt to download search page with given number."""
1707         self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1708
1709     def _get_n_results(self, query, n):
1710         """Get a specified number of results for a query"""
1711
1712         video_ids = []
1713         pagenum = 0
1714         limit = n
1715
1716         while (50 * pagenum) < limit:
1717             self.report_download_page(query, pagenum+1)
1718             result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
1719             request = compat_urllib_request.Request(result_url)
1720             try:
1721                 data = compat_urllib_request.urlopen(request).read().decode('utf-8')
1722             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1723                 raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
1724             api_response = json.loads(data)['data']
1725
1726             if not 'items' in api_response:
1727                 raise ExtractorError(u'[youtube] No video results')
1728
1729             new_ids = list(video['id'] for video in api_response['items'])
1730             video_ids += new_ids
1731
1732             limit = min(n, api_response['totalItems'])
1733             pagenum += 1
1734
1735         if len(video_ids) > n:
1736             video_ids = video_ids[:n]
1737         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
1738         return self.playlist_result(videos, query)
1739
1740
1741 class YoutubeShowIE(InfoExtractor):
1742     IE_DESC = u'YouTube.com (multi-season) shows'
1743     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
1744     IE_NAME = u'youtube:show'
1745
1746     def _real_extract(self, url):
1747         mobj = re.match(self._VALID_URL, url)
1748         show_name = mobj.group(1)
1749         webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
1750         # There's one playlist for each season of the show
1751         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
1752         self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
1753         return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
1754
1755
1756 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
1757     """
1758     Base class for extractors that fetch info from
1759     http://www.youtube.com/feed_ajax
1760     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
1761     """
1762     _LOGIN_REQUIRED = True
1763     _PAGING_STEP = 30
1764     # use action_load_personal_feed instead of action_load_system_feed
1765     _PERSONAL_FEED = False
1766
1767     @property
1768     def _FEED_TEMPLATE(self):
1769         action = 'action_load_system_feed'
1770         if self._PERSONAL_FEED:
1771             action = 'action_load_personal_feed'
1772         return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
1773
1774     @property
1775     def IE_NAME(self):
1776         return u'youtube:%s' % self._FEED_NAME
1777
1778     def _real_initialize(self):
1779         self._login()
1780
1781     def _real_extract(self, url):
1782         feed_entries = []
1783         # The step argument is available only in 2.7 or higher
1784         for i in itertools.count(0):
1785             paging = i*self._PAGING_STEP
1786             info = self._download_webpage(self._FEED_TEMPLATE % paging,
1787                                           u'%s feed' % self._FEED_NAME,
1788                                           u'Downloading page %s' % i)
1789             info = json.loads(info)
1790             feed_html = info['feed_html']
1791             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
1792             ids = orderedSet(m.group(1) for m in m_ids)
1793             feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
1794             if info['paging'] is None:
1795                 break
1796         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1797
1798 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
1799     IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
1800     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1801     _FEED_NAME = 'subscriptions'
1802     _PLAYLIST_TITLE = u'Youtube Subscriptions'
1803
1804 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
1805     IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
1806     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1807     _FEED_NAME = 'recommended'
1808     _PLAYLIST_TITLE = u'Youtube Recommended videos'
1809
1810 class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
1811     IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
1812     _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
1813     _FEED_NAME = 'watch_later'
1814     _PLAYLIST_TITLE = u'Youtube Watch Later'
1815     _PAGING_STEP = 100
1816     _PERSONAL_FEED = True
1817
1818 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
1819     IE_NAME = u'youtube:favorites'
1820     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
1821     _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
1822     _LOGIN_REQUIRED = True
1823
1824     def _real_extract(self, url):
1825         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
1826         playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
1827         return self.url_result(playlist_id, 'YoutubePlaylist')