17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
25 compat_urllib_request,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google-account login form, POSTed to by _login().
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # Pins the site to English/US so scraped markup is predictable.
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    # Age-gate confirmation endpoint, POSTed to by _confirm_age().
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Key looked up in the user's .netrc file for stored credentials.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
46 def report_lang(self):
47 """Report attempt to set language."""
48 self.to_screen(u'Setting language')
    def _set_language(self):
        """Request _LANG_URL so the session's cookies pin the site to en/US."""
        request = compat_urllib_request.Request(self._LANG_URL)
        # NOTE(review): the 'try:' opening this handler (and the report_lang
        # call) are elided in this listing; the read() below runs inside it.
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # Best effort only: failure is reported but not fatal.
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
        # --- body of _login(); the 'def' line is elided in this listing ---
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if self._LOGIN_REQUIRED:
            raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        # Fetch the login page (inside an elided 'try:').
        request = compat_urllib_request.Request(self._LOGIN_URL)
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))

        # Anti-forgery tokens embedded in the login form markup.
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)

        # Fields of the login form POST (dict literal partially elided).
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'PersistentCookie': u'yes',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'signIn': u'Sign in',
                u'service': u'youtube',
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # If the login form is still present, authentication failed.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
    def _confirm_age(self):
        """POST the age-verification form (age_form literal partially elided)."""
                'action_confirm': 'Confirm',
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # Unlike language/login, failing the age check is fatal.
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
    def _real_initialize(self):
        """Session setup: set language, then log in.

        NOTE(review): the early-return bodies under each guard (and the
        trailing age-confirmation step) are elided in this listing.
        """
        if self._downloader is None:
        if not self._set_language():
        if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    # _VALID_URL verbose-regex body (assignment line and closing elided);
    # group 1 = host/prefix part, group 2 = the 11-character video id.
                     (?:https?://)? # http(s):// (optional)
                     (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                        tube\.majestyc\.net/|
                        youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
                     (?:.*?\#/)? # handle anchor (#/) redirect urls
                     (?: # the various things that can precede the ID:
                         (?:(?:v|embed|e)/) # v/ or embed/ or e/
                         |(?: # or the v= param in all its forms
                             (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                             (?:\?|\#!?) # the params delimiter ? or # or #!
                             (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
                     |youtu\.be/ # just youtu.be/xxxx
                     )? # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
                     (?(1).+)? # if we found the ID, everything can follow
    # Regex pulling the next_url redirect parameter out of gate URLs.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          '85', '84', '102', '83', '101', '82', '100',
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          '141', '172', '140', '171', '139',
    # Same itags, but free (WebM) containers ranked ahead of their
    # non-free counterparts.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          '85', '102', '84', '101', '83', '100', '82',
                          '138', '248', '137', '247', '136', '246', '245',
                          '244', '135', '243', '134', '242', '133', '160',
                          '172', '141', '171', '140', '139',
    # Container name -> itags carrying that container, best quality first.
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    # itag -> file extension (entries elided in this listing).
    _video_extensions = {
        # Apple HTTP Live Streaming
    # itag -> human-readable dimensions (entries elided in this listing).
    _video_dimensions = {
    # _TESTS entries (list/dict delimiters partially elided in this listing).
        u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
        u"file": u"BaW_jenozKc.mp4",
        u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
        u"uploader": u"Philipp Hagemeister",
        u"uploader_id": u"phihag",
        u"upload_date": u"20121002",
        u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
        u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
        u"file": u"1ltcDfZMA3U.flv",
        u"note": u"Test VEVO video (#897)",
        u"upload_date": u"20070518",
        u"title": u"Maps - It Will Find You",
        u"description": u"Music video by Maps performing It Will Find You.",
        u"uploader": u"MuteUSA",
        u"uploader_id": u"MuteUSA"
        u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
        u"file": u"UxxajLWwzqY.mp4",
        u"note": u"Test generic use_cipher_signature video (#897)",
        u"upload_date": u"20120506",
        u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
        u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
        u"uploader": u"Icona Pop",
        u"uploader_id": u"IconaPop"
        u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
        u"file": u"07FYdnEawAQ.mp4",
        u"note": u"Test VEVO video with age protection (#956)",
        u"upload_date": u"20130703",
        u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
        u"description": u"md5:64249768eec3bc4276236606ea996373",
        u"uploader": u"justintimberlakeVEVO",
        u"uploader_id": u"justintimberlakeVEVO"
        u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
        u'file': u'TGi3HqYrWHE.mp4',
        u'note': u'm3u8 video',
        u'title': u'Triathlon - Men - London 2012 Olympic Games',
        u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
        u'uploader': u'olympic',
        u'upload_date': u'20120807',
        u'uploader_id': u'olympic',
        u'skip_download': True,
400 def suitable(cls, url):
401 """Receives a URL and returns True if suitable for this IE."""
402 if YoutubePlaylistIE.suitable(url): return False
403 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    def __init__(self, *args, **kwargs):
        """Initialize like InfoExtractor; _player_cache memoizes the
        per-player signature functions built by _decrypt_signature."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        self._player_cache = {}
409 def report_video_webpage_download(self, video_id):
410 """Report attempt to download video webpage."""
411 self.to_screen(u'%s: Downloading video webpage' % video_id)
413 def report_video_info_webpage_download(self, video_id):
414 """Report attempt to download video info webpage."""
415 self.to_screen(u'%s: Downloading video info webpage' % video_id)
417 def report_information_extraction(self, video_id):
418 """Report attempt to extract video information."""
419 self.to_screen(u'%s: Extracting video information' % video_id)
421 def report_unavailable_format(self, video_id, format):
422 """Report extracted video URL."""
423 self.to_screen(u'%s: Format %s not available' % (video_id, format))
425 def report_rtmp_download(self):
426 """Indicate the download will use the RTMP protocol."""
427 self.to_screen(u'RTMP download detected')
    def _extract_signature_function(self, video_id, player_url, slen):
        """Build (and disk-cache) a Python function that replicates the
        player's signature scrambler for signatures of length slen.
        Several lines (try/raise/else) are elided in this listing."""
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        assert os.path.basename(func_id) == func_id
        cache_dir = self._downloader.params.get('cachedir',
                                                u'~/.youtube-dl/cache')
        if cache_dir != u'NONE':
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                # A cached spec is just the permutation: output position i
                # takes input character cache_spec[i].
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                    return lambda s: u''.join(s[i] for i in cache_spec)
                pass # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_swf(code)
            assert False, 'Invalid player type %r' % player_type

        if cache_dir is not False:
            # Derive the permutation by running the function on a probe
            # string of distinct characters, then persist it as JSON.
            cache_res = res(map(compat_chr, range(slen)))
            cache_spec = [ord(c) for c in cache_res]
                os.makedirs(os.path.dirname(cache_fn))
            except OSError as ose:
                if ose.errno != errno.EEXIST:
                write_json_file(cache_spec, cache_fn)
            except Exception as e:
                # Cache write failures are non-fatal.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))
    def _print_sig_code(self, func, slen):
        """Print Python source that reproduces func for length-slen
        signatures (used with the youtube_print_sig_code option)."""
        def gen_sig_code(idxs):
            # Compress the index permutation into slice/index expressions.
487 def _genslice(start, end, step):
488 starts = u'' if start == 0 else str(start)
489 ends = u':%d' % (end+step)
490 steps = u'' if step == 1 else (':%d' % step)
491 return u's[%s%s%s]' % (starts, ends, steps)
            # Walk consecutive indices, emitting a slice for each arithmetic
            # run and a single lookup otherwise (several branch/assignment
            # lines are elided in this listing).
            for i, prev in zip(idxs[1:], idxs[:-1]):
                        yield _genslice(start, prev, step)
                if i - prev in [-1, 1]:
                    yield u's[%d]' % prev
                yield _genslice(start, i, step)

        # Recover the permutation from a probe run, then print code for it.
        cache_res = func(map(compat_chr, range(slen)))
        cache_spec = [ord(c) for c in cache_res]
        expr_code = u' + '.join(gen_sig_code(cache_spec))
        code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
        self.to_screen(u'Extracted signature:\n' + code)
    def _parse_sig_js(self, jscode):
        """Interpret the player JS just enough to rebuild its signature
        function as a Python callable (several lines elided here)."""
        funcname = self._search_regex(
            r'signature=([a-zA-Z]+)', jscode,
            u'Initial JS player signature function name')
            # Single-letter JS variable names map onto indices 0..25.
            # NOTE(review): string.lowercase is Python 2 only
            # (string.ascii_lowercase on Python 3) — confirm target version.
            return string.lowercase.index(varname)
528 def interpret_statement(stmt, local_vars, allow_recursion=20):
529 if allow_recursion < 0:
530 raise ExctractorError(u'Recursion limit reached')
532 if stmt.startswith(u'var '):
533 stmt = stmt[len(u'var '):]
534 ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
535 r'=(?P<expr>.*)$', stmt)
537 if ass_m.groupdict().get('index'):
539 lvar = local_vars[ass_m.group('out')]
540 idx = interpret_expression(ass_m.group('index'),
541 local_vars, allow_recursion)
542 assert isinstance(idx, int)
545 expr = ass_m.group('expr')
548 local_vars[ass_m.group('out')] = val
550 expr = ass_m.group('expr')
551 elif stmt.startswith(u'return '):
553 expr = stmt[len(u'return '):]
555 raise ExtractorError(
556 u'Cannot determine left side of statement in %r' % stmt)
558 v = interpret_expression(expr, local_vars, allow_recursion)
        def interpret_expression(expr, local_vars, allow_recursion):
            # Evaluate a small subset of JS expressions: variables, member
            # access, indexing, the '%' operator and function calls.
            # (Literal and match-guard lines are elided in this listing.)
                return local_vars[expr]

            # Member access: var.member
            m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
                member = m.group('member')
                val = local_vars[m.group('in')]
                if member == 'split("")':
                if member == 'join("")':
                if member == 'length':
                if member == 'reverse()':
                slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
                    idx = interpret_expression(
                        slice_m.group('idx'), local_vars, allow_recursion-1)

            # Indexing: var[idx]
                r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
                val = local_vars[m.group('in')]
                idx = interpret_expression(m.group('idx'), local_vars,

            # Binary modulo: a % b
            m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
                a = interpret_expression(m.group('a'),
                                         local_vars, allow_recursion)
                b = interpret_expression(m.group('b'),
                                         local_vars, allow_recursion)

            # Function call: f(arg, ...) — callees compiled lazily.
                r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
                fname = m.group('func')
                if fname not in functions:
                    functions[fname] = extract_function(fname)
                argvals = [int(v) if v.isdigit() else local_vars[v]
                           for v in m.group('args').split(',')]
                return functions[fname](argvals)
            raise ExtractorError(u'Unsupported JS expression %r' % expr)
        def extract_function(funcname):
            # Locate the function body in the JS source and wrap it in a
            # Python callable that interprets one statement at a time
            # (regex call, closure def and return lines elided here).
                r'function ' + re.escape(funcname) +
                r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            argnames = func_m.group('args').split(',')
                local_vars = dict(zip(argnames, args))
                for stmt in func_m.group('code').split(';'):
                    res = interpret_statement(stmt, local_vars)

        initial_function = extract_function(funcname)
        # The signature function takes the scrambled signature as its
        # single argument.
        return lambda s: initial_function([s])
    def _parse_sig_swf(self, file_contents):
        """Decompile the Flash player's SWF far enough to rebuild its
        'decipher' routine as a Python function.

        NOTE(review): many lines (loop headers, try blocks, else branches,
        count reads) are elided in this listing; comments below describe
        only what the visible code shows.
        """
        if file_contents[1:3] != b'WS':
            raise ExtractorError(
                u'Not an SWF file; header is %r' % file_contents[:3])
        if file_contents[:1] == b'C':
            # 'CWS' header: body is zlib-compressed after the 8-byte header.
            content = zlib.decompress(file_contents[8:])
            raise NotImplementedError(u'Unsupported compression format %r' %

        def extract_tags(content):
            # Yield (tag_code, tag_body) pairs of the SWF tag stream.
            while pos < len(content):
                header16 = struct.unpack('<H', content[pos:pos+2])[0]
                tag_code = header16 >> 6
                tag_len = header16 & 0x3f
                # A short length of 0x3f means the real length follows
                # as a little-endian uint32.
                tag_len = struct.unpack('<I', content[pos:pos+4])[0]
                assert pos+tag_len <= len(content)
                yield (tag_code, content[pos:pos+tag_len])

                    for tag_code, tag in extract_tags(content)
        # Skip the DoABC tag's flags + NUL-terminated name to reach the
        # ABC payload.
        p = code_tag.index(b'\0', 4) + 1
        code_reader = io.BytesIO(code_tag[p:])

        # Parse ABC (AVM2 ByteCode)
        def read_int(reader=None):
            # Variable-length integer: 7 data bits per byte.
                b = struct.unpack('<B', buf)[0]
                res = res | ((b & 0x7f) << shift)

        def u30(reader=None):
            # 30-bit unsigned value (top 2 bits must be clear).
            res = read_int(reader)
            assert res & 0xf0000000 == 0

        def s32(reader=None):
            # Sign-extend a 32-bit two's-complement value.
            if v & 0x80000000 != 0:
                v = - ((v ^ 0xffffffff) + 1)

        def string(reader=None):
            # Length-prefixed UTF-8 string.
            resb = reader.read(slen)
            assert len(resb) == slen
            return resb.decode('utf-8')

        def read_bytes(count, reader=None):
            resb = reader.read(count)
            assert len(resb) == count

        def read_byte(reader=None):
            resb = read_bytes(1, reader=reader)
            res = struct.unpack('<B', resb)[0]

        # minor_version + major_version
        _ = read_bytes(2 + 2)

        # Constant pool: ints/uints/doubles skipped, strings retained.
        for _c in range(1, int_count):
        for _c in range(1, uint_count):
        _ = read_bytes((double_count-1) * 8)
        constant_strings = [u'']
        for _c in range(1, string_count):
            constant_strings.append(s)
        namespace_count = u30()
        for _c in range(1, namespace_count):
            _ = read_bytes(1) # kind
        for _c in range(1, ns_set_count):
            for _c2 in range(count):
        multiname_count = u30()
        # Extra u30 fields per multiname kind (dict literal partially elided).
            0x0e: 2, # MultinameA
            0x1b: 1, # MultinameL
            0x1c: 1, # MultinameLA
        for _c in range(1, multiname_count):
            assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
                namespace_idx = u30()
                multinames.append(constant_strings[name_idx])
                multinames.append('[MULTINAME kind: %d]' % kind)
                for _c2 in range(MULTINAME_SIZES[kind]):

        # Method signatures (first namedtuple argument elided in listing).
        MethodInfo = collections.namedtuple(
            ['NEED_ARGUMENTS', 'NEED_REST'])
        for method_id in range(method_count):
            _ = u30() # return type
            for _ in range(param_count):
                _ = u30() # param type
            _ = u30() # name index (always 0 for youtube)
            if flags & 0x08 != 0:
                for c in range(option_count):
                    _ = read_bytes(1) # kind
            if flags & 0x80 != 0:
                # Param names present
                for _ in range(param_count):
                    _ = u30() # param name
            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
            method_infos.append(mi)

        # Metadata: skipped.
        metadata_count = u30()
        for _c in range(metadata_count):
            for _c2 in range(item_count):

        def parse_traits_info():
            # Parse one trait; records name -> method index mappings.
            trait_name_idx = u30()
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            if kind in [0x00, 0x06]: # Slot or Const
                type_name_idx = u30()
                _ = read_byte() # vkind
            elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
                methods[multinames[trait_name_idx]] = method_idx
            elif kind == 0x04: # Class
            elif kind == 0x05: # Function
                methods[function_idx] = multinames[trait_name_idx]
                raise ExtractorError(u'Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0: # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):

        # Classes: find the one holding the decipher routine.
        TARGET_CLASSNAME = u'SignatureDecipher'
        searched_idx = multinames.index(TARGET_CLASSNAME)
        searched_class_id = None
        for class_id in range(class_count):
            if name_idx == searched_idx:
                # We found the class we're looking for!
                searched_class_id = class_id
            _ = u30() # super_name idx
            if flags & 0x08 != 0: # Protected namespace is present
                protected_ns_idx = u30()
            for _c2 in range(intrf_count):
            for _c2 in range(trait_count):
                _ = parse_traits_info()

        if searched_class_id is None:
            raise ExtractorError(u'Target class %r not found' %

        # Second pass: collect the target class's method names/indices.
        for class_id in range(class_count):
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                if class_id == searched_class_id:
                    method_names.update(trait_methods.items())
                    method_idxs.update(dict(
                        for name, idx in trait_methods.items()))

        # Scripts: traits parsed only to advance the reader.
        for _c in range(script_count):
            for _c2 in range(trait_count):
                _ = parse_traits_info()

        # Method bodies: keep the bytecode of the methods we care about.
        method_body_count = u30()
        Method = collections.namedtuple('Method', ['code', 'local_count'])
        for _c in range(method_body_count):
            init_scope_depth = u30()
            max_scope_depth = u30()
            code = read_bytes(code_length)
            if method_idx in method_idxs:
                m = Method(code, local_count)
                methods[method_idxs[method_idx]] = m
            exception_count = u30()
            for _c2 in range(exception_count):
            for _c2 in range(trait_count):
                _ = parse_traits_info()

        assert p + code_reader.tell() == len(code_tag)
        assert len(methods) == len(method_idxs)

        method_pyfunctions = {}

        def extract_function(func_name):
            # Translate one ABC method into a Python callable (memoized
            # via method_pyfunctions; the inner resfunc def is elided).
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

                # Register 0 is 'this'; then arguments, then locals.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                coder = io.BytesIO(m.code)
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36: # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                    elif opcode == 44: # pushstring
                        stack.append(constant_strings[idx])
                    elif opcode == 48: # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                    elif opcode == 70: # callproperty
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                            res = obj.split(args[0])
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                        elif mname in method_pyfunctions:
                            stack.append(method_pyfunctions[mname](args))
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                    elif opcode == 72: # returnvalue
                    elif opcode == 79: # callpropvoid
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        if mname == u'reverse':
                            assert isinstance(obj, list)
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                    elif opcode == 93: # findpropstrict
                        mname = multinames[index]
                        res = extract_function(mname)
                    elif opcode == 97: # setproperty
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                    elif opcode == 98: # getlocal
                        stack.append(registers[index])
                    elif opcode == 99: # setlocal
                        registers[index] = value
                    elif opcode == 102: # getproperty
                        pname = multinames[index]
                        if pname == u'length':
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else: # Assume attribute access
                            assert isinstance(idx, int)
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128: # coerce
                    elif opcode == 133: # coerce_s
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164: # modulo
                        value2 = stack.pop()
                        value1 = stack.pop()
                        res = value1 % value2
                    elif opcode == 208: # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209: # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210: # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211: # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214: # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215: # setlocal_3
                        registers[3] = stack.pop()
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            method_pyfunctions[func_name] = resfunc

        initial_function = extract_function(u'decipher')
        # The decipher method takes the scrambled signature as sole argument.
        return lambda s: initial_function([s])
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""
        if player_url is not None:
            # Dynamic path: derive the scrambler from the player itself,
            # memoized per player URL (runs inside an elided 'try:').
                if player_url not in self._player_cache:
                    func = self._extract_signature_function(
                        video_id, player_url, len(s)
                    self._player_cache[player_url] = func
                func = self._player_cache[player_url]
                if self._downloader.params.get('youtube_print_sig_code'):
                    self._print_sig_code(func, len(s))
            except Exception as e:
                # Extraction failures fall through to the static tables.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Automatic signature extraction failed: ' + tb)

                self._downloader.report_warning(
                    u'Warning: Falling back to static signature algorithm')
        return self._static_decrypt_signature(
            s, video_id, player_url, age_gate)
    def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
        """Hard-coded unscrambling rules dispatched on len(s).

        NOTE(review): the 'if/elif len(s) == N:' guard lines between the
        returns are elided in this listing — each return below handles one
        specific signature length.
        """
            # The videos with age protection use another player, so the
            # algorithms can be different.
            return s[2:63] + s[82] + s[64:82] + s[63]
            return s[86:29:-1] + s[88] + s[28:5:-1]
            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
            return s[84:27:-1] + s[86] + s[26:5:-1]
            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
            return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
            return s[81:36:-1] + s[0] + s[35:2:-1]
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
            return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]

        # No table entry for this length.
        raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1109 def _decrypt_signature_age_gate(self, s):
1110 # The videos with age protection use another player, so the algorithms
1113 return s[2:63] + s[82] + s[64:82] + s[63]
1115 # Fallback to the other algortihms
1116 return self._decrypt_signature(s)
    def _get_available_subtitles(self, video_id):
        """Return {language: timedtext URL} for the video's subtitle tracks
        (try/loop/dict lines partially elided in this listing)."""
            sub_list = self._download_webpage(
                'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            # Listing failure is non-fatal; warn and continue.
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))

        # (name, lang_code) pairs from the track list XML.
        lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
            params = compat_urllib_parse.urlencode({
                'fmt': self._downloader.params.get('subtitlesformat'),
            url = u'http://www.youtube.com/api/timedtext?' + params
            sub_lang_list[lang] = url
        if not sub_lang_list:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
        return sub_lang_list
    def _get_available_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption URLs live in the inline ytplayer.config JSON blob.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
            self._downloader.report_warning(err_msg)
            player_config = json.loads(mobj.group(1))
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
            list_url = caption_url + '&' + list_params
            list_page = self._download_webpage(list_url, video_id)
            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
            # kind="asr" marks the auto-generated (speech-recognized) track.
            original_lang_node = caption_list.find('track')
            if original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
            original_lang = original_lang_node.attrib['lang_code']

            # One translated caption URL per available target language.
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _print_formats(self, formats):
        """Print itag, container and dimensions for each available format.
        NOTE(review): the 'for x in formats:' loop line is elided here."""
        print('Available formats:')
            print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                        self._video_dimensions.get(x, '???'),
                                        ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
    def _extract_id(self, url):
        """Return the 11-character video id (group 2 of _VALID_URL).
        NOTE(review): the 'if mobj is None:' guard before the raise and the
        final 'return video_id' are elided in this listing."""
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(2)
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        # format_limit caps the quality at that itag's position in the list.
        if format_limit is not None and format_limit in available_formats:
            format_list = available_formats[available_formats.index(format_limit):]
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            # Specific formats. We pick the first in a slash-delimeted sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                    video_url_list = [(rf, url_map[rf])]
                if rf in self._video_formats_map:
                    # Container name given: try its itags best-first.
                    for srf in self._video_formats_map[rf]:
                            video_url_list = [(srf, url_map[srf])]
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
    def _extract_from_m3u8(self, manifest_url, video_id):
        """Build {itag: media URL} from an m3u8 variant manifest
        (url_map init and return lines elided in this listing)."""
        def _get_urls(_manifest):
            # Non-comment lines of the manifest are the variant URLs.
            lines = _manifest.split('\n')
            urls = filter(lambda l: l and not l.startswith('#'),
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            # The itag is encoded in the variant URL path.
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
    def _real_extract(self, url):
        """Main extraction entry point.

        NOTE(review): this function continues past the end of this listing
        and several interior lines (try:, else:, dict entries) are elided.
        """
        # Naked '?feature=...' URLs usually mean an unquoted shell URL.
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)

        # Fetch the watch page (inside an elided 'try:').
        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            # Un-escape the JSON-escaped URL (\\/ -> /).
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                                                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                    errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            # Try several 'el' variants until one yields a token.
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                        % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                                        errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader id (from the channel/user link in the page markup).
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        if mobj is not None:
            video_uploader_id = mobj.group(1)
            # NOTE(review): the 'else:' before this warning is elided.
            self._downloader.report_warning(u'unable to extract uploader nickname')

        if 'title' not in video_info:
            raise ExtractorError(u'Unable to extract video title')
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
            video_thumbnail = ''
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1372 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1373 if mobj is not None:
1374 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1375 upload_date = unified_strdate(upload_date)
1378 video_description = get_element_by_id("eow-description", video_webpage)
1379 if video_description:
1380 video_description = clean_html(video_description)
1382 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1384 video_description = unescapeHTML(fd_mobj.group(1))
1386 video_description = u''
1389 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1391 if self._downloader.params.get('listsubtitles', False):
1392 self._list_available_subtitles(video_id, video_webpage)
1395 if 'length_seconds' not in video_info:
1396 self._downloader.report_warning(u'unable to extract video duration')
1399 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1401 # Decide which formats to download
1404 mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1406 raise ValueError('Could not find vevo ID')
1407 info = json.loads(mobj.group(1))
1409 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1410 # this signatures are encrypted
1411 m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1413 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1414 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1415 m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1417 if 'url_encoded_fmt_stream_map' in video_info:
1418 video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1420 video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1421 elif 'adaptive_fmts' in video_info:
1422 if 'url_encoded_fmt_stream_map' in video_info:
1423 video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1425 video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1429 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1430 self.report_rtmp_download()
1431 video_url_list = [(None, video_info['conn'][0])]
1432 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1433 if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1434 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1436 for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1437 url_data = compat_parse_qs(url_data_str)
1438 if 'itag' in url_data and 'url' in url_data:
1439 url = url_data['url'][0]
1440 if 'sig' in url_data:
1441 url += '&signature=' + url_data['sig'][0]
1442 elif 's' in url_data:
1443 encrypted_sig = url_data['s'][0]
1444 if self._downloader.params.get('verbose'):
1446 player_version = self._search_regex(
1448 player_url if player_url else None,
1449 'flash player', fatal=False)
1450 player_desc = 'flash player %s' % player_version
1452 player_version = self._search_regex(
1453 r'html5player-(.+?)\.js', video_webpage,
1454 'html5 player', fatal=False)
1455 player_desc = u'html5 player %s' % player_version
1457 parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1458 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1459 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1462 jsplayer_url_json = self._search_regex(
1463 r'"assets":.+?"js":\s*("[^"]+")',
1464 video_webpage, u'JS player URL')
1465 player_url = json.loads(jsplayer_url_json)
1467 signature = self._decrypt_signature(
1468 encrypted_sig, video_id, player_url, age_gate)
1469 url += '&signature=' + signature
1470 if 'ratebypass' not in url:
1471 url += '&ratebypass=yes'
1472 url_map[url_data['itag'][0]] = url
1473 video_url_list = self._get_video_url_list(url_map)
1474 if not video_url_list:
1476 elif video_info.get('hlsvp'):
1477 manifest_url = video_info['hlsvp'][0]
1478 url_map = self._extract_from_m3u8(manifest_url, video_id)
1479 video_url_list = self._get_video_url_list(url_map)
1480 if not video_url_list:
1484 raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
1487 for format_param, video_real_url in video_url_list:
1489 video_extension = self._video_extensions.get(format_param, 'flv')
1491 video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1492 self._video_dimensions.get(format_param, '???'),
1493 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1497 'url': video_real_url,
1498 'uploader': video_uploader,
1499 'uploader_id': video_uploader_id,
1500 'upload_date': upload_date,
1501 'title': video_title,
1502 'ext': video_extension,
1503 'format': video_format,
1504 'thumbnail': video_thumbnail,
1505 'description': video_description,
1506 'player_url': player_url,
1507 'subtitles': video_subtitles,
1508 'duration': video_duration
class YoutubePlaylistIE(InfoExtractor):
    """Extract all videos of a YouTube playlist via the gdata v2 API."""
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:course|view_play_list|my_playlists|artist|playlist|watch)
                        \? (?:.*?&)*? (?:p|a|list)=
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'

    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written with whitespace and grouping for readability,
        # so it must be matched with re.VERBOSE here and in _real_extract.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        # The id may be captured by either alternative of _VALID_URL,
        # depending on which URL form matched.
        playlist_id = mobj.group(1) or mobj.group(2)
        for page_num in itertools.count(1):
            # gdata start-index is 1-based.
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The gdata API refuses to page past entry 1000.
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
            for entry in response['feed']['entry']:
                # yt$position preserves the playlist's own ordering.
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
        # Sort by the collected position index, then keep only the URLs.
        videos = [v[1] for v in sorted(videos)]
        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    """Extract all videos of a YouTube channel, first from the HTML
    listing and then from the json-based c4_browse_ajax endpoint."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker present in a page whenever more pages are available.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        # Collect video ids referenced by watch links, keeping first-seen
        # order and skipping duplicates.
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
            url = self._TEMPLATE_URL % (channel_id, pagenum)
            page = self._download_webpage(url, channel_id,
                                          u'Downloading page #%s' % pagenum)

            # Extract video identifiers
            ids_in_page = self.extract_videos_from_page(page)
            video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                # The ajax endpoint answers with JSON that wraps the HTML.
                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                # The load-more widget disappears on the last page.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    """Extract all uploads of a YouTube user via the gdata API."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # The gdata API caps each query (currently at 50 results), so the
    # uploads feed is fetched page by page.
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor; this regex is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        for pagenum in itertools.count(0):
            # gdata start-index is 1-based.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS

            # Extract video identifiers
            for entry in response['feed']['entry']:
                # The gdata entry id has the form .../videos/<video_id>.
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Handle "ytsearch" queries through the gdata search API (jsonc)."""
    IE_DESC = u'YouTube.com searches'
    # Each API page holds at most 50 results; start-index is 1-based.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if not 'items' in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Never ask for more results than the API reports as available.
            limit = min(n, api_response['totalItems'])

        # The last page may overshoot the requested count; trim the excess.
        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extract a YouTube show page as a list of its season playlists."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        show_name = match.group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is exposed as its own playlist link.
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            results.append(self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-account, so a logged-in session is mandatory.
    _LOGIN_REQUIRED = True

    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    def _FEED_TEMPLATE(self):
        # Build the feed_ajax URL; '%%s' leaves a '%s' placeholder behind
        # for the paging offset that _real_extract fills in per page.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):

    def _real_extract(self, url):
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            # Pull every watch URL out of the rendered feed HTML, keeping
            # first-seen order and dropping duplicates.
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # A null 'paging' field marks the final page of the feed.
            if info['paging'] is None:
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's subscriptions feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's recommended-videos feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's "Watch Later" feed."""
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch Later is per-user, so the personal feed action must be used.
    _PERSONAL_FEED = True
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites page to its backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # Favourites are only visible when logged in.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the playlist that backs it;
        # hand the actual extraction off to the playlist extractor.
        favourites_page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')