16 from .common import InfoExtractor, SearchInfoExtractor
17 from .subtitles import SubtitlesInfoExtractor
24 compat_urllib_request,
# NOTE(review): this excerpt is an elided numbered listing; some original
# lines between the numbered ones are not visible here.
38 class YoutubeBaseInfoExtractor(InfoExtractor):
39 """Provide base functions for Youtube extractors"""
# Google account sign-in endpoint used by _login().
40 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# URL fetched once to force English/US markup for later scraping.
41 _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
# Age-verification confirmation endpoint used by _confirm_age().
42 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
# Key looked up in ~/.netrc for stored credentials.
43 _NETRC_MACHINE = 'youtube'
44 # If True it will raise an error if no login info is provided
45 _LOGIN_REQUIRED = False
47 def report_lang(self):
48 """Report attempt to set language."""
49 self.to_screen(u'Setting language')
51 def _set_language(self):
# Hit _LANG_URL so YouTube serves English/US pages to later requests.
# NOTE(review): the enclosing try: line is elided from this listing;
# the urlopen call below is presumably inside it — confirm in full file.
52 request = compat_urllib_request.Request(self._LANG_URL)
55 compat_urllib_request.urlopen(request).read()
# Best-effort: network failures only produce a warning, never abort.
56 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
57 self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
# Interior of _login() — its def line (and several statements) are elided
# from this listing.  Flow: fetch credentials, GET the login page to read
# the GALX token, POST the sign-in form, and warn (not fail) on errors.
62 (username, password) = self._get_login_info()
63 # No authentication to be performed
# If credentials are mandatory for this extractor, missing ones are fatal.
65 if self._LOGIN_REQUIRED:
66 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
# Fetch the login page to extract the anti-forgery GALX value.
69 request = compat_urllib_request.Request(self._LOGIN_URL)
71 login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
72 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
73 self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
76 galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
77 login_page, u'Login GALX parameter')
# Sign-in form fields (several entries elided in this listing).
81 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
85 u'PersistentCookie': u'yes',
87 u'bgresponse': u'js_disabled',
88 u'checkConnection': u'',
89 u'checkedDomains': u'youtube',
94 u'signIn': u'Sign in',
96 u'service': u'youtube',
100 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
102 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
103 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
104 request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
107 login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
# If the response still contains the sign-in form, the login was rejected.
108 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
109 self._downloader.report_warning(u'unable to log in: bad username or password')
111 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
112 self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
116 def _confirm_age(self):
# POST the age-confirmation form; unlike _login/_set_language, a network
# failure here is fatal (raises ExtractorError).
119 'action_confirm': 'Confirm',
121 request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
123 self.report_age_confirmation()
124 compat_urllib_request.urlopen(request).read().decode('utf-8')
125 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
126 raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
129 def _real_initialize(self):
# One-time setup: set language, then log in; bail out early if either
# step fails (the intervening return lines are elided in this listing).
130 if self._downloader is None:
132 if not self._set_language():
134 if not self._login():
139 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
140 IE_DESC = u'YouTube.com'
141 _VALID_URL = r"""(?x)^
143 (?:https?://|//)? # http(s):// or protocol-independent URL (optional)
144 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
145 tube\.majestyc\.net/|
146 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
147 (?:.*?\#/)? # handle anchor (#/) redirect urls
148 (?: # the various things that can precede the ID:
149 (?:(?:v|embed|e)/) # v/ or embed/ or e/
150 |(?: # or the v= param in all its forms
151 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
152 (?:\?|\#!?) # the params delimiter ? or # or #!
153 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
157 |youtu\.be/ # just youtu.be/xxxx
159 )? # all until now is optional -> you can pass the naked ID
160 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
161 (?(1).+)? # if we found the ID, everything can follow
# Pattern used to unwrap redirect URLs (e.g. age-verification next_url).
163 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
164 # Listed in order of quality
165 _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
166 # Apple HTTP Live Streaming
167 '96', '95', '94', '93', '92', '132', '151',
168 '85', '84', '102', '83', '101', '82', '100',
171 '138', '137', '248', '136', '247', '135', '246',
172 '245', '244', '134', '243', '133', '242', '160',
174 '141', '172', '140', '171', '139',
# Same itags, but free (webm) formats ranked ahead of same-quality mp4.
176 _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
177 # Apple HTTP Live Streaming
178 '96', '95', '94', '93', '92', '132', '151',
180 '85', '102', '84', '101', '83', '100', '82',
182 '138', '248', '137', '247', '136', '246', '245',
183 '244', '135', '243', '134', '242', '133', '160',
185 '172', '141', '171', '140', '139',
# Maps a container name (requestable as a format) to its itags, best first.
187 _video_formats_map = {
188 'flv': ['35', '34', '6', '5'],
189 '3gp': ['36', '17', '13'],
190 'mp4': ['38', '37', '22', '18'],
191 'webm': ['46', '45', '44', '43'],
# itag -> file extension table (entries elided in this listing).
193 _video_extensions = {
215 # Apple HTTP Live Streaming
# itag -> human-readable resolution table (entries elided in this listing).
249 _video_dimensions = {
# Self-test fixtures consumed by the test harness.
331 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
332 u"file": u"BaW_jenozKc.mp4",
334 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
335 u"uploader": u"Philipp Hagemeister",
336 u"uploader_id": u"phihag",
337 u"upload_date": u"20121002",
338 u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
342 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
343 u"file": u"UxxajLWwzqY.mp4",
344 u"note": u"Test generic use_cipher_signature video (#897)",
346 u"upload_date": u"20120506",
347 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
348 u"description": u"md5:5b292926389560516e384ac437c0ec07",
349 u"uploader": u"Icona Pop",
350 u"uploader_id": u"IconaPop"
354 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
355 u"file": u"07FYdnEawAQ.mp4",
356 u"note": u"Test VEVO video with age protection (#956)",
358 u"upload_date": u"20130703",
359 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
360 u"description": u"md5:64249768eec3bc4276236606ea996373",
361 u"uploader": u"justintimberlakeVEVO",
362 u"uploader_id": u"justintimberlakeVEVO"
366 u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
367 u"file": u"yZIXLfi8CZQ.mp4",
368 u"note": u"Embed-only video (#1746)",
370 u"upload_date": u"20120608",
371 u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
372 u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
373 u"uploader": u"SET India",
374 u"uploader_id": u"setindia"
381 def suitable(cls, url):
382 """Receives a URL and returns True if suitable for this IE."""
383 if YoutubePlaylistIE.suitable(url): return False
384 return re.match(cls._VALID_URL, url) is not None
386 def __init__(self, *args, **kwargs):
387 super(YoutubeIE, self).__init__(*args, **kwargs)
# Per-instance cache of extracted signature functions, keyed by
# (player_url, signature_length) in _decrypt_signature().
388 self._player_cache = {}
390 def report_video_webpage_download(self, video_id):
391 """Report attempt to download video webpage."""
392 self.to_screen(u'%s: Downloading video webpage' % video_id)
394 def report_video_info_webpage_download(self, video_id):
395 """Report attempt to download video info webpage."""
396 self.to_screen(u'%s: Downloading video info webpage' % video_id)
398 def report_information_extraction(self, video_id):
399 """Report attempt to extract video information."""
400 self.to_screen(u'%s: Extracting video information' % video_id)
402 def report_unavailable_format(self, video_id, format):
403 """Report extracted video URL."""
404 self.to_screen(u'%s: Format %s not available' % (video_id, format))
406 def report_rtmp_download(self):
407 """Indicate the download will use the RTMP protocol."""
408 self.to_screen(u'RTMP download detected')
410 def _extract_signature_function(self, video_id, player_url, slen):
# Build (or load from disk cache) a Python function that deciphers a
# scrambled signature of length slen, by analysing the JS or SWF player.
# Several lines (try:/except:, return res) are elided in this listing.
411 id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
413 player_type = id_m.group('ext')
414 player_id = id_m.group('id')
416 # Read from filesystem cache
# Cache key must be a safe filename component.
417 func_id = '%s_%s_%d' % (player_type, player_id, slen)
418 assert os.path.basename(func_id) == func_id
419 cache_dir = get_cachedir(self._downloader.params)
421 cache_enabled = cache_dir is not None
423 cache_fn = os.path.join(os.path.expanduser(cache_dir),
# Cached spec is just a permutation: output char i comes from input index
# cache_spec[i].
427 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
428 cache_spec = json.load(cachef)
429 return lambda s: u''.join(s[i] for i in cache_spec)
431 pass # No cache available
433 if player_type == 'js':
434 code = self._download_webpage(
435 player_url, video_id,
436 note=u'Downloading %s player %s' % (player_type, player_id),
437 errnote=u'Download of %s failed' % player_url)
438 res = self._parse_sig_js(code)
439 elif player_type == 'swf':
440 urlh = self._request_webpage(
441 player_url, video_id,
442 note=u'Downloading %s player %s' % (player_type, player_id),
443 errnote=u'Download of %s failed' % player_url)
445 res = self._parse_sig_swf(code)
447 assert False, 'Invalid player type %r' % player_type
# Probe the extracted function with 0..slen-1 to record the permutation,
# then persist it (cache write failures only warn).
451 test_string = u''.join(map(compat_chr, range(slen)))
452 cache_res = res(test_string)
453 cache_spec = [ord(c) for c in cache_res]
455 os.makedirs(os.path.dirname(cache_fn))
456 except OSError as ose:
457 if ose.errno != errno.EEXIST:
459 write_json_file(cache_spec, cache_fn)
461 tb = traceback.format_exc()
462 self._downloader.report_warning(
463 u'Writing cache to %r failed: %s' % (cache_fn, tb))
467 def _print_sig_code(self, func, slen):
# Debug aid: probe func with a known string and print equivalent Python
# slice code for _static_decrypt_signature.  Loop interior lines are
# elided in this listing.
468 def gen_sig_code(idxs):
469 def _genslice(start, end, step):
# Render s[start:end:step] with Python's shorthand where possible.
470 starts = u'' if start == 0 else str(start)
471 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
472 steps = u'' if step == 1 else (u':%d' % step)
473 return u's[%s%s%s]' % (starts, ends, steps)
476 start = '(Never used)' # Quelch pyflakes warnings - start will be
477 # set as soon as step is set
478 for i, prev in zip(idxs[1:], idxs[:-1]):
482 yield _genslice(start, prev, step)
# Consecutive indices begin (or extend) a slice run; isolated indices
# are emitted as single lookups.
485 if i - prev in [-1, 1]:
490 yield u's[%d]' % prev
494 yield _genslice(start, i, step)
496 test_string = u''.join(map(compat_chr, range(slen)))
497 cache_res = func(test_string)
498 cache_spec = [ord(c) for c in cache_res]
499 expr_code = u' + '.join(gen_sig_code(cache_spec))
500 code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
501 self.to_screen(u'Extracted signature function:\n' + code)
503 def _parse_sig_js(self, jscode):
# Minimal JS interpreter: finds the signature function in the player JS
# and returns a Python callable emulating it.  Numerous lines (returns,
# match guards) are elided in this numbered listing.
504 funcname = self._search_regex(
505 r'signature=([a-zA-Z]+)', jscode,
506 u'Initial JS player signature function name')
# Map single-letter variable names to indices (player uses a..z locals).
511 return string.lowercase.index(varname)
513 def interpret_statement(stmt, local_vars, allow_recursion=20):
# Guard against pathological/cyclic player code.
514 if allow_recursion < 0:
515 raise ExtractorError(u'Recursion limit reached')
517 if stmt.startswith(u'var '):
518 stmt = stmt[len(u'var '):]
# Assignment: either x[i]=expr or x=expr.
519 ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
520 r'=(?P<expr>.*)$', stmt)
522 if ass_m.groupdict().get('index'):
524 lvar = local_vars[ass_m.group('out')]
525 idx = interpret_expression(ass_m.group('index'),
526 local_vars, allow_recursion)
527 assert isinstance(idx, int)
530 expr = ass_m.group('expr')
533 local_vars[ass_m.group('out')] = val
535 expr = ass_m.group('expr')
536 elif stmt.startswith(u'return '):
538 expr = stmt[len(u'return '):]
540 raise ExtractorError(
541 u'Cannot determine left side of statement in %r' % stmt)
543 v = interpret_expression(expr, local_vars, allow_recursion)
546 def interpret_expression(expr, local_vars, allow_recursion):
# Plain variable reference.
551 return local_vars[expr]
# Member access: split/join/length/reverse/slice are the only methods
# the player's cipher code uses.
553 m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
555 member = m.group('member')
556 val = local_vars[m.group('in')]
557 if member == 'split("")':
559 if member == 'join("")':
561 if member == 'length':
563 if member == 'reverse()':
565 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
567 idx = interpret_expression(
568 slice_m.group('idx'), local_vars, allow_recursion-1)
# Indexing: x[expr].
572 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
574 val = local_vars[m.group('in')]
575 idx = interpret_expression(m.group('idx'), local_vars,
# Binary modulo is the only arithmetic operator supported.
579 m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
581 a = interpret_expression(m.group('a'),
582 local_vars, allow_recursion)
583 b = interpret_expression(m.group('b'),
584 local_vars, allow_recursion)
# Function call: lazily extract callee, then evaluate the arguments.
588 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
590 fname = m.group('func')
591 if fname not in functions:
592 functions[fname] = extract_function(fname)
593 argvals = [int(v) if v.isdigit() else local_vars[v]
594 for v in m.group('args').split(',')]
595 return functions[fname](argvals)
596 raise ExtractorError(u'Unsupported JS expression %r' % expr)
598 def extract_function(funcname):
# Locate the function body in the JS source and wrap it in a Python
# callable that interprets it statement by statement.
600 r'function ' + re.escape(funcname) +
601 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
603 argnames = func_m.group('args').split(',')
606 local_vars = dict(zip(argnames, args))
607 for stmt in func_m.group('code').split(';'):
608 res = interpret_statement(stmt, local_vars)
612 initial_function = extract_function(funcname)
# The returned lambda adapts a single string arg to the arg-list calling
# convention used internally.
613 return lambda s: initial_function([s])
615 def _parse_sig_swf(self, file_contents):
# Parse the SWF player, locate class SignatureDecipher in the embedded
# AVM2 (ABC) bytecode, and return a Python callable that emulates its
# 'decipher' method.  Many lines are elided in this numbered listing.
616 if file_contents[1:3] != b'WS':
617 raise ExtractorError(
618 u'Not an SWF file; header is %r' % file_contents[:3])
# 'CWS' header means the body (after the 8-byte header) is zlib-packed.
619 if file_contents[:1] == b'C':
620 content = zlib.decompress(file_contents[8:])
622 raise NotImplementedError(u'Unsupported compression format %r' %
# Iterate SWF tags: 16-bit (code<<6 | len), long tags carry a 32-bit len.
625 def extract_tags(content):
627 while pos < len(content):
628 header16 = struct.unpack('<H', content[pos:pos+2])[0]
630 tag_code = header16 >> 6
631 tag_len = header16 & 0x3f
633 tag_len = struct.unpack('<I', content[pos:pos+4])[0]
635 assert pos+tag_len <= len(content)
636 yield (tag_code, content[pos:pos+tag_len])
640 for tag_code, tag in extract_tags(content)
# Skip flags + NUL-terminated name preceding the ABC data.
642 p = code_tag.index(b'\0', 4) + 1
643 code_reader = io.BytesIO(code_tag[p:])
645 # Parse ABC (AVM2 ByteCode)
# Variable-length u32: 7 bits per byte, high bit = continuation.
646 def read_int(reader=None):
654 b = struct.unpack('<B', buf)[0]
655 res = res | ((b & 0x7f) << shift)
661 def u30(reader=None):
662 res = read_int(reader)
663 assert res & 0xf0000000 == 0
# Signed 32-bit variant of the variable-length int.
667 def s32(reader=None):
669 if v & 0x80000000 != 0:
670 v = - ((v ^ 0xffffffff) + 1)
673 def read_string(reader=None):
677 resb = reader.read(slen)
678 assert len(resb) == slen
679 return resb.decode('utf-8')
681 def read_bytes(count, reader=None):
684 resb = reader.read(count)
685 assert len(resb) == count
688 def read_byte(reader=None):
689 resb = read_bytes(1, reader=reader)
690 res = struct.unpack('<B', resb)[0]
693 # minor_version + major_version
# Constant pool: ints, uints, doubles, strings, namespaces, ns sets,
# multinames.  Index 0 is implicit, hence range(1, count).
698 for _c in range(1, int_count):
701 for _c in range(1, uint_count):
704 read_bytes((double_count-1) * 8)
706 constant_strings = [u'']
707 for _c in range(1, string_count):
709 constant_strings.append(s)
710 namespace_count = u30()
711 for _c in range(1, namespace_count):
715 for _c in range(1, ns_set_count):
717 for _c2 in range(count):
719 multiname_count = u30()
# Trailing u30 field count per multiname kind (used to skip them).
728 0x0e: 2, # MultinameA
729 0x1b: 1, # MultinameL
730 0x1c: 1, # MultinameLA
733 for _c in range(1, multiname_count):
735 assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
737 u30() # namespace_idx
739 multinames.append(constant_strings[name_idx])
741 multinames.append('[MULTINAME kind: %d]' % kind)
742 for _c2 in range(MULTINAME_SIZES[kind]):
# Method signatures: only the flags we need are retained.
747 MethodInfo = collections.namedtuple(
749 ['NEED_ARGUMENTS', 'NEED_REST'])
751 for method_id in range(method_count):
754 for _ in range(param_count):
756 u30() # name index (always 0 for youtube)
758 if flags & 0x08 != 0:
761 for c in range(option_count):
764 if flags & 0x80 != 0:
765 # Param names present
766 for _ in range(param_count):
768 mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
769 method_infos.append(mi)
# Metadata section is parsed only to advance the reader.
772 metadata_count = u30()
773 for _c in range(metadata_count):
776 for _c2 in range(item_count):
780 def parse_traits_info():
# Decode one trait; records name->method_idx for Method/Getter/Setter
# traits, skips the rest (metadata skipped too).
781 trait_name_idx = u30()
782 kind_full = read_byte()
783 kind = kind_full & 0x0f
784 attrs = kind_full >> 4
786 if kind in [0x00, 0x06]: # Slot or Const
788 u30() # type_name_idx
792 elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
795 methods[multinames[trait_name_idx]] = method_idx
796 elif kind == 0x04: # Class
799 elif kind == 0x05: # Function
802 methods[function_idx] = multinames[trait_name_idx]
804 raise ExtractorError(u'Unsupported trait kind %d' % kind)
806 if attrs & 0x4 != 0: # Metadata present
807 metadata_count = u30()
808 for _c3 in range(metadata_count):
809 u30() # metadata index
# Walk instance_info records to find the target class by name.
814 TARGET_CLASSNAME = u'SignatureDecipher'
815 searched_idx = multinames.index(TARGET_CLASSNAME)
816 searched_class_id = None
818 for class_id in range(class_count):
820 if name_idx == searched_idx:
821 # We found the class we're looking for!
822 searched_class_id = class_id
823 u30() # super_name idx
825 if flags & 0x08 != 0: # Protected namespace is present
826 u30() # protected_ns_idx
828 for _c2 in range(intrf_count):
832 for _c2 in range(trait_count):
835 if searched_class_id is None:
836 raise ExtractorError(u'Target class %r not found' %
# class_info records: collect the target class's method name/idx maps.
841 for class_id in range(class_count):
844 for _c2 in range(trait_count):
845 trait_methods = parse_traits_info()
846 if class_id == searched_class_id:
847 method_names.update(trait_methods.items())
848 method_idxs.update(dict(
850 for name, idx in trait_methods.items()))
# Scripts section is skipped (parsed for position only).
854 for _c in range(script_count):
857 for _c2 in range(trait_count):
# Method bodies: keep code + local_count for the methods we care about.
861 method_body_count = u30()
862 Method = collections.namedtuple('Method', ['code', 'local_count'])
864 for _c in range(method_body_count):
868 u30() # init_scope_depth
869 u30() # max_scope_depth
871 code = read_bytes(code_length)
872 if method_idx in method_idxs:
873 m = Method(code, local_count)
874 methods[method_idxs[method_idx]] = m
875 exception_count = u30()
876 for _c2 in range(exception_count):
883 for _c2 in range(trait_count):
886 assert p + code_reader.tell() == len(code_tag)
887 assert len(methods) == len(method_idxs)
889 method_pyfunctions = {}
891 def extract_function(func_name):
# Build (and memoize) a Python interpreter closure over the method's
# AVM2 bytecode; supports only the opcodes the cipher actually uses.
892 if func_name in method_pyfunctions:
893 return method_pyfunctions[func_name]
894 if func_name not in methods:
895 raise ExtractorError(u'Cannot find function %r' % func_name)
896 m = methods[func_name]
# Register 0 is 'this'; remaining registers hold args then locals.
899 registers = ['(this)'] + list(args) + [None] * m.local_count
901 coder = io.BytesIO(m.code)
903 opcode = struct.unpack('!B', coder.read(1))[0]
904 if opcode == 36: # pushbyte
905 v = struct.unpack('!B', coder.read(1))[0]
907 elif opcode == 44: # pushstring
909 stack.append(constant_strings[idx])
910 elif opcode == 48: # pushscope
911 # We don't implement the scope register, so we'll just
912 # ignore the popped value
914 elif opcode == 70: # callproperty
916 mname = multinames[index]
917 arg_count = u30(coder)
918 args = list(reversed(
919 [stack.pop() for _ in range(arg_count)]))
921 if mname == u'split':
922 assert len(args) == 1
923 assert isinstance(args[0], compat_str)
924 assert isinstance(obj, compat_str)
928 res = obj.split(args[0])
930 elif mname == u'slice':
931 assert len(args) == 1
932 assert isinstance(args[0], int)
933 assert isinstance(obj, list)
936 elif mname == u'join':
937 assert len(args) == 1
938 assert isinstance(args[0], compat_str)
939 assert isinstance(obj, list)
940 res = args[0].join(obj)
942 elif mname in method_pyfunctions:
943 stack.append(method_pyfunctions[mname](args))
945 raise NotImplementedError(
946 u'Unsupported property %r on %r'
948 elif opcode == 72: # returnvalue
951 elif opcode == 79: # callpropvoid
953 mname = multinames[index]
954 arg_count = u30(coder)
955 args = list(reversed(
956 [stack.pop() for _ in range(arg_count)]))
958 if mname == u'reverse':
# reverse() mutates in place; no value is pushed back.
959 assert isinstance(obj, list)
962 raise NotImplementedError(
963 u'Unsupported (void) property %r on %r'
965 elif opcode == 93: # findpropstrict
967 mname = multinames[index]
968 res = extract_function(mname)
970 elif opcode == 97: # setproperty
975 assert isinstance(obj, list)
976 assert isinstance(idx, int)
978 elif opcode == 98: # getlocal
980 stack.append(registers[index])
981 elif opcode == 99: # setlocal
984 registers[index] = value
985 elif opcode == 102: # getproperty
987 pname = multinames[index]
988 if pname == u'length':
990 assert isinstance(obj, list)
991 stack.append(len(obj))
992 else: # Assume attribute access
994 assert isinstance(idx, int)
996 assert isinstance(obj, list)
997 stack.append(obj[idx])
998 elif opcode == 128: # coerce
1000 elif opcode == 133: # coerce_s
1001 assert isinstance(stack[-1], (type(None), compat_str))
1002 elif opcode == 164: # modulo
1003 value2 = stack.pop()
1004 value1 = stack.pop()
1005 res = value1 % value2
1007 elif opcode == 208: # getlocal_0
1008 stack.append(registers[0])
1009 elif opcode == 209: # getlocal_1
1010 stack.append(registers[1])
1011 elif opcode == 210: # getlocal_2
1012 stack.append(registers[2])
1013 elif opcode == 211: # getlocal_3
1014 stack.append(registers[3])
1015 elif opcode == 214: # setlocal_2
1016 registers[2] = stack.pop()
1017 elif opcode == 215: # setlocal_3
1018 registers[3] = stack.pop()
1020 raise NotImplementedError(
1021 u'Unsupported opcode %d' % opcode)
1023 method_pyfunctions[func_name] = resfunc
# Entry point: the class's 'decipher' method, adapted to take one string.
1026 initial_function = extract_function(u'decipher')
1027 return lambda s: initial_function([s])
1029 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1030 """Turn the encrypted s field into a working signature"""
# Prefer dynamic extraction from the player (cached per (url, len(s)));
# fall back to the hard-coded static algorithms on any failure.  The
# try:/except and return lines are elided in this listing.
1032 if player_url is not None:
1033 if player_url.startswith(u'//'):
1034 player_url = u'https:' + player_url
1036 player_id = (player_url, len(s))
1037 if player_id not in self._player_cache:
1038 func = self._extract_signature_function(
1039 video_id, player_url, len(s)
1041 self._player_cache[player_id] = func
1042 func = self._player_cache[player_id]
1043 if self._downloader.params.get('youtube_print_sig_code'):
1044 self._print_sig_code(func, len(s))
1047 tb = traceback.format_exc()
1048 self._downloader.report_warning(
1049 u'Automatic signature extraction failed: ' + tb)
1051 self._downloader.report_warning(
1052 u'Warning: Falling back to static signature algorithm')
1054 return self._static_decrypt_signature(
1055 s, video_id, player_url, age_gate)
1057 def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
# Hard-coded unscrambling recipes keyed on len(s); the selecting
# 'if len(s) == N:' guard lines are elided in this numbered listing.
1059 # The videos with age protection use another player, so the
1060 # algorithms can be different.
1062 return s[2:63] + s[82] + s[64:82] + s[63]
1065 return s[86:29:-1] + s[88] + s[28:5:-1]
1067 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1069 return s[84:27:-1] + s[86] + s[26:5:-1]
1071 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1073 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1075 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1077 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1079 return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1081 return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1083 return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1085 return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1087 return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1089 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1091 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1093 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
# Unknown length: let the user know a retry (new player) might succeed.
1096 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1098 def _get_available_subtitles(self, video_id, webpage):
# Query the timedtext list endpoint and build {lang_code: srt_url}.
# try:/return lines are elided in this listing.
1100 sub_list = self._download_webpage(
1101 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1102 video_id, note=False)
1103 except ExtractorError as err:
1104 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1106 lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1111 params = compat_urllib_parse.urlencode({
1114 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
1115 'name': l[0].encode('utf-8'),
1117 url = u'http://www.youtube.com/api/timedtext?' + params
1118 sub_lang_list[lang] = url
1119 if not sub_lang_list:
1120 self._downloader.report_warning(u'video doesn\'t have subtitles')
1122 return sub_lang_list
1124 def _get_available_automatic_caption(self, video_id, webpage):
1125 """We need the webpage for getting the captions url, pass it as an
1126 argument to speed up the process."""
1127 sub_format = self._downloader.params.get('subtitlesformat', 'srt')
1128 self.to_screen(u'%s: Looking for automatic captions' % video_id)
# The caption (tts) URL lives in the inline ytplayer.config JSON.
1129 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1130 err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1132 self._downloader.report_warning(err_msg)
1134 player_config = json.loads(mobj.group(1))
1136 args = player_config[u'args']
1137 caption_url = args[u'ttsurl']
1138 timestamp = args[u'timestamp']
1139 # We get the available subtitles
1140 list_params = compat_urllib_parse.urlencode({
1145 list_url = caption_url + '&' + list_params
1146 caption_list = self._download_xml(list_url, video_id)
# Automatic captions exist only when the original track is ASR-generated.
1147 original_lang_node = caption_list.find('track')
1148 if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
1149 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1151 original_lang = original_lang_node.attrib['lang_code']
# Build a translated-caption URL for every available target language.
1154 for lang_node in caption_list.findall('target'):
1155 sub_lang = lang_node.attrib['lang_code']
1156 params = compat_urllib_parse.urlencode({
1157 'lang': original_lang,
1163 sub_lang_list[sub_lang] = caption_url + '&' + params
1164 return sub_lang_list
1165 # An extractor error can be raise by the download process if there are
1166 # no automatic captions but there are subtitles
1167 except (KeyError, ExtractorError):
1168 self._downloader.report_warning(err_msg)
1171 def _print_formats(self, formats):
# Print one "itag : extension [dimensions] (note)" line per format.
# The enclosing for-loop line is elided in this listing.
1172 print('Available formats:')
1174 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1175 self._video_dimensions.get(x, '???'),
1176 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1178 def _extract_id(self, url):
# Pull the 11-character video ID (group 2 of _VALID_URL) out of url.
1179 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1181 raise ExtractorError(u'Invalid URL: %s' % url)
1182 video_id = mobj.group(2)
1185 def _get_video_url_list(self, url_map):
1187 Transform a dictionary in the format {itag:url} to a list of (itag, url)
1188 with the requested formats.
1190 req_format = self._downloader.params.get('format', None)
1191 format_limit = self._downloader.params.get('format_limit', None)
1192 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
# --max-quality: drop everything better than the limit.
1193 if format_limit is not None and format_limit in available_formats:
1194 format_list = available_formats[available_formats.index(format_limit):]
1196 format_list = available_formats
1197 existing_formats = [x for x in format_list if x in url_map]
1198 if len(existing_formats) == 0:
1199 raise ExtractorError(u'no known formats available for video')
1200 if self._downloader.params.get('listformats', None):
1201 self._print_formats(existing_formats)
1203 if req_format is None or req_format == 'best':
1204 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1205 elif req_format == 'worst':
1206 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1207 elif req_format in ('-1', 'all'):
1208 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1210 # Specific formats. We pick the first in a slash-delimeted sequence.
1211 # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1212 # available in the specified format. For example,
1213 # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1214 # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1215 # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1216 req_formats = req_format.split('/')
1217 video_url_list = None
1218 for rf in req_formats:
1220 video_url_list = [(rf, url_map[rf])]
# Container names resolve through _video_formats_map, best itag first.
1222 if rf in self._video_formats_map:
1223 for srf in self._video_formats_map[rf]:
1225 video_url_list = [(srf, url_map[srf])]
1230 if video_url_list is None:
1231 raise ExtractorError(u'requested format not available')
1232 return video_url_list
1234 def _extract_from_m3u8(self, manifest_url, video_id):
# Download an HLS manifest and build {itag: variant_url} from its
# non-comment lines.  Some lines are elided in this listing.
1236 def _get_urls(_manifest):
1237 lines = _manifest.split('\n')
1238 urls = filter(lambda l: l and not l.startswith('#'),
1241 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1242 formats_urls = _get_urls(manifest)
1243 for format_url in formats_urls:
# The itag is encoded in the variant URL path as .../itag/<n>/...
1244 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1245 url_map[itag] = format_url
1248 def _extract_annotations(self, video_id):
1249 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1250 return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1252 def _real_extract(self, url):
# Main extraction entry point.  NOTE(review): this method continues
# beyond the end of this excerpt, and many lines within are elided.
1253 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1254 mobj = re.search(self._NEXT_URL_RE, url)
1256 url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1257 video_id = self._extract_id(url)
# Fetch the watch page (has_verified=1 skips some interstitials).
1260 self.report_video_webpage_download(video_id)
1261 url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1262 request = compat_urllib_request.Request(url)
1264 video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1265 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1266 raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1268 video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1270 # Attempt to extract SWF player URL
1271 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1272 if mobj is not None:
# Un-escape the JS-escaped slashes in the SWF URL.
1273 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1278 self.report_video_info_webpage_download(video_id)
1279 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1280 self.report_age_confirmation()
1282 # We simulate the access to the video from www.youtube.com/v/{video_id}
1283 # this can be viewed without login into Youtube
1284 data = compat_urllib_parse.urlencode({'video_id': video_id,
1285 'el': 'player_embedded',
1288 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1292 video_info_url = 'https://www.youtube.com/get_video_info?' + data
1293 video_info_webpage = self._download_webpage(video_info_url, video_id,
1295 errnote='unable to download video info webpage')
1296 video_info = compat_parse_qs(video_info_webpage)
# Non-age-gated path: try several 'el' variants until one yields a token.
1299 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1300 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1301 % (video_id, el_type))
1302 video_info_webpage = self._download_webpage(video_info_url, video_id,
1304 errnote='unable to download video info webpage')
1305 video_info = compat_parse_qs(video_info_webpage)
1306 if 'token' in video_info:
1308 if 'token' not in video_info:
1309 if 'reason' in video_info:
1310 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1312 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1314 if 'view_count' in video_info:
1315 view_count = int(video_info['view_count'][0])
1319 # Check for "rental" videos
1320 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1321 raise ExtractorError(u'"rental" videos not supported')
1323 # Start extracting information
1324 self.report_information_extraction(video_id)
# uploader (mandatory), uploader id, title, thumbnail, upload date,
# description — each extracted best-effort below.
1327 if 'author' not in video_info:
1328 raise ExtractorError(u'Unable to extract uploader name')
1329 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1332 video_uploader_id = None
1333 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1334 if mobj is not None:
1335 video_uploader_id = mobj.group(1)
1337 self._downloader.report_warning(u'unable to extract uploader nickname')
1340 if 'title' in video_info:
1341 video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1343 self._downloader.report_warning(u'Unable to extract video title')
1347 # We try first to get a high quality image:
1348 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1349 video_webpage, re.DOTALL)
1350 if m_thumb is not None:
1351 video_thumbnail = m_thumb.group(1)
1352 elif 'thumbnail_url' not in video_info:
1353 self._downloader.report_warning(u'unable to extract video thumbnail')
1354 video_thumbnail = None
1355 else: # don't panic if we can't find it
1356 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1360 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1361 if mobj is not None:
1362 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1363 upload_date = unified_strdate(upload_date)
1366 video_description = get_element_by_id("eow-description", video_webpage)
1367 if video_description:
1368 video_description = clean_html(video_description)
1370 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1372 video_description = unescapeHTML(fd_mobj.group(1))
1374 video_description = u''
1377 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1379 if self._downloader.params.get('listsubtitles', False):
1380 self._list_available_subtitles(video_id, video_webpage)
1383 if 'length_seconds' not in video_info:
1384 self._downloader.report_warning(u'unable to extract video duration')
1387 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1390 video_annotations = None
1391 if self._downloader.params.get('writeannotations', False):
1392 video_annotations = self._extract_annotations(video_id)
1394 # Decide which formats to download
1397 mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1399 raise ValueError('Could not find vevo ID')
1400 info = json.loads(mobj.group(1))
1402 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1403 # this signatures are encrypted
1404 if 'url_encoded_fmt_stream_map' not in args:
1405 raise ValueError(u'No stream_map present') # caught below
1406 re_signature = re.compile(r'[&,]s=')
1407 m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
1409 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1410 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1411 m_s = re_signature.search(args.get('adaptive_fmts', u''))
1413 if 'adaptive_fmts' in video_info:
1414 video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
1416 video_info['adaptive_fmts'] = [args['adaptive_fmts']]
1420 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1421 self.report_rtmp_download()
1422 video_url_list = [(None, video_info['conn'][0])]
1423 elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
1424 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
1425 if 'rtmpe%3Dyes' in encoded_url_map:
1426 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1428 for url_data_str in encoded_url_map.split(','):
1429 url_data = compat_parse_qs(url_data_str)
1430 if 'itag' in url_data and 'url' in url_data:
1431 url = url_data['url'][0]
1432 if 'sig' in url_data:
1433 url += '&signature=' + url_data['sig'][0]
1434 elif 's' in url_data:
1435 encrypted_sig = url_data['s'][0]
1436 if self._downloader.params.get('verbose'):
1438 if player_url is None:
1439 player_version = 'unknown'
1441 player_version = self._search_regex(
1442 r'-(.+)\.swf$', player_url,
1443 u'flash player', fatal=False)
1444 player_desc = 'flash player %s' % player_version
1446 player_version = self._search_regex(
1447 r'html5player-(.+?)\.js', video_webpage,
1448 'html5 player', fatal=False)
1449 player_desc = u'html5 player %s' % player_version
1451 parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1452 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1453 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1456 jsplayer_url_json = self._search_regex(
1457 r'"assets":.+?"js":\s*("[^"]+")',
1458 video_webpage, u'JS player URL')
1459 player_url = json.loads(jsplayer_url_json)
1461 signature = self._decrypt_signature(
1462 encrypted_sig, video_id, player_url, age_gate)
1463 url += '&signature=' + signature
1464 if 'ratebypass' not in url:
1465 url += '&ratebypass=yes'
1466 url_map[url_data['itag'][0]] = url
1467 video_url_list = self._get_video_url_list(url_map)
1468 if not video_url_list:
1470 elif video_info.get('hlsvp'):
1471 manifest_url = video_info['hlsvp'][0]
1472 url_map = self._extract_from_m3u8(manifest_url, video_id)
1473 video_url_list = self._get_video_url_list(url_map)
1474 if not video_url_list:
1478 raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1481 for itag, video_real_url in video_url_list:
1483 video_extension = self._video_extensions.get(itag, 'flv')
1485 video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
1486 self._video_dimensions.get(itag, '???'),
1487 ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
1491 'url': video_real_url,
1492 'uploader': video_uploader,
1493 'uploader_id': video_uploader_id,
1494 'upload_date': upload_date,
1495 'title': video_title,
1496 'ext': video_extension,
1497 'format': video_format,
1499 'thumbnail': video_thumbnail,
1500 'description': video_description,
1501 'player_url': player_url,
1502 'subtitles': video_subtitles,
1503 'duration': video_duration,
1504 'age_limit': 18 if age_gate else 0,
1505 'annotations': video_annotations,
1506 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
1507 'view_count': view_count,
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extract all videos of a YouTube playlist as a playlist result.

    Matches playlist/course/artist URLs as well as watch URLs that carry a
    ``list=`` parameter; bare playlist IDs (PL/EC/UU/FL prefixes) also match.
    """
    IE_DESC = u'YouTube.com playlists'
    # NOTE(review): interior alternation lines of this verbose regex were
    # missing from the damaged listing; restored from upstream youtube-dl
    # history — verify against a known-good checkout.
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is a verbose (re.VERBOSE) pattern, so the default
        # InfoExtractor.suitable() cannot be used as-is.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL: a watch URL with both v= and
        # list= either downloads just the video (--no-playlist) or the list.
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            url = self._TEMPLATE_URL % (playlist_id, page_num)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            matches = re.finditer(self._VIDEO_RE, page)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            # Stop when the page has no "next" link.
            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                break

        playlist_title = self._og_search_title(page)

        url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                       for vid_id in ids]
        return self.playlist_result(url_results, playlist_id, playlist_title)
class YoutubeChannelIE(InfoExtractor):
    """Extract all videos of a YouTube channel as a playlist result."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the de-duplicated video ids found in *page*, in order."""
        ids_in_page = []
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        video_ids = []
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        # Auto-generated channels serve all their videos on one page and
        # return empty ajax pages, so they need special-casing.
        if re.search(r'channel-header-autogenerated-label', channel_page) is not None:
            autogenerated = True
        else:
            autogenerated = False

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Download all channel pages using the json-based channel_ajax query
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)

                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                # The widget html carries the load-more marker while more
                # pages remain.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                       for video_id in video_ids]
        return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor):
    """Extract all uploads of a YouTube user via the GData API, paging
    _GDATA_PAGE_SIZE ids at a time until the feed runs out."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors: the regex is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # Extract video identifiers
            ids_in_page = []
            for entry in response['feed']['entry']:
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        url_results = [
            self.url_result(video_id, 'Youtube', video_id=video_id)
            for video_id in video_ids]
        return self.playlist_result(url_results, playlist_title=username)
class YoutubeSearchIE(SearchInfoExtractor):
    """Search YouTube via the GData videos API ("ytsearchN:query")."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    # NOTE(review): this cap was missing from the damaged listing; restored
    # from upstream youtube-dl history — verify.
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n

        # The API serves 50 results per page; keep paging until we have
        # enough ids or the reported totalItems is reached.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as YoutubeSearchIE but orders results by publication date."""
    IE_DESC = u'YouTube.com searches, newest videos first'
    # Identical API query plus orderby=published.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
class YoutubeShowIE(InfoExtractor):
    """Resolve a /show/ page into one playlist result per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_name = mobj.group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # NOTE(review): the default paging step was missing from the damaged
    # listing; restored from upstream history (YoutubeHistoryIE overrides
    # it per-request) — verify.
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
                self.url_result(video_id, 'Youtube', video_id=video_id)
                for video_id in ids)
            # A null 'paging' field marks the last page of the feed.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Authenticated subscriptions feed ("ytsubs" / ":ytsubscriptions")."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Authenticated recommended-videos feed ("ytrec")."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Authenticated watch-later list ("ytwatchlater"); personal feed."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch-later is per-account, so use the personal-feed ajax action.
    _PERSONAL_FEED = True
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Authenticated watch history ("ythistory"); personal feed."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Was a u'' literal with raw regex backslashes; made a raw string so the
    # escapes are unambiguous (pattern itself unchanged).
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'

    def _real_extract(self, url):
        # The history feed's paging step is dynamic: scrape it from the page
        # before delegating to the generic feed extractor.
        webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History')
        data_paging = self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging')
        # The step is actually a ridiculously big number (like 1374343569725646)
        self._PAGING_STEP = int(data_paging)
        return super(YoutubeHistoryIE, self)._real_extract(url)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites page to its playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The favourites page embeds its backing playlist id in a list= param;
        # hand off to YoutubePlaylistIE for the actual extraction.
        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
1855 class YoutubeTruncatedURLIE(InfoExtractor):
1856 IE_NAME = 'youtube:truncated_url'
1857 IE_DESC = False # Do not list
1858 _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'
1860 def _real_extract(self, url):
1861 raise ExtractorError(
1862 u'Did you forget to quote the URL? Remember that & is a meta '
1863 u'character in most shells, so you want to put the URL in quotes, '
1865 u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
1866 u' (or simply youtube-dl BaW_jenozKc ).',