youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_HTTPError,
  20     compat_kwargs,
  21     compat_parse_qs,
  22     compat_urllib_parse_unquote,
  23     compat_urllib_parse_unquote_plus,
  24     compat_urllib_parse_urlencode,
  25     compat_urllib_parse_urlparse,
  26     compat_urlparse,
  27     compat_str,
  28 )
  29 from ..utils import (
  30     clean_html,
  31     dict_get,
  32     error_to_compat_str,
  33     ExtractorError,
  34     float_or_none,
  35     get_element_by_attribute,
  36     get_element_by_id,
  37     int_or_none,
  38     mimetype2ext,
  39     orderedSet,
  40     parse_codecs,
  41     parse_duration,
  42     qualities,
  43     remove_quotes,
  44     remove_start,
  45     smuggle_url,
  46     str_or_none,
  47     str_to_int,
  48     try_get,
  49     unescapeHTML,
  50     unified_strdate,
  51     unsmuggle_url,
  52     uppercase_escape,
  53     url_or_none,
  54     urlencode_postdata,
  55 )
  56
  57
  58 class YoutubeBaseInfoExtractor(InfoExtractor):
  59     """Provide base functions for Youtube extractors"""
  60     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  61     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  62
  63     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  64     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  65     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  66
  67     _NETRC_MACHINE = 'youtube'
  68     # If True it will raise an error if no login info is provided
  69     _LOGIN_REQUIRED = False
  70
  71     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  72
  73     def _set_language(self):
  74         self._set_cookie(
  75             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  76             # YouTube sets the expire time to about two months
  77             expire_time=time.time() + 2 * 30 * 24 * 3600)
  78
  79     def _ids_to_results(self, ids):
  80         return [
  81             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  82             for vid_id in ids]
  83
  84     def _login(self):
  85         """
  86         Attempt to log in to YouTube.
  87         True is returned if successful or skipped.
  88         False is returned if login failed.
  89
  90         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  91         """
  92         username, password = self._get_login_info()
  93         # No authentication to be performed
  94         if username is None:
  95             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
  96                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  97             return True
  98
  99         login_page = self._download_webpage(
 100             self._LOGIN_URL, None,
 101             note='Downloading login page',
 102             errnote='unable to fetch login page', fatal=False)
 103         if login_page is False:
 104             return
 105
 106         login_form = self._hidden_inputs(login_page)
 107
 108         def req(url, f_req, note, errnote):
 109             data = login_form.copy()
 110             data.update({
 111                 'pstMsg': 1,
 112                 'checkConnection': 'youtube',
 113                 'checkedDomains': 'youtube',
 114                 'hl': 'en',
 115                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 116                 'f.req': json.dumps(f_req),
 117                 'flowName': 'GlifWebSignIn',
 118                 'flowEntry': 'ServiceLogin',
 119                 # TODO: reverse actual botguard identifier generation algo
 120                 'bgRequest': '["identifier",""]',
 121             })
 122             return self._download_json(
 123                 url, None, note=note, errnote=errnote,
 124                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 125                 fatal=False,
 126                 data=urlencode_postdata(data), headers={
 127                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 128                     'Google-Accounts-XSRF': 1,
 129                 })
 130
 131         def warn(message):
 132             self._downloader.report_warning(message)
 133
 134         lookup_req = [
 135             username,
 136             None, [], None, 'US', None, None, 2, False, True,
 137             [
 138                 None, None,
 139                 [2, 1, None, 1,
 140                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 141                  None, [], 4],
 142                 1, [None, None, []], None, None, None, True
 143             ],
 144             username,
 145         ]
 146
 147         lookup_results = req(
 148             self._LOOKUP_URL, lookup_req,
 149             'Looking up account info', 'Unable to look up account info')
 150
 151         if lookup_results is False:
 152             return False
 153
 154         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 155         if not user_hash:
 156             warn('Unable to extract user hash')
 157             return False
 158
 159         challenge_req = [
 160             user_hash,
 161             None, 1, None, [1, None, None, None, [password, None, True]],
 162             [
 163                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 164                 1, [None, None, []], None, None, None, True
 165             ]]
 166
 167         challenge_results = req(
 168             self._CHALLENGE_URL, challenge_req,
 169             'Logging in', 'Unable to log in')
 170
 171         if challenge_results is False:
 172             return
 173
 174         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 175         if login_res:
 176             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 177             warn(
 178                 'Unable to login: %s' % 'Invalid password'
 179                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 180             return False
 181
 182         res = try_get(challenge_results, lambda x: x[0][-1], list)
 183         if not res:
 184             warn('Unable to extract result entry')
 185             return False
 186
 187         login_challenge = try_get(res, lambda x: x[0][0], list)
 188         if login_challenge:
 189             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 190             if challenge_str == 'TWO_STEP_VERIFICATION':
 191                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 192                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 193                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 194                 if status == 'QUOTA_EXCEEDED':
 195                     warn('Exceeded the limit of TFA codes, try later')
 196                     return False
 197
 198                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 199                 if not tl:
 200                     warn('Unable to extract TL')
 201                     return False
 202
 203                 tfa_code = self._get_tfa_info('2-step verification code')
 204
 205                 if not tfa_code:
 206                     warn(
 207                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 208                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 209                     return False
 210
 211                 tfa_code = remove_start(tfa_code, 'G-')
 212
 213                 tfa_req = [
 214                     user_hash, None, 2, None,
 215                     [
 216                         9, None, None, None, None, None, None, None,
 217                         [None, tfa_code, True, 2]
 218                     ]]
 219
 220                 tfa_results = req(
 221                     self._TFA_URL.format(tl), tfa_req,
 222                     'Submitting TFA code', 'Unable to submit TFA code')
 223
 224                 if tfa_results is False:
 225                     return False
 226
 227                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 228                 if tfa_res:
 229                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 230                     warn(
 231                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 232                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 233                     return False
 234
 235                 check_cookie_url = try_get(
 236                     tfa_results, lambda x: x[0][-1][2], compat_str)
 237             else:
 238                 CHALLENGES = {
 239                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 240                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 241                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 242                 }
 243                 challenge = CHALLENGES.get(
 244                     challenge_str,
 245                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 246                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 247                 return False
 248         else:
 249             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 250
 251         if not check_cookie_url:
 252             warn('Unable to extract CheckCookie URL')
 253             return False
 254
 255         check_cookie_results = self._download_webpage(
 256             check_cookie_url, None, 'Checking cookie', fatal=False)
 257
 258         if check_cookie_results is False:
 259             return False
 260
 261         if 'https://myaccount.google.com/' not in check_cookie_results:
 262             warn('Unable to log in')
 263             return False
 264
 265         return True
 266
 267     def _download_webpage_handle(self, *args, **kwargs):
 268         query = kwargs.get('query', {}).copy()
 269         query['disable_polymer'] = 'true'
 270         kwargs['query'] = query
 271         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 272             *args, **compat_kwargs(kwargs))
 273
 274     def _real_initialize(self):
 275         if self._downloader is None:
 276             return
 277         self._set_language()
 278         if not self._login():
 279             return
 280
 281
 282 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 283     # Extract entries from page with "Load more" button
 284     def _entries(self, page, playlist_id):
 285         more_widget_html = content_html = page
 286         for page_num in itertools.count(1):
 287             for entry in self._process_page(content_html):
 288                 yield entry
 289
 290             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 291             if not mobj:
 292                 break
 293
 294             count = 0
 295             retries = 3
 296             while count <= retries:
 297                 try:
 298                     # Downloading page may result in intermittent 5xx HTTP error
 299                     # that is usually worked around with a retry
 300                     more = self._download_json(
 301                         'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 302                         'Downloading page #%s%s'
 303                         % (page_num, ' (retry #%d)' % count if count else ''),
 304                         transform_source=uppercase_escape)
 305                     break
 306                 except ExtractorError as e:
 307                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
 308                         count += 1
 309                         if count <= retries:
 310                             continue
 311                     raise
 312
 313             content_html = more['content_html']
 314             if not content_html.strip():
 315                 # Some webpages show a "Load more" button but they don't
 316                 # have more videos
 317                 break
 318             more_widget_html = more['load_more_widget_html']
 319
 320
 321 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 322     def _process_page(self, content):
 323         for video_id, video_title in self.extract_videos_from_page(content):
 324             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 325
 326     def extract_videos_from_page(self, page):
 327         ids_in_page = []
 328         titles_in_page = []
 329         for mobj in re.finditer(self._VIDEO_RE, page):
 330             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 331             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 332                 continue
 333             video_id = mobj.group('id')
 334             video_title = unescapeHTML(mobj.group('title'))
 335             if video_title:
 336                 video_title = video_title.strip()
 337             try:
 338                 idx = ids_in_page.index(video_id)
 339                 if video_title and not titles_in_page[idx]:
 340                     titles_in_page[idx] = video_title
 341             except ValueError:
 342                 ids_in_page.append(video_id)
 343                 titles_in_page.append(video_title)
 344         return zip(ids_in_page, titles_in_page)
 345
 346
 347 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 348     def _process_page(self, content):
 349         for playlist_id in orderedSet(re.findall(
 350                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 351                 content)):
 352             yield self.url_result(
 353                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 354
 355     def _real_extract(self, url):
 356         playlist_id = self._match_id(url)
 357         webpage = self._download_webpage(url, playlist_id)
 358         title = self._og_search_title(webpage, fatal=False)
 359         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 360
 361
 362 class YoutubeIE(YoutubeBaseInfoExtractor):
 363     IE_DESC = 'YouTube.com'
 364     _VALID_URL = r"""(?x)^
 365                      (
 366                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 367                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 368                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 369                             (?:www\.)?pwnyoutube\.com/|
 370                             (?:www\.)?hooktube\.com/|
 371                             (?:www\.)?yourepeat\.com/|
 372                             tube\.majestyc\.net/|
 373                             (?:(?:www|dev)\.)?invidio\.us/|
 374                             (?:www\.)?invidiou\.sh/|
 375                             (?:www\.)?invidious\.snopyta\.org/|
 376                             (?:www\.)?invidious\.kabi\.tk/|
 377                             (?:www\.)?vid\.wxzm\.sx/|
 378                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 379                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 380                          (?:                                                  # the various things that can precede the ID:
 381                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 382                              |(?:                                             # or the v= param in all its forms
 383                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 384                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 385                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 386                                  v=
 387                              )
 388                          ))
 389                          |(?:
 390                             youtu\.be|                                        # just youtu.be/xxxx
 391                             vid\.plus|                                        # or vid.plus/xxxx
 392                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 393                          )/
 394                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 395                          )
 396                      )?                                                       # all until now is optional -> you can pass the naked ID
 397                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 398                      (?!.*?\blist=
 399                         (?:
 400                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 401                             WL                                                # WL are handled by the watch later IE
 402                         )
 403                      )
 404                      (?(1).+)?                                                # if we found the ID, everything can follow
 405                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 406     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 407     _formats = {
 408         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 409         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 410         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 411         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 412         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 413         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 414         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 415         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 416         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 417         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 418         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 419         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 420         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 421         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 422         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 423         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 424         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 425         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 426
 427
 428         # 3D videos
 429         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 430         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 431         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 432         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 433         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 434         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 435         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 436
 437         # Apple HTTP Live Streaming
 438         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 439         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 440         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 441         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 442         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 443         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 444         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 445         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 446
 447         # DASH mp4 video
 448         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 449         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 450         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 451         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 452         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 453         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 454         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 455         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 456         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 457         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 458         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 459         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 460
 461         # Dash mp4 audio
 462         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 463         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 464         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 465         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 466         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 467         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 468         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 469
 470         # Dash webm
 471         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 472         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 473         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 474         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 475         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 476         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 477         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 478         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 479         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 480         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 481         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 482         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 483         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 484         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 485         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 486         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 487         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 488         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 489         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 490         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 491         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 492         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 493
 494         # Dash webm audio
 495         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 496         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 497
 498         # Dash webm audio with opus inside
 499         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 500         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 501         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 502
 503         # RTMP (unnamed)
 504         '_rtmp': {'protocol': 'rtmp'},
 505
 506         # av01 video only formats sometimes served with "unknown" codecs
 507         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 508         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 509         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 510         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 511     }
 512     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 513
 514     _GEO_BYPASS = False
 515
 516     IE_NAME = 'youtube'
 517     _TESTS = [
 518         {
 519             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 520             'info_dict': {
 521                 'id': 'BaW_jenozKc',
 522                 'ext': 'mp4',
 523                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 524                 'uploader': 'Philipp Hagemeister',
 525                 'uploader_id': 'phihag',
 526                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 527                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 528                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 529                 'upload_date': '20121002',
 530                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 531                 'categories': ['Science & Technology'],
 532                 'tags': ['youtube-dl'],
 533                 'duration': 10,
 534                 'view_count': int,
 535                 'like_count': int,
 536                 'dislike_count': int,
 537                 'start_time': 1,
 538                 'end_time': 9,
 539             }
 540         },
 541         {
 542             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 543             'note': 'Test generic use_cipher_signature video (#897)',
 544             'info_dict': {
 545                 'id': 'UxxajLWwzqY',
 546                 'ext': 'mp4',
 547                 'upload_date': '20120506',
 548                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 549                 'alt_title': 'I Love It (feat. Charli XCX)',
 550                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 551                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 552                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 553                          'iconic ep', 'iconic', 'love', 'it'],
 554                 'duration': 180,
 555                 'uploader': 'Icona Pop',
 556                 'uploader_id': 'IconaPop',
 557                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 558                 'creator': 'Icona Pop',
 559                 'track': 'I Love It (feat. Charli XCX)',
 560                 'artist': 'Icona Pop',
 561             }
 562         },
 563         {
 564             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 565             'note': 'Test VEVO video with age protection (#956)',
 566             'info_dict': {
 567                 'id': '07FYdnEawAQ',
 568                 'ext': 'mp4',
 569                 'upload_date': '20130703',
 570                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
 571                 'alt_title': 'Tunnel Vision',
 572                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
 573                 'duration': 419,
 574                 'uploader': 'justintimberlakeVEVO',
 575                 'uploader_id': 'justintimberlakeVEVO',
 576                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 577                 'creator': 'Justin Timberlake',
 578                 'track': 'Tunnel Vision',
 579                 'artist': 'Justin Timberlake',
 580                 'age_limit': 18,
 581             }
 582         },
 583         {
 584             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 585             'note': 'Embed-only video (#1746)',
 586             'info_dict': {
 587                 'id': 'yZIXLfi8CZQ',
 588                 'ext': 'mp4',
 589                 'upload_date': '20120608',
 590                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 591                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 592                 'uploader': 'SET India',
 593                 'uploader_id': 'setindia',
 594                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 595                 'age_limit': 18,
 596             }
 597         },
 598         {
 599             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 600             'note': 'Use the first video ID in the URL',
 601             'info_dict': {
 602                 'id': 'BaW_jenozKc',
 603                 'ext': 'mp4',
 604                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 605                 'uploader': 'Philipp Hagemeister',
 606                 'uploader_id': 'phihag',
 607                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 608                 'upload_date': '20121002',
 609                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 610                 'categories': ['Science & Technology'],
 611                 'tags': ['youtube-dl'],
 612                 'duration': 10,
 613                 'view_count': int,
 614                 'like_count': int,
 615                 'dislike_count': int,
 616             },
 617             'params': {
 618                 'skip_download': True,
 619             },
 620         },
 621         {
 622             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 623             'note': '256k DASH audio (format 141) via DASH manifest',
 624             'info_dict': {
 625                 'id': 'a9LDPn-MO4I',
 626                 'ext': 'm4a',
 627                 'upload_date': '20121002',
 628                 'uploader_id': '8KVIDEO',
 629                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 630                 'description': '',
 631                 'uploader': '8KVIDEO',
 632                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 633             },
 634             'params': {
 635                 'youtube_include_dash_manifest': True,
 636                 'format': '141',
 637             },
 638             'skip': 'format 141 not served anymore',
 639         },
 640         # DASH manifest with encrypted signature
 641         {
 642             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 643             'info_dict': {
 644                 'id': 'IB3lcPjvWLA',
 645                 'ext': 'm4a',
 646                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
 647                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
 648                 'duration': 244,
 649                 'uploader': 'AfrojackVEVO',
 650                 'uploader_id': 'AfrojackVEVO',
 651                 'upload_date': '20131011',
 652             },
 653             'params': {
 654                 'youtube_include_dash_manifest': True,
 655                 'format': '141/bestaudio[ext=m4a]',
 656             },
 657         },
 658         # JS player signature function name containing $
 659         {
 660             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 661             'info_dict': {
 662                 'id': 'nfWlot6h_JM',
 663                 'ext': 'm4a',
 664                 'title': 'Taylor Swift - Shake It Off',
 665                 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
 666                 'duration': 242,
 667                 'uploader': 'TaylorSwiftVEVO',
 668                 'uploader_id': 'TaylorSwiftVEVO',
 669                 'upload_date': '20140818',
 670                 'creator': 'Taylor Swift',
 671             },
 672             'params': {
 673                 'youtube_include_dash_manifest': True,
 674                 'format': '141/bestaudio[ext=m4a]',
 675             },
 676         },
 677         # Controversy video
 678         {
 679             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 680             'info_dict': {
 681                 'id': 'T4XJQO3qol8',
 682                 'ext': 'mp4',
 683                 'duration': 219,
 684                 'upload_date': '20100909',
 685                 'uploader': 'Amazing Atheist',
 686                 'uploader_id': 'TheAmazingAtheist',
 687                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 688                 'title': 'Burning Everyone\'s Koran',
 689                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 690             }
 691         },
 692         # Normal age-gate video (No vevo, embed allowed)
 693         {
 694             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 695             'info_dict': {
 696                 'id': 'HtVdAasjOgU',
 697                 'ext': 'mp4',
 698                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 699                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 700                 'duration': 142,
 701                 'uploader': 'The Witcher',
 702                 'uploader_id': 'WitcherGame',
 703                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 704                 'upload_date': '20140605',
 705                 'age_limit': 18,
 706             },
 707         },
 708         # Age-gate video with encrypted signature
 709         {
 710             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 711             'info_dict': {
 712                 'id': '6kLq3WMV1nU',
 713                 'ext': 'mp4',
 714                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 715                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 716                 'duration': 246,
 717                 'uploader': 'LloydVEVO',
 718                 'uploader_id': 'LloydVEVO',
 719                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 720                 'upload_date': '20110629',
 721                 'age_limit': 18,
 722             },
 723         },
 724         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
 725         # YouTube Red ad is not captured for creator
 726         {
 727             'url': '__2ABJjxzNo',
 728             'info_dict': {
 729                 'id': '__2ABJjxzNo',
 730                 'ext': 'mp4',
 731                 'duration': 266,
 732                 'upload_date': '20100430',
 733                 'uploader_id': 'deadmau5',
 734                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 735                 'creator': 'deadmau5',
 736                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 737                 'uploader': 'deadmau5',
 738                 'title': 'Deadmau5 - Some Chords (HD)',
 739                 'alt_title': 'Some Chords',
 740             },
 741             'expected_warnings': [
 742                 'DASH manifest missing',
 743             ]
 744         },
 745         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
 746         {
 747             'url': 'lqQg6PlCWgI',
 748             'info_dict': {
 749                 'id': 'lqQg6PlCWgI',
 750                 'ext': 'mp4',
 751                 'duration': 6085,
 752                 'upload_date': '20150827',
 753                 'uploader_id': 'olympic',
 754                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 755                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 756                 'uploader': 'Olympic',
 757                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 758             },
 759             'params': {
 760                 'skip_download': 'requires avconv',
 761             }
 762         },
 763         # Non-square pixels
 764         {
 765             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 766             'info_dict': {
 767                 'id': '_b-2C3KPAM0',
 768                 'ext': 'mp4',
 769                 'stretched_ratio': 16 / 9.,
 770                 'duration': 85,
 771                 'upload_date': '20110310',
 772                 'uploader_id': 'AllenMeow',
 773                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 774                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 775                 'uploader': '孫ᄋᄅ',
 776                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 777             },
 778         },
 779         # url_encoded_fmt_stream_map is empty string
 780         {
 781             'url': 'qEJwOuvDf7I',
 782             'info_dict': {
 783                 'id': 'qEJwOuvDf7I',
 784                 'ext': 'webm',
 785                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 786                 'description': '',
 787                 'upload_date': '20150404',
 788                 'uploader_id': 'spbelect',
 789                 'uploader': 'Наблюдатели Петербурга',
 790             },
 791             'params': {
 792                 'skip_download': 'requires avconv',
 793             },
 794             'skip': 'This live event has ended.',
 795         },
 796         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
 797         {
 798             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 799             'info_dict': {
 800                 'id': 'FIl7x6_3R5Y',
 801                 'ext': 'webm',
 802                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 803                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 804                 'duration': 220,
 805                 'upload_date': '20150625',
 806                 'uploader_id': 'dorappi2000',
 807                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 808                 'uploader': 'dorappi2000',
 809                 'formats': 'mincount:31',
 810             },
 811             'skip': 'not actual anymore',
 812         },
 813         # DASH manifest with segment_list
 814         {
 815             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 816             'md5': '8ce563a1d667b599d21064e982ab9e31',
 817             'info_dict': {
 818                 'id': 'CsmdDsKjzN8',
 819                 'ext': 'mp4',
 820                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 821                 'uploader': 'Airtek',
 822                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 823                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 824                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 825             },
 826             'params': {
 827                 'youtube_include_dash_manifest': True,
 828                 'format': '135',  # bestvideo
 829             },
 830             'skip': 'This live event has ended.',
 831         },
 832         {
 833             # Multifeed videos (multiple cameras), URL is for Main Camera
 834             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 835             'info_dict': {
 836                 'id': 'jqWvoWXjCVs',
 837                 'title': 'teamPGP: Rocket League Noob Stream',
 838                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 839             },
 840             'playlist': [{
 841                 'info_dict': {
 842                     'id': 'jqWvoWXjCVs',
 843                     'ext': 'mp4',
 844                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 845                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 846                     'duration': 7335,
 847                     'upload_date': '20150721',
 848                     'uploader': 'Beer Games Beer',
 849                     'uploader_id': 'beergamesbeer',
 850                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 851                     'license': 'Standard YouTube License',
 852                 },
 853             }, {
 854                 'info_dict': {
 855                     'id': '6h8e8xoXJzg',
 856                     'ext': 'mp4',
 857                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 858                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 859                     'duration': 7337,
 860                     'upload_date': '20150721',
 861                     'uploader': 'Beer Games Beer',
 862                     'uploader_id': 'beergamesbeer',
 863                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 864                     'license': 'Standard YouTube License',
 865                 },
 866             }, {
 867                 'info_dict': {
 868                     'id': 'PUOgX5z9xZw',
 869                     'ext': 'mp4',
 870                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 871                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 872                     'duration': 7337,
 873                     'upload_date': '20150721',
 874                     'uploader': 'Beer Games Beer',
 875                     'uploader_id': 'beergamesbeer',
 876                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 877                     'license': 'Standard YouTube License',
 878                 },
 879             }, {
 880                 'info_dict': {
 881                     'id': 'teuwxikvS5k',
 882                     'ext': 'mp4',
 883                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 884                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 885                     'duration': 7334,
 886                     'upload_date': '20150721',
 887                     'uploader': 'Beer Games Beer',
 888                     'uploader_id': 'beergamesbeer',
 889                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 890                     'license': 'Standard YouTube License',
 891                 },
 892             }],
 893             'params': {
 894                 'skip_download': True,
 895             },
 896             'skip': 'This video is not available.',
 897         },
 898         {
 899             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
 900             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 901             'info_dict': {
 902                 'id': 'gVfLd0zydlo',
 903                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 904             },
 905             'playlist_count': 2,
 906             'skip': 'Not multifeed anymore',
 907         },
 908         {
 909             'url': 'https://vid.plus/FlRa-iH7PGw',
 910             'only_matching': True,
 911         },
 912         {
 913             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 914             'only_matching': True,
 915         },
 916         {
 917             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 918             # Also tests cut-off URL expansion in video description (see
 919             # https://github.com/ytdl-org/youtube-dl/issues/1892,
 920             # https://github.com/ytdl-org/youtube-dl/issues/8164)
 921             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 922             'info_dict': {
 923                 'id': 'lsguqyKfVQg',
 924                 'ext': 'mp4',
 925                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 926                 'alt_title': 'Dark Walk - Position Music',
 927                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 928                 'duration': 133,
 929                 'upload_date': '20151119',
 930                 'uploader_id': 'IronSoulElf',
 931                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 932                 'uploader': 'IronSoulElf',
 933                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 934                 'track': 'Dark Walk - Position Music',
 935                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 936                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
 937             },
 938             'params': {
 939                 'skip_download': True,
 940             },
 941         },
 942         {
 943             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 944             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 945             'only_matching': True,
 946         },
 947         {
 948             # Video with yt:stretch=17:0
 949             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 950             'info_dict': {
 951                 'id': 'Q39EVAstoRM',
 952                 'ext': 'mp4',
 953                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 954                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 955                 'upload_date': '20151107',
 956                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 957                 'uploader': 'CH GAMER DROID',
 958             },
 959             'params': {
 960                 'skip_download': True,
 961             },
 962             'skip': 'This video does not exist.',
 963         },
 964         {
 965             # Video licensed under Creative Commons
 966             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 967             'info_dict': {
 968                 'id': 'M4gD1WSo5mA',
 969                 'ext': 'mp4',
 970                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 971                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 972                 'duration': 721,
 973                 'upload_date': '20150127',
 974                 'uploader_id': 'BerkmanCenter',
 975                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 976                 'uploader': 'The Berkman Klein Center for Internet & Society',
 977                 'license': 'Creative Commons Attribution license (reuse allowed)',
 978             },
 979             'params': {
 980                 'skip_download': True,
 981             },
 982         },
 983         {
 984             # Channel-like uploader_url
 985             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 986             'info_dict': {
 987                 'id': 'eQcmzGIKrzg',
 988                 'ext': 'mp4',
 989                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 990                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 991                 'duration': 4060,
 992                 'upload_date': '20151119',
 993                 'uploader': 'Bernie Sanders',
 994                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 995                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 996                 'license': 'Creative Commons Attribution license (reuse allowed)',
 997             },
 998             'params': {
 999                 'skip_download': True,
1000             },
1001         },
1002         {
1003             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1004             'only_matching': True,
1005         },
1006         {
1007             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1008             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1009             'only_matching': True,
1010         },
1011         {
1012             # Rental video preview
1013             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1014             'info_dict': {
1015                 'id': 'uGpuVWrhIzE',
1016                 'ext': 'mp4',
1017                 'title': 'Piku - Trailer',
1018                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1019                 'upload_date': '20150811',
1020                 'uploader': 'FlixMatrix',
1021                 'uploader_id': 'FlixMatrixKaravan',
1022                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1023                 'license': 'Standard YouTube License',
1024             },
1025             'params': {
1026                 'skip_download': True,
1027             },
1028             'skip': 'This video is not available.',
1029         },
1030         {
1031             # YouTube Red video with episode data
1032             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1033             'info_dict': {
1034                 'id': 'iqKdEhx-dD4',
1035                 'ext': 'mp4',
1036                 'title': 'Isolation - Mind Field (Ep 1)',
1037                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1038                 'duration': 2085,
1039                 'upload_date': '20170118',
1040                 'uploader': 'Vsauce',
1041                 'uploader_id': 'Vsauce',
1042                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1043                 'series': 'Mind Field',
1044                 'season_number': 1,
1045                 'episode_number': 1,
1046             },
1047             'params': {
1048                 'skip_download': True,
1049             },
1050             'expected_warnings': [
1051                 'Skipping DASH manifest',
1052             ],
1053         },
1054         {
1055             # The following content has been identified by the YouTube community
1056             # as inappropriate or offensive to some audiences.
1057             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1058             'info_dict': {
1059                 'id': '6SJNVb0GnPI',
1060                 'ext': 'mp4',
1061                 'title': 'Race Differences in Intelligence',
1062                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1063                 'duration': 965,
1064                 'upload_date': '20140124',
1065                 'uploader': 'New Century Foundation',
1066                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1067                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1068             },
1069             'params': {
1070                 'skip_download': True,
1071             },
1072         },
1073         {
1074             # itag 212
1075             'url': '1t24XAntNCY',
1076             'only_matching': True,
1077         },
1078         {
1079             # geo restricted to JP
1080             'url': 'sJL6WA-aGkQ',
1081             'only_matching': True,
1082         },
1083         {
1084             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1085             'only_matching': True,
1086         },
1087         {
1088             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1089             'only_matching': True,
1090         },
1091         {
1092             # DRM protected
1093             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1094             'only_matching': True,
1095         },
1096         {
1097             # Video with unsupported adaptive stream type formats
1098             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1099             'info_dict': {
1100                 'id': 'Z4Vy8R84T1U',
1101                 'ext': 'mp4',
1102                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1103                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1104                 'duration': 433,
1105                 'upload_date': '20130923',
1106                 'uploader': 'Amelia Putri Harwita',
1107                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1108                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1109                 'formats': 'maxcount:10',
1110             },
1111             'params': {
1112                 'skip_download': True,
1113                 'youtube_include_dash_manifest': False,
1114             },
1115         },
1116         {
1117             # Youtube Music Auto-generated description
1118             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1119             'info_dict': {
1120                 'id': 'MgNrAu2pzNs',
1121                 'ext': 'mp4',
1122                 'title': 'Voyeur Girl',
1123                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1124                 'upload_date': '20190312',
1125                 'uploader': 'Various Artists - Topic',
1126                 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1127                 'artist': 'Stephen',
1128                 'track': 'Voyeur Girl',
1129                 'album': 'it\'s too much love to know my dear',
1130                 'release_date': '20190313',
1131                 'release_year': 2019,
1132             },
1133             'params': {
1134                 'skip_download': True,
1135             },
1136         },
1137         {
1138             # Youtube Music Auto-generated description
1139             # Retrieve 'artist' field from 'Artist:' in video description
1140             # when it is present on youtube music video
1141             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1142             'info_dict': {
1143                 'id': 'k0jLE7tTwjY',
1144                 'ext': 'mp4',
1145                 'title': 'Latch Feat. Sam Smith',
1146                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1147                 'upload_date': '20150110',
1148                 'uploader': 'Various Artists - Topic',
1149                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1150                 'artist': 'Disclosure',
1151                 'track': 'Latch Feat. Sam Smith',
1152                 'album': 'Latch Featuring Sam Smith',
1153                 'release_date': '20121008',
1154                 'release_year': 2012,
1155             },
1156             'params': {
1157                 'skip_download': True,
1158             },
1159         },
1160         {
1161             # Youtube Music Auto-generated description
1162             # handle multiple artists on youtube music video
1163             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1164             'info_dict': {
1165                 'id': '74qn0eJSjpA',
1166                 'ext': 'mp4',
1167                 'title': 'Eastside',
1168                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1169                 'upload_date': '20180710',
1170                 'uploader': 'Benny Blanco - Topic',
1171                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1172                 'artist': 'benny blanco, Halsey, Khalid',
1173                 'track': 'Eastside',
1174                 'album': 'Eastside',
1175                 'release_date': '20180713',
1176                 'release_year': 2018,
1177             },
1178             'params': {
1179                 'skip_download': True,
1180             },
1181         },
1182         {
1183             # Youtube Music Auto-generated description
1184             # handle youtube music video with release_year and no release_date
1185             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1186             'info_dict': {
1187                 'id': '-hcAI0g-f5M',
1188                 'ext': 'mp4',
1189                 'title': 'Put It On Me',
1190                 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1191                 'upload_date': '20180426',
1192                 'uploader': 'Matt Maeson - Topic',
1193                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1194                 'artist': 'Matt Maeson',
1195                 'track': 'Put It On Me',
1196                 'album': 'The Hearse',
1197                 'release_date': None,
1198                 'release_year': 2018,
1199             },
1200             'params': {
1201                 'skip_download': True,
1202             },
1203         },
1204     ]
1205
1206     def __init__(self, *args, **kwargs):
1207         super(YoutubeIE, self).__init__(*args, **kwargs)
1208         self._player_cache = {}
1209
1210     def report_video_info_webpage_download(self, video_id):
1211         """Report attempt to download video info webpage."""
1212         self.to_screen('%s: Downloading video info webpage' % video_id)
1213
1214     def report_information_extraction(self, video_id):
1215         """Report attempt to extract video information."""
1216         self.to_screen('%s: Extracting video information' % video_id)
1217
1218     def report_unavailable_format(self, video_id, format):
1219         """Report extracted video URL."""
1220         self.to_screen('%s: Format %s not available' % (video_id, format))
1221
1222     def report_rtmp_download(self):
1223         """Indicate the download will use the RTMP protocol."""
1224         self.to_screen('RTMP download detected')
1225
1226     def _signature_cache_id(self, example_sig):
1227         """ Return a string representation of a signature """
1228         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1229
1230     def _extract_signature_function(self, video_id, player_url, example_sig):
1231         id_m = re.match(
1232             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1233             player_url)
1234         if not id_m:
1235             raise ExtractorError('Cannot identify player %r' % player_url)
1236         player_type = id_m.group('ext')
1237         player_id = id_m.group('id')
1238
1239         # Read from filesystem cache
1240         func_id = '%s_%s_%s' % (
1241             player_type, player_id, self._signature_cache_id(example_sig))
1242         assert os.path.basename(func_id) == func_id
1243
1244         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1245         if cache_spec is not None:
1246             return lambda s: ''.join(s[i] for i in cache_spec)
1247
1248         download_note = (
1249             'Downloading player %s' % player_url
1250             if self._downloader.params.get('verbose') else
1251             'Downloading %s player %s' % (player_type, player_id)
1252         )
1253         if player_type == 'js':
1254             code = self._download_webpage(
1255                 player_url, video_id,
1256                 note=download_note,
1257                 errnote='Download of %s failed' % player_url)
1258             res = self._parse_sig_js(code)
1259         elif player_type == 'swf':
1260             urlh = self._request_webpage(
1261                 player_url, video_id,
1262                 note=download_note,
1263                 errnote='Download of %s failed' % player_url)
1264             code = urlh.read()
1265             res = self._parse_sig_swf(code)
1266         else:
1267             assert False, 'Invalid player type %r' % player_type
1268
1269         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1270         cache_res = res(test_string)
1271         cache_spec = [ord(c) for c in cache_res]
1272
1273         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1274         return res
1275
    def _print_sig_code(self, func, example_sig):
        """Print Python code equivalent to the signature function func.

        func is probed with a string as long as example_sig; the resulting
        index permutation is rendered as a sum of index/slice expressions
        on ``s`` and written to the screen, guarded by a check on the
        lengths of the dot-separated parts of example_sig.
        """
        def gen_sig_code(idxs):
            """Yield 's[...]' expressions that together reproduce idxs.

            Runs of indexes that go up or down by exactly one are emitted
            as a single slice, everything else as a single index.
            """
            def _genslice(start, end, step):
                # Render the run start..end (inclusive) with the given step
                # as a slice of s; the stop value is one step past end and
                # is left out when that would be negative.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # step is None while no run is in progress, otherwise +1 or -1.
            step = None
            # Quell pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk over consecutive pairs (prev, i) of indexes.
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        # The current run continues.
                        continue
                    # The run ended at prev: emit it and start afresh.
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # A new run begins at prev.
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit whatever is still pending for the last index.
            # NOTE(review): i is only bound if the loop ran at least once,
            # i.e. idxs needs two or more elements - confirm callers
            # guarantee that.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # The k-th character of the probe string has code point k, so the
        # code points of the output are the input indexes func selects.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1314
1315     def _parse_sig_js(self, jscode):
1316         funcname = self._search_regex(
1317             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1318              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1319              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1320              # Obsolete patterns
1321              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1322              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1323              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1324              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1325              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1326              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1327              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1328              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1329             jscode, 'Initial JS player signature function name', group='sig')
1330
1331         jsi = JSInterpreter(jscode)
1332         initial_function = jsi.extract_function(funcname)
1333         return lambda s: initial_function([s])
1334
1335     def _parse_sig_swf(self, file_contents):
1336         swfi = SWFInterpreter(file_contents)
1337         TARGET_CLASSNAME = 'SignatureDecipher'
1338         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1339         initial_function = swfi.extract_function(searched_class, 'decipher')
1340         return lambda s: initial_function([s])
1341
1342     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1343         """Turn the encrypted s field into a working signature"""
1344
1345         if player_url is None:
1346             raise ExtractorError('Cannot decrypt signature without player_url')
1347
1348         if player_url.startswith('//'):
1349             player_url = 'https:' + player_url
1350         elif not re.match(r'https?://', player_url):
1351             player_url = compat_urlparse.urljoin(
1352                 'https://www.youtube.com', player_url)
1353         try:
1354             player_id = (player_url, self._signature_cache_id(s))
1355             if player_id not in self._player_cache:
1356                 func = self._extract_signature_function(
1357                     video_id, player_url, s
1358                 )
1359                 self._player_cache[player_id] = func
1360             func = self._player_cache[player_id]
1361             if self._downloader.params.get('youtube_print_sig_code'):
1362                 self._print_sig_code(func, s)
1363             return func(s)
1364         except Exception as e:
1365             tb = traceback.format_exc()
1366             raise ExtractorError(
1367                 'Signature extraction failed: ' + tb, cause=e)
1368
1369     def _get_subtitles(self, video_id, webpage):
1370         try:
1371             subs_doc = self._download_xml(
1372                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1373                 video_id, note=False)
1374         except ExtractorError as err:
1375             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1376             return {}
1377
1378         sub_lang_list = {}
1379         for track in subs_doc.findall('track'):
1380             lang = track.attrib['lang_code']
1381             if lang in sub_lang_list:
1382                 continue
1383             sub_formats = []
1384             for ext in self._SUBTITLE_FORMATS:
1385                 params = compat_urllib_parse_urlencode({
1386                     'lang': lang,
1387                     'v': video_id,
1388                     'fmt': ext,
1389                     'name': track.attrib['name'].encode('utf-8'),
1390                 })
1391                 sub_formats.append({
1392                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1393                     'ext': ext,
1394                 })
1395             sub_lang_list[lang] = sub_formats
1396         if not sub_lang_list:
1397             self._downloader.report_warning('video doesn\'t have subtitles')
1398             return {}
1399         return sub_lang_list
1400
1401     def _get_ytplayer_config(self, video_id, webpage):
1402         patterns = (
1403             # User data may contain arbitrary character sequences that may affect
1404             # JSON extraction with regex, e.g. when '};' is contained the second
1405             # regex won't capture the whole JSON. Yet working around by trying more
1406             # concrete regex first keeping in mind proper quoted string handling
1407             # to be implemented in future that will replace this workaround (see
1408             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1409             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1410             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1411             r';ytplayer\.config\s*=\s*({.+?});',
1412         )
1413         config = self._search_regex(
1414             patterns, webpage, 'ytplayer.config', default=None)
1415         if config:
1416             return self._parse_json(
1417                 uppercase_escape(config), video_id, fatal=False)
1418
    def _get_automatic_captions(self, video_id, webpage):
        """Return the automatic captions available for video_id.

        The captions URL is taken from the ytplayer config embedded in
        webpage, which is why the already downloaded webpage is passed in
        instead of being fetched again.

        Three sources are tried in order: the 'ttsurl' player argument, the
        'player_response' player argument, and finally the 'caption_tracks'
        / 'caption_translation_languages' player arguments.

        Returns a dict mapping each language code to a list of
        {'url', 'ext'} dicts (one per entry of self._SUBTITLE_FORMATS), or
        an empty dict, after a warning, when no captions could be found.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                # The first 'track' element supplies the source language
                # and kind used for every translated caption URL below.
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                # One entry per 'target' (translation language) element
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build the captions dict for the given languages by
                # setting 'tlang' and 'fmt' in the query of sub_url.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    # Only the first caption track is used as the base URL
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            # Only the first caption track is used as the base URL
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An ExtractorError can be raised by the download process if there
        # are no automatic captions but there are subtitles. KeyError and
        # IndexError cover missing entries in the structures read above.
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1520
1521     def _mark_watched(self, video_id, video_info, player_response):
1522         playback_url = url_or_none(try_get(
1523             player_response,
1524             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1525             video_info, lambda x: x['videostats_playback_base_url'][0]))
1526         if not playback_url:
1527             return
1528         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1529         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1530
1531         # cpn generation algorithm is reverse engineered from base.js.
1532         # In fact it works even with dummy cpn.
1533         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1534         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1535
1536         qs.update({
1537             'ver': ['2'],
1538             'cpn': [cpn],
1539         })
1540         playback_url = compat_urlparse.urlunparse(
1541             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1542
1543         self._download_webpage(
1544             playback_url, video_id, 'Marking watched',
1545             'Unable to mark watched', fatal=False)
1546
1547     @staticmethod
1548     def _extract_urls(webpage):
1549         # Embedded YouTube player
1550         entries = [
1551             unescapeHTML(mobj.group('url'))
1552             for mobj in re.finditer(r'''(?x)
1553             (?:
1554                 <iframe[^>]+?src=|
1555                 data-video-url=|
1556                 <embed[^>]+?src=|
1557                 embedSWF\(?:\s*|
1558                 <object[^>]+data=|
1559                 new\s+SWFObject\(
1560             )
1561             (["\'])
1562                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1563                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1564             \1''', webpage)]
1565
1566         # lazyYT YouTube embed
1567         entries.extend(list(map(
1568             unescapeHTML,
1569             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1570
1571         # Wordpress "YouTube Video Importer" plugin
1572         matches = re.findall(r'''(?x)<div[^>]+
1573             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1574             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1575         entries.extend(m[-1] for m in matches)
1576
1577         return entries
1578
1579     @staticmethod
1580     def _extract_url(webpage):
1581         urls = YoutubeIE._extract_urls(webpage)
1582         return urls[0] if urls else None
1583
1584     @classmethod
1585     def extract_id(cls, url):
1586         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1587         if mobj is None:
1588             raise ExtractorError('Invalid URL: %s' % url)
1589         video_id = mobj.group(2)
1590         return video_id
1591
1592     def _extract_annotations(self, video_id):
1593         return self._download_webpage(
1594             'https://www.youtube.com/annotations_invideo', video_id,
1595             note='Downloading annotations',
1596             errnote='Unable to download video annotations', fatal=False,
1597             query={
1598                 'features': 1,
1599                 'legacy': 1,
1600                 'video_id': video_id,
1601             })
1602
1603     @staticmethod
1604     def _extract_chapters(description, duration):
1605         if not description:
1606             return None
1607         chapter_lines = re.findall(
1608             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1609             description)
1610         if not chapter_lines:
1611             return None
1612         chapters = []
1613         for next_num, (chapter_line, time_point) in enumerate(
1614                 chapter_lines, start=1):
1615             start_time = parse_duration(time_point)
1616             if start_time is None:
1617                 continue
1618             if start_time > duration:
1619                 break
1620             end_time = (duration if next_num == len(chapter_lines)
1621                         else parse_duration(chapter_lines[next_num][1]))
1622             if end_time is None:
1623                 continue
1624             if end_time > duration:
1625                 end_time = duration
1626             if start_time > end_time:
1627                 break
1628             chapter_title = re.sub(
1629                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1630             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1631             chapters.append({
1632                 'start_time': start_time,
1633                 'end_time': end_time,
1634                 'title': chapter_title,
1635             })
1636         return chapters
1637
1638     def _real_extract(self, url):
1639         url, smuggled_data = unsmuggle_url(url, {})
1640
1641         proto = (
1642             'http' if self._downloader.params.get('prefer_insecure', False)
1643             else 'https')
1644
1645         start_time = None
1646         end_time = None
1647         parsed_url = compat_urllib_parse_urlparse(url)
1648         for component in [parsed_url.fragment, parsed_url.query]:
1649             query = compat_parse_qs(component)
1650             if start_time is None and 't' in query:
1651                 start_time = parse_duration(query['t'][0])
1652             if start_time is None and 'start' in query:
1653                 start_time = parse_duration(query['start'][0])
1654             if end_time is None and 'end' in query:
1655                 end_time = parse_duration(query['end'][0])
1656
1657         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1658         mobj = re.search(self._NEXT_URL_RE, url)
1659         if mobj:
1660             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1661         video_id = self.extract_id(url)
1662
1663         # Get video webpage
1664         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1665         video_webpage = self._download_webpage(url, video_id)
1666
1667         # Attempt to extract SWF player URL
1668         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1669         if mobj is not None:
1670             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1671         else:
1672             player_url = None
1673
1674         dash_mpds = []
1675
1676         def add_dash_mpd(video_info):
1677             dash_mpd = video_info.get('dashmpd')
1678             if dash_mpd and dash_mpd[0] not in dash_mpds:
1679                 dash_mpds.append(dash_mpd[0])
1680
1681         def add_dash_mpd_pr(pl_response):
1682             dash_mpd = url_or_none(try_get(
1683                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1684                 compat_str))
1685             if dash_mpd and dash_mpd not in dash_mpds:
1686                 dash_mpds.append(dash_mpd)
1687
1688         is_live = None
1689         view_count = None
1690
1691         def extract_view_count(v_info):
1692             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1693
1694         def extract_token(v_info):
1695             return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1696
1697         player_response = {}
1698
1699         # Get video info
1700         embed_webpage = None
1701         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1702             age_gate = True
1703             # We simulate the access to the video from www.youtube.com/v/{video_id}
1704             # this can be viewed without login into Youtube
1705             url = proto + '://www.youtube.com/embed/%s' % video_id
1706             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1707             data = compat_urllib_parse_urlencode({
1708                 'video_id': video_id,
1709                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1710                 'sts': self._search_regex(
1711                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1712             })
1713             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1714             video_info_webpage = self._download_webpage(
1715                 video_info_url, video_id,
1716                 note='Refetching age-gated info webpage',
1717                 errnote='unable to download video info webpage')
1718             video_info = compat_parse_qs(video_info_webpage)
1719             add_dash_mpd(video_info)
1720         else:
1721             age_gate = False
1722             video_info = None
1723             sts = None
1724             # Try looking directly into the video webpage
1725             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1726             if ytplayer_config:
1727                 args = ytplayer_config['args']
1728                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1729                     # Convert to the same format returned by compat_parse_qs
1730                     video_info = dict((k, [v]) for k, v in args.items())
1731                     add_dash_mpd(video_info)
1732                 # Rental video is not rented but preview is available (e.g.
1733                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1734                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1735                 if not video_info and args.get('ypc_vid'):
1736                     return self.url_result(
1737                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1738                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1739                     is_live = True
1740                 sts = ytplayer_config.get('sts')
1741                 if not player_response:
1742                     pl_response = str_or_none(args.get('player_response'))
1743                     if pl_response:
1744                         pl_response = self._parse_json(pl_response, video_id, fatal=False)
1745                         if isinstance(pl_response, dict):
1746                             player_response = pl_response
1747             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1748                 add_dash_mpd_pr(player_response)
1749                 # We also try looking in get_video_info since it may contain different dashmpd
1750                 # URL that points to a DASH manifest with possibly different itag set (some itags
1751                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1752                 # manifest pointed by get_video_info's dashmpd).
1753                 # The general idea is to take a union of itags of both DASH manifests (for example
1754                 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1755                 self.report_video_info_webpage_download(video_id)
1756                 for el in ('embedded', 'detailpage', 'vevo', ''):
1757                     query = {
1758                         'video_id': video_id,
1759                         'ps': 'default',
1760                         'eurl': '',
1761                         'gl': 'US',
1762                         'hl': 'en',
1763                     }
1764                     if el:
1765                         query['el'] = el
1766                     if sts:
1767                         query['sts'] = sts
1768                     video_info_webpage = self._download_webpage(
1769                         '%s://www.youtube.com/get_video_info' % proto,
1770                         video_id, note=False,
1771                         errnote='unable to download video info webpage',
1772                         fatal=False, query=query)
1773                     if not video_info_webpage:
1774                         continue
1775                     get_video_info = compat_parse_qs(video_info_webpage)
1776                     if not player_response:
1777                         pl_response = get_video_info.get('player_response', [None])[0]
1778                         if isinstance(pl_response, dict):
1779                             player_response = pl_response
1780                             add_dash_mpd_pr(player_response)
1781                     add_dash_mpd(get_video_info)
1782                     if view_count is None:
1783                         view_count = extract_view_count(get_video_info)
1784                     if not video_info:
1785                         video_info = get_video_info
1786                     get_token = extract_token(get_video_info)
1787                     if get_token:
1788                         # Different get_video_info requests may report different results, e.g.
1789                         # some may report video unavailability, but some may serve it without
1790                         # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1791                         # the original webpage as well as el=info and el=embedded get_video_info
1792                         # requests report video unavailability due to geo restriction while
1793                         # el=detailpage succeeds and returns valid data). This is probably
1794                         # due to YouTube measures against IP ranges of hosting providers.
1795                         # Working around by preferring the first succeeded video_info containing
1796                         # the token if no such video_info yet was found.
1797                         token = extract_token(video_info)
1798                         if not token:
1799                             video_info = get_video_info
1800                         break
1801
1802         def extract_unavailable_message():
1803             return self._html_search_regex(
1804                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1805                 video_webpage, 'unavailable message', default=None)
1806
1807         if not video_info:
1808             unavailable_message = extract_unavailable_message()
1809             if not unavailable_message:
1810                 unavailable_message = 'Unable to extract video data'
1811             raise ExtractorError(
1812                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1813
1814         video_details = try_get(
1815             player_response, lambda x: x['videoDetails'], dict) or {}
1816
1817         # title
1818         if 'title' in video_info:
1819             video_title = video_info['title'][0]
1820         elif 'title' in player_response:
1821             video_title = video_details['title']
1822         else:
1823             self._downloader.report_warning('Unable to extract video title')
1824             video_title = '_'
1825
1826         # description
1827         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1828         if video_description:
1829
1830             def replace_url(m):
1831                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1832                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1833                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1834                     qs = compat_parse_qs(parsed_redir_url.query)
1835                     q = qs.get('q')
1836                     if q and q[0]:
1837                         return q[0]
1838                 return redir_url
1839
1840             description_original = video_description = re.sub(r'''(?x)
1841                 <a\s+
1842                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1843                     (?:title|href)="([^"]+)"\s+
1844                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1845                     class="[^"]*"[^>]*>
1846                 [^<]+\.{3}\s*
1847                 </a>
1848             ''', replace_url, video_description)
1849             video_description = clean_html(video_description)
1850         else:
1851             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1852             if fd_mobj:
1853                 video_description = unescapeHTML(fd_mobj.group(1))
1854             else:
1855                 video_description = ''
1856
1857         if not smuggled_data.get('force_singlefeed', False):
1858             if not self._downloader.params.get('noplaylist'):
1859                 multifeed_metadata_list = try_get(
1860                     player_response,
1861                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1862                     compat_str) or try_get(
1863                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1864                 if multifeed_metadata_list:
1865                     entries = []
1866                     feed_ids = []
1867                     for feed in multifeed_metadata_list.split(','):
1868                         # Unquote should take place before split on comma (,) since textual
1869                         # fields may contain comma as well (see
1870                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1871                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1872                         entries.append({
1873                             '_type': 'url_transparent',
1874                             'ie_key': 'Youtube',
1875                             'url': smuggle_url(
1876                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1877                                 {'force_singlefeed': True}),
1878                             'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1879                         })
1880                         feed_ids.append(feed_data['id'][0])
1881                     self.to_screen(
1882                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1883                         % (', '.join(feed_ids), video_id))
1884                     return self.playlist_result(entries, video_id, video_title, video_description)
1885             else:
1886                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1887
1888         if view_count is None:
1889             view_count = extract_view_count(video_info)
1890         if view_count is None and video_details:
1891             view_count = int_or_none(video_details.get('viewCount'))
1892
1893         # Check for "rental" videos
1894         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1895             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1896
1897         def _extract_filesize(media_url):
1898             return int_or_none(self._search_regex(
1899                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1900
1901         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1902             self.report_rtmp_download()
1903             formats = [{
1904                 'format_id': '_rtmp',
1905                 'protocol': 'rtmp',
1906                 'url': video_info['conn'][0],
1907                 'player_url': player_url,
1908             }]
1909         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1910             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1911             if 'rtmpe%3Dyes' in encoded_url_map:
1912                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1913             formats_spec = {}
1914             fmt_list = video_info.get('fmt_list', [''])[0]
1915             if fmt_list:
1916                 for fmt in fmt_list.split(','):
1917                     spec = fmt.split('/')
1918                     if len(spec) > 1:
1919                         width_height = spec[1].split('x')
1920                         if len(width_height) == 2:
1921                             formats_spec[spec[0]] = {
1922                                 'resolution': spec[1],
1923                                 'width': int_or_none(width_height[0]),
1924                                 'height': int_or_none(width_height[1]),
1925                             }
1926             q = qualities(['small', 'medium', 'hd720'])
1927             streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1928             if streaming_formats:
1929                 for fmt in streaming_formats:
1930                     itag = str_or_none(fmt.get('itag'))
1931                     if not itag:
1932                         continue
1933                     quality = fmt.get('quality')
1934                     quality_label = fmt.get('qualityLabel') or quality
1935                     formats_spec[itag] = {
1936                         'asr': int_or_none(fmt.get('audioSampleRate')),
1937                         'filesize': int_or_none(fmt.get('contentLength')),
1938                         'format_note': quality_label,
1939                         'fps': int_or_none(fmt.get('fps')),
1940                         'height': int_or_none(fmt.get('height')),
1941                         'quality': q(quality),
1942                         # bitrate for itag 43 is always 2147483647
1943                         'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1944                         'width': int_or_none(fmt.get('width')),
1945                     }
1946             formats = []
1947             for url_data_str in encoded_url_map.split(','):
1948                 url_data = compat_parse_qs(url_data_str)
1949                 if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
1950                     continue
1951                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1952                 # Unsupported FORMAT_STREAM_TYPE_OTF
1953                 if stream_type == 3:
1954                     continue
1955                 format_id = url_data['itag'][0]
1956                 url = url_data['url'][0]
1957
1958                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1959                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1960                     jsplayer_url_json = self._search_regex(
1961                         ASSETS_RE,
1962                         embed_webpage if age_gate else video_webpage,
1963                         'JS player URL (1)', default=None)
1964                     if not jsplayer_url_json and not age_gate:
1965                         # We need the embed website after all
1966                         if embed_webpage is None:
1967                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1968                             embed_webpage = self._download_webpage(
1969                                 embed_url, video_id, 'Downloading embed webpage')
1970                         jsplayer_url_json = self._search_regex(
1971                             ASSETS_RE, embed_webpage, 'JS player URL')
1972
1973                     player_url = json.loads(jsplayer_url_json)
1974                     if player_url is None:
1975                         player_url_json = self._search_regex(
1976                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1977                             video_webpage, 'age gate player URL')
1978                         player_url = json.loads(player_url_json)
1979
1980                 if 'sig' in url_data:
1981                     url += '&signature=' + url_data['sig'][0]
1982                 elif 's' in url_data:
1983                     encrypted_sig = url_data['s'][0]
1984
1985                     if self._downloader.params.get('verbose'):
1986                         if player_url is None:
1987                             player_version = 'unknown'
1988                             player_desc = 'unknown'
1989                         else:
1990                             if player_url.endswith('swf'):
1991                                 player_version = self._search_regex(
1992                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1993                                     'flash player', fatal=False)
1994                                 player_desc = 'flash player %s' % player_version
1995                             else:
1996                                 player_version = self._search_regex(
1997                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1998                                      r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
1999                                     player_url,
2000                                     'html5 player', fatal=False)
2001                                 player_desc = 'html5 player %s' % player_version
2002
2003                         parts_sizes = self._signature_cache_id(encrypted_sig)
2004                         self.to_screen('{%s} signature length %s, %s' %
2005                                        (format_id, parts_sizes, player_desc))
2006
2007                     signature = self._decrypt_signature(
2008                         encrypted_sig, video_id, player_url, age_gate)
2009                     sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2010                     url += '&%s=%s' % (sp, signature)
2011                 if 'ratebypass' not in url:
2012                     url += '&ratebypass=yes'
2013
2014                 dct = {
2015                     'format_id': format_id,
2016                     'url': url,
2017                     'player_url': player_url,
2018                 }
2019                 if format_id in self._formats:
2020                     dct.update(self._formats[format_id])
2021                 if format_id in formats_spec:
2022                     dct.update(formats_spec[format_id])
2023
2024                 # Some itags are not included in DASH manifest thus corresponding formats will
2025                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2026                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2027                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2028                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2029
2030                 filesize = int_or_none(url_data.get(
2031                     'clen', [None])[0]) or _extract_filesize(url)
2032
2033                 quality = url_data.get('quality', [None])[0]
2034
2035                 more_fields = {
2036                     'filesize': filesize,
2037                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
2038                     'width': width,
2039                     'height': height,
2040                     'fps': int_or_none(url_data.get('fps', [None])[0]),
2041                     'format_note': url_data.get('quality_label', [None])[0] or quality,
2042                     'quality': q(quality),
2043                 }
2044                 for key, value in more_fields.items():
2045                     if value:
2046                         dct[key] = value
2047                 type_ = url_data.get('type', [None])[0]
2048                 if type_:
2049                     type_split = type_.split(';')
2050                     kind_ext = type_split[0].split('/')
2051                     if len(kind_ext) == 2:
2052                         kind, _ = kind_ext
2053                         dct['ext'] = mimetype2ext(type_split[0])
2054                         if kind in ('audio', 'video'):
2055                             codecs = None
2056                             for mobj in re.finditer(
2057                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2058                                 if mobj.group('key') == 'codecs':
2059                                     codecs = mobj.group('val')
2060                                     break
2061                             if codecs:
2062                                 dct.update(parse_codecs(codecs))
2063                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2064                     dct['downloader_options'] = {
2065                         # Youtube throttles chunks >~10M
2066                         'http_chunk_size': 10485760,
2067                     }
2068                 formats.append(dct)
2069         else:
2070             manifest_url = (
2071                 url_or_none(try_get(
2072                     player_response,
2073                     lambda x: x['streamingData']['hlsManifestUrl'],
2074                     compat_str))
2075                 or url_or_none(try_get(
2076                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2077             if manifest_url:
2078                 formats = []
2079                 m3u8_formats = self._extract_m3u8_formats(
2080                     manifest_url, video_id, 'mp4', fatal=False)
2081                 for a_format in m3u8_formats:
2082                     itag = self._search_regex(
2083                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2084                     if itag:
2085                         a_format['format_id'] = itag
2086                         if itag in self._formats:
2087                             dct = self._formats[itag].copy()
2088                             dct.update(a_format)
2089                             a_format = dct
2090                     a_format['player_url'] = player_url
2091                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2092                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2093                     formats.append(a_format)
2094             else:
2095                 error_message = clean_html(video_info.get('reason', [None])[0])
2096                 if not error_message:
2097                     error_message = extract_unavailable_message()
2098                 if error_message:
2099                     raise ExtractorError(error_message, expected=True)
2100                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2101
2102         # uploader
2103         video_uploader = try_get(
2104             video_info, lambda x: x['author'][0],
2105             compat_str) or str_or_none(video_details.get('author'))
2106         if video_uploader:
2107             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2108         else:
2109             self._downloader.report_warning('unable to extract uploader name')
2110
2111         # uploader_id
2112         video_uploader_id = None
2113         video_uploader_url = None
2114         mobj = re.search(
2115             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2116             video_webpage)
2117         if mobj is not None:
2118             video_uploader_id = mobj.group('uploader_id')
2119             video_uploader_url = mobj.group('uploader_url')
2120         else:
2121             self._downloader.report_warning('unable to extract uploader nickname')
2122
2123         channel_id = (
2124             str_or_none(video_details.get('channelId'))
2125             or self._html_search_meta(
2126                 'channelId', video_webpage, 'channel id', default=None)
2127             or self._search_regex(
2128                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2129                 video_webpage, 'channel id', default=None, group='id'))
2130         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2131
2132         # thumbnail image
2133         # We try first to get a high quality image:
2134         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2135                             video_webpage, re.DOTALL)
2136         if m_thumb is not None:
2137             video_thumbnail = m_thumb.group(1)
2138         elif 'thumbnail_url' not in video_info:
2139             self._downloader.report_warning('unable to extract video thumbnail')
2140             video_thumbnail = None
2141         else:   # don't panic if we can't find it
2142             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2143
2144         # upload date
2145         upload_date = self._html_search_meta(
2146             'datePublished', video_webpage, 'upload date', default=None)
2147         if not upload_date:
2148             upload_date = self._search_regex(
2149                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2150                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2151                 video_webpage, 'upload date', default=None)
2152         upload_date = unified_strdate(upload_date)
2153
2154         video_license = self._html_search_regex(
2155             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2156             video_webpage, 'license', default=None)
2157
2158         m_music = re.search(
2159             r'''(?x)
2160                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2161                 <ul[^>]*>\s*
2162                 <li>(?P<title>.+?)
2163                 by (?P<creator>.+?)
2164                 (?:
2165                     \(.+?\)|
2166                     <a[^>]*
2167                         (?:
2168                             \bhref=["\']/red[^>]*>|             # drop possible
2169                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2170                         )
2171                     .*?
2172                 )?</li
2173             ''',
2174             video_webpage)
2175         if m_music:
2176             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2177             video_creator = clean_html(m_music.group('creator'))
2178         else:
2179             video_alt_title = video_creator = None
2180
2181         def extract_meta(field):
2182             return self._html_search_regex(
2183                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2184                 video_webpage, field, default=None)
2185
2186         track = extract_meta('Song')
2187         artist = extract_meta('Artist')
2188         album = extract_meta('Album')
2189
2190         # Youtube Music Auto-generated description
2191         release_date = release_year = None
2192         if video_description:
2193             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2194             if mobj:
2195                 if not track:
2196                     track = mobj.group('track').strip()
2197                 if not artist:
2198                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2199                 if not album:
2200                     album = mobj.group('album'.strip())
2201                 release_year = mobj.group('release_year')
2202                 release_date = mobj.group('release_date')
2203                 if release_date:
2204                     release_date = release_date.replace('-', '')
2205                     if not release_year:
2206                         release_year = int(release_date[:4])
2207                 if release_year:
2208                     release_year = int(release_year)
2209
2210         m_episode = re.search(
2211             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2212             video_webpage)
2213         if m_episode:
2214             series = unescapeHTML(m_episode.group('series'))
2215             season_number = int(m_episode.group('season'))
2216             episode_number = int(m_episode.group('episode'))
2217         else:
2218             series = season_number = episode_number = None
2219
2220         m_cat_container = self._search_regex(
2221             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2222             video_webpage, 'categories', default=None)
2223         if m_cat_container:
2224             category = self._html_search_regex(
2225                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2226                 default=None)
2227             video_categories = None if category is None else [category]
2228         else:
2229             video_categories = None
2230
2231         video_tags = [
2232             unescapeHTML(m.group('content'))
2233             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2234
2235         def _extract_count(count_name):
2236             return str_to_int(self._search_regex(
2237                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2238                 % re.escape(count_name),
2239                 video_webpage, count_name, default=None))
2240
2241         like_count = _extract_count('like')
2242         dislike_count = _extract_count('dislike')
2243
2244         if view_count is None:
2245             view_count = str_to_int(self._search_regex(
2246                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2247                 'view count', default=None))
2248
2249         average_rating = (
2250             float_or_none(video_details.get('averageRating'))
2251             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2252
2253         # subtitles
2254         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2255         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2256
2257         video_duration = try_get(
2258             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2259         if not video_duration:
2260             video_duration = int_or_none(video_details.get('lengthSeconds'))
2261         if not video_duration:
2262             video_duration = parse_duration(self._html_search_meta(
2263                 'duration', video_webpage, 'video duration'))
2264
2265         # annotations
2266         video_annotations = None
2267         if self._downloader.params.get('writeannotations', False):
2268             video_annotations = self._extract_annotations(video_id)
2269
2270         chapters = self._extract_chapters(description_original, video_duration)
2271
2272         # Look for the DASH manifest
2273         if self._downloader.params.get('youtube_include_dash_manifest', True):
2274             dash_mpd_fatal = True
2275             for mpd_url in dash_mpds:
2276                 dash_formats = {}
2277                 try:
2278                     def decrypt_sig(mobj):
2279                         s = mobj.group(1)
2280                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2281                         return '/signature/%s' % dec_s
2282
2283                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2284
2285                     for df in self._extract_mpd_formats(
2286                             mpd_url, video_id, fatal=dash_mpd_fatal,
2287                             formats_dict=self._formats):
2288                         if not df.get('filesize'):
2289                             df['filesize'] = _extract_filesize(df['url'])
2290                         # Do not overwrite DASH format found in some previous DASH manifest
2291                         if df['format_id'] not in dash_formats:
2292                             dash_formats[df['format_id']] = df
2293                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2294                         # allow them to fail without bug report message if we already have
2295                         # some DASH manifest succeeded. This is temporary workaround to reduce
2296                         # burst of bug reports until we figure out the reason and whether it
2297                         # can be fixed at all.
2298                         dash_mpd_fatal = False
2299                 except (ExtractorError, KeyError) as e:
2300                     self.report_warning(
2301                         'Skipping DASH manifest: %r' % e, video_id)
2302                 if dash_formats:
2303                     # Remove the formats we found through non-DASH, they
2304                     # contain less info and it can be wrong, because we use
2305                     # fixed values (for example the resolution). See
2306                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2307                     # example.
2308                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2309                     formats.extend(dash_formats.values())
2310
2311         # Check for malformed aspect ratio
2312         stretched_m = re.search(
2313             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2314             video_webpage)
2315         if stretched_m:
2316             w = float(stretched_m.group('w'))
2317             h = float(stretched_m.group('h'))
2318             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2319             # We will only process correct ratios.
2320             if w > 0 and h > 0:
2321                 ratio = w / h
2322                 for f in formats:
2323                     if f.get('vcodec') != 'none':
2324                         f['stretched_ratio'] = ratio
2325
2326         if not formats:
2327             token = extract_token(video_info)
2328             if not token:
2329                 if 'reason' in video_info:
2330                     if 'The uploader has not made this video available in your country.' in video_info['reason']:
2331                         regions_allowed = self._html_search_meta(
2332                             'regionsAllowed', video_webpage, default=None)
2333                         countries = regions_allowed.split(',') if regions_allowed else None
2334                         self.raise_geo_restricted(
2335                             msg=video_info['reason'][0], countries=countries)
2336                     reason = video_info['reason'][0]
2337                     if 'Invalid parameters' in reason:
2338                         unavailable_message = extract_unavailable_message()
2339                         if unavailable_message:
2340                             reason = unavailable_message
2341                     raise ExtractorError(
2342                         'YouTube said: %s' % reason,
2343                         expected=True, video_id=video_id)
2344                 else:
2345                     raise ExtractorError(
2346                         '"token" parameter not in video info for unknown reason',
2347                         video_id=video_id)
2348
2349         if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2350             raise ExtractorError('This video is DRM protected.', expected=True)
2351
2352         self._sort_formats(formats)
2353
2354         self.mark_watched(video_id, video_info, player_response)
2355
2356         return {
2357             'id': video_id,
2358             'uploader': video_uploader,
2359             'uploader_id': video_uploader_id,
2360             'uploader_url': video_uploader_url,
2361             'channel_id': channel_id,
2362             'channel_url': channel_url,
2363             'upload_date': upload_date,
2364             'license': video_license,
2365             'creator': video_creator or artist,
2366             'title': video_title,
2367             'alt_title': video_alt_title or track,
2368             'thumbnail': video_thumbnail,
2369             'description': video_description,
2370             'categories': video_categories,
2371             'tags': video_tags,
2372             'subtitles': video_subtitles,
2373             'automatic_captions': automatic_captions,
2374             'duration': video_duration,
2375             'age_limit': 18 if age_gate else 0,
2376             'annotations': video_annotations,
2377             'chapters': chapters,
2378             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2379             'view_count': view_count,
2380             'like_count': like_count,
2381             'dislike_count': dislike_count,
2382             'average_rating': average_rating,
2383             'formats': formats,
2384             'is_live': is_live,
2385             'start_time': start_time,
2386             'end_time': end_time,
2387             'series': series,
2388             'season_number': season_number,
2389             'episode_number': episode_number,
2390             'track': track,
2391             'artist': artist,
2392             'album': album,
2393             'release_date': release_date,
2394             'release_year': release_year,
2395         }
2396
2397
2398 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2399     IE_DESC = 'YouTube.com playlists'
2400     _VALID_URL = r"""(?x)(?:
2401                         (?:https?://)?
2402                         (?:\w+\.)?
2403                         (?:
2404                             (?:
2405                                 youtube\.com|
2406                                 invidio\.us
2407                             )
2408                             /
2409                             (?:
2410                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2411                                \? (?:.*?[&;])*? (?:p|a|list)=
2412                             |  p/
2413                             )|
2414                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2415                         )
2416                         (
2417                             (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2418                             # Top tracks, they can also include dots
2419                             |(?:MC)[\w\.]*
2420                         )
2421                         .*
2422                      |
2423                         (%(playlist_id)s)
2424                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    # URL template of the playlist page; formatted with the playlist id
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    # Matches one video link on a playlist page; captures the 11-character
    # video id, its index and (optionally) the link text as title
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    # Test cases: each entry lists a URL (or bare playlist id) together with
    # the expected metadata and/or entry counts
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2017 華語最新單曲 (2/24更新)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }]
2564
    def _real_initialize(self):
        """Initialization step: delegate to ``self._login()``."""
        self._login()
2567
2568     def _extract_mix(self, playlist_id):
2569         # The mixes are generated from a single video
2570         # the id of the playlist is just 'RD' + video_id
2571         ids = []
2572         last_id = playlist_id[-11:]
2573         for n in itertools.count(1):
2574             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2575             webpage = self._download_webpage(
2576                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2577             new_ids = orderedSet(re.findall(
2578                 r'''(?xs)data-video-username=".*?".*?
2579                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2580                 webpage))
2581             # Fetch new pages until all the videos are repeated, it seems that
2582             # there are always 51 unique videos.
2583             new_ids = [_id for _id in new_ids if _id not in ids]
2584             if not new_ids:
2585                 break
2586             ids.extend(new_ids)
2587             last_id = ids[-1]
2588
2589         url_results = self._ids_to_results(ids)
2590
2591         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2592         title_span = (
2593             search_title('playlist-title')
2594             or search_title('title long-title')
2595             or search_title('title'))
2596         title = clean_html(title_span)
2597
2598         return self.playlist_result(url_results, playlist_id, title)
2599
2600     def _extract_playlist(self, playlist_id):
2601         url = self._TEMPLATE_URL % playlist_id
2602         page = self._download_webpage(url, playlist_id)
2603
2604         # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2605         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2606             match = match.strip()
2607             # Check if the playlist exists or is private
2608             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2609             if mobj:
2610                 reason = mobj.group('reason')
2611                 message = 'This playlist %s' % reason
2612                 if 'private' in reason:
2613                     message += ', use --username or --netrc to access it'
2614                 message += '.'
2615                 raise ExtractorError(message, expected=True)
2616             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2617                 raise ExtractorError(
2618                     'Invalid parameters. Maybe URL is incorrect.',
2619                     expected=True)
2620             elif re.match(r'[^<]*Choose your language[^<]*', match):
2621                 continue
2622             else:
2623                 self.report_warning('Youtube gives an alert message: ' + match)
2624
2625         playlist_title = self._html_search_regex(
2626             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2627             page, 'title', default=None)
2628
2629         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2630         uploader = self._search_regex(
2631             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2632             page, 'uploader', default=None)
2633         mobj = re.search(
2634             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2635             page)
2636         if mobj:
2637             uploader_id = mobj.group('uploader_id')
2638             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2639         else:
2640             uploader_id = uploader_url = None
2641
2642         has_videos = True
2643
2644         if not playlist_title:
2645             try:
2646                 # Some playlist URLs don't actually serve a playlist (e.g.
2647                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2648                 next(self._entries(page, playlist_id))
2649             except StopIteration:
2650                 has_videos = False
2651
2652         playlist = self.playlist_result(
2653             self._entries(page, playlist_id), playlist_id, playlist_title)
2654         playlist.update({
2655             'uploader': uploader,
2656             'uploader_id': uploader_id,
2657             'uploader_url': uploader_url,
2658         })
2659
2660         return has_videos, playlist
2661
2662     def _check_download_just_video(self, url, playlist_id):
2663         # Check if it's a video-specific URL
2664         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2665         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2666             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2667             'video id', default=None)
2668         if video_id:
2669             if self._downloader.params.get('noplaylist'):
2670                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2671                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2672             else:
2673                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2674                 return video_id, None
2675         return None, None
2676
2677     def _real_extract(self, url):
2678         # Extract playlist id
2679         mobj = re.match(self._VALID_URL, url)
2680         if mobj is None:
2681             raise ExtractorError('Invalid URL: %s' % url)
2682         playlist_id = mobj.group(1) or mobj.group(2)
2683
2684         video_id, video = self._check_download_just_video(url, playlist_id)
2685         if video:
2686             return video
2687
2688         if playlist_id.startswith(('RD', 'UL', 'PU')):
2689             # Mixes require a custom extraction process
2690             return self._extract_mix(playlist_id)
2691
2692         has_videos, playlist = self._extract_playlist(playlist_id)
2693         if has_videos or not video_id:
2694             return playlist
2695
2696         # Some playlist URLs don't actually serve a playlist (see
2697         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2698         # Fallback to plain video extraction if there is a video id
2699         # along with playlist id.
2700         return self.url_result(video_id, 'Youtube', video_id=video_id)
2701
2702
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for channel URLs (``/channel/<id>``)."""
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that the playlists or live extractors already accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL for ``channel_id`` (``url`` is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract the videos of a channel.

        When a channel id starting with 'UC' can be obtained, the extraction
        is delegated to the playlist extractor through the matching 'UU'
        playlist; otherwise the channel pages themselves are scraped.
        """
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Second chance: dig the id out of the app link metadata
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            uploads_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % ('UU' + channel_playlist_id[2:]))
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_mobj = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_mobj is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; when there is none, surface the alert
        # message shown on the page, if any
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2795
2796
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user pages (``/user/<name>``, ``/c/<name>``, ``/<name>`` or ``ytuser:<name>``)."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if no other Youtube extractor of this module does."""
        # This extractor's regex is very permissive and would also match URLs
        # that belong to the other youtube extractors, so give every other
        # module-level ``Youtube*IE`` class the chance to claim the URL first.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL, keeping the 'user' or 'c' path kind of ``url``."""
        mobj = re.match(self._VALID_URL, url)
        # URLs without an explicit kind (and ``ytuser:`` ids) default to 'user'
        path_kind = mobj.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, mobj.group('id'))
2847
2848
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of a user or channel."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a ``/live`` URL to the video it shows, else to its base URL.

        The video result is returned only when the page could be downloaded,
        its og:type starts with 'video' and its 'videoId' meta value is a
        well-formed 11-character id. In every other case the result points
        at the URL with the ``/live`` suffix removed.
        """
        channel_id, base_url = re.match(self._VALID_URL, url).group('id', 'base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2899
2900
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` page of a user or channel.

    Only declares the URL pattern, names and tests; the extraction logic is
    inherited from the base class.
    """
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2929
2930
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Base class of the search extractors; only overrides the video link pattern."""
    # Captures the 11-character video id and, when a title attribute follows
    # the link, the title as well
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2933
2934
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor for ``ytsearch`` queries, backed by the results pages."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query string arguments added to the results URL (see subclasses)
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another, following the "Next"
        link, until ``n`` videos are collected, a page yields no video or no
        further page is linked. At most ``n`` entries are returned.

        Raises ExtractorError (expected) when the page carries a search
        message, which is treated as "no video results".
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are collected: with exactly n
            # videos in hand there is no point in downloading another page
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The last page may have overshot the requested amount
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2983
2984
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor that adds a 'search_sort' query argument."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Merged into the results URL query by the inherited _get_n_results
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2990
2991
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for search result page URLs (``/results?search_query=...``)."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return its videos, titled by the decoded query."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
3012
3013
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for show URLs (``/show/<id>``)."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Hand the show's ``/playlists`` page over to the base class extraction."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3031
3032
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name ('youtube:<feed name>')."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Initialization step: delegate to ``self._login()``."""
        self._login()

    def _entries(self, page):
        """Yield a result for every distinct video id of the feed, page by page.

        ``page`` is the HTML of the first feed page. Further portions are
        fetched through the "load more" link for as long as one is present
        and the current portion still contains unseen video ids.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = set()  # a set keeps the "already seen" test cheap on long feeds
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist result."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3084
3085
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the watch later list, handled as the playlist with id 'WL'."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single requested video if applicable, else the 'WL' playlist."""
        video = self._check_download_just_video(url, 'WL')[1]
        if video:
            return video
        # Only the playlist part of the (has_videos, playlist) tuple is needed
        return self._extract_playlist('WL')[1]
3105
3106
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites page; resolves it to its playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the playlist id on the favourites page and delegate to the playlist extractor."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
3117
3118
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed path component and playlist title used by the base class
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
3124
3125
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    # The shortcut accepted by _VALID_URL starts with a colon (":ytsubs"),
    # so the description advertises it in that form
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Feed path component and playlist title used by the base class
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3131
3132
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    # Feed path component and playlist title used by the base class
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
3138
3139
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video id and explain the likely cause.

    Extraction always fails with an expected error that hints at quoting
    the URL in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches a watch URL that ends after at most one of the listed
    # parameters, or an attribution_link URL that ends after its 'a' parameter
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .',
            expected=True)
3187
3188
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than 11 characters."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)