youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_kwargs,
  20     compat_parse_qs,
  21     compat_urllib_parse_unquote,
  22     compat_urllib_parse_unquote_plus,
  23     compat_urllib_parse_urlencode,
  24     compat_urllib_parse_urlparse,
  25     compat_urlparse,
  26     compat_str,
  27 )
  28 from ..utils import (
  29     clean_html,
  30     error_to_compat_str,
  31     ExtractorError,
  32     float_or_none,
  33     get_element_by_attribute,
  34     get_element_by_id,
  35     int_or_none,
  36     mimetype2ext,
  37     orderedSet,
  38     parse_codecs,
  39     parse_duration,
  40     qualities,
  41     remove_quotes,
  42     remove_start,
  43     smuggle_url,
  44     str_to_int,
  45     try_get,
  46     unescapeHTML,
  47     unified_strdate,
  48     unsmuggle_url,
  49     uppercase_escape,
  50     urlencode_postdata,
  51 )
  52
  53
  54 class YoutubeBaseInfoExtractor(InfoExtractor):
  55     """Provide base functions for Youtube extractors"""
  56     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  57     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  58
  59     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  60     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  61     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  62
  63     _NETRC_MACHINE = 'youtube'
  64     # If True it will raise an error if no login info is provided
  65     _LOGIN_REQUIRED = False
  66
  67     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
  68
  69     def _set_language(self):
  70         self._set_cookie(
  71             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  72             # YouTube sets the expire time to about two months
  73             expire_time=time.time() + 2 * 30 * 24 * 3600)
  74
  75     def _ids_to_results(self, ids):
  76         return [
  77             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  78             for vid_id in ids]
  79
  80     def _login(self):
  81         """
  82         Attempt to log in to YouTube.
  83         True is returned if successful or skipped.
  84         False is returned if login failed.
  85
  86         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  87         """
  88         username, password = self._get_login_info()
  89         # No authentication to be performed
  90         if username is None:
  91             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
  92                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  93             return True
  94
  95         login_page = self._download_webpage(
  96             self._LOGIN_URL, None,
  97             note='Downloading login page',
  98             errnote='unable to fetch login page', fatal=False)
  99         if login_page is False:
 100             return
 101
 102         login_form = self._hidden_inputs(login_page)
 103
 104         def req(url, f_req, note, errnote):
 105             data = login_form.copy()
 106             data.update({
 107                 'pstMsg': 1,
 108                 'checkConnection': 'youtube',
 109                 'checkedDomains': 'youtube',
 110                 'hl': 'en',
 111                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 112                 'f.req': json.dumps(f_req),
 113                 'flowName': 'GlifWebSignIn',
 114                 'flowEntry': 'ServiceLogin',
 115             })
 116             return self._download_json(
 117                 url, None, note=note, errnote=errnote,
 118                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 119                 fatal=False,
 120                 data=urlencode_postdata(data), headers={
 121                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 122                     'Google-Accounts-XSRF': 1,
 123                 })
 124
 125         def warn(message):
 126             self._downloader.report_warning(message)
 127
 128         lookup_req = [
 129             username,
 130             None, [], None, 'US', None, None, 2, False, True,
 131             [
 132                 None, None,
 133                 [2, 1, None, 1,
 134                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 135                  None, [], 4],
 136                 1, [None, None, []], None, None, None, True
 137             ],
 138             username,
 139         ]
 140
 141         lookup_results = req(
 142             self._LOOKUP_URL, lookup_req,
 143             'Looking up account info', 'Unable to look up account info')
 144
 145         if lookup_results is False:
 146             return False
 147
 148         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 149         if not user_hash:
 150             warn('Unable to extract user hash')
 151             return False
 152
 153         challenge_req = [
 154             user_hash,
 155             None, 1, None, [1, None, None, None, [password, None, True]],
 156             [
 157                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 158                 1, [None, None, []], None, None, None, True
 159             ]]
 160
 161         challenge_results = req(
 162             self._CHALLENGE_URL, challenge_req,
 163             'Logging in', 'Unable to log in')
 164
 165         if challenge_results is False:
 166             return
 167
 168         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 169         if login_res:
 170             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 171             warn(
 172                 'Unable to login: %s' % 'Invalid password'
 173                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 174             return False
 175
 176         res = try_get(challenge_results, lambda x: x[0][-1], list)
 177         if not res:
 178             warn('Unable to extract result entry')
 179             return False
 180
 181         login_challenge = try_get(res, lambda x: x[0][0], list)
 182         if login_challenge:
 183             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 184             if challenge_str == 'TWO_STEP_VERIFICATION':
 185                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 186                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 187                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 188                 if status == 'QUOTA_EXCEEDED':
 189                     warn('Exceeded the limit of TFA codes, try later')
 190                     return False
 191
 192                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 193                 if not tl:
 194                     warn('Unable to extract TL')
 195                     return False
 196
 197                 tfa_code = self._get_tfa_info('2-step verification code')
 198
 199                 if not tfa_code:
 200                     warn(
 201                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 202                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 203                     return False
 204
 205                 tfa_code = remove_start(tfa_code, 'G-')
 206
 207                 tfa_req = [
 208                     user_hash, None, 2, None,
 209                     [
 210                         9, None, None, None, None, None, None, None,
 211                         [None, tfa_code, True, 2]
 212                     ]]
 213
 214                 tfa_results = req(
 215                     self._TFA_URL.format(tl), tfa_req,
 216                     'Submitting TFA code', 'Unable to submit TFA code')
 217
 218                 if tfa_results is False:
 219                     return False
 220
 221                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 222                 if tfa_res:
 223                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 224                     warn(
 225                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 226                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 227                     return False
 228
 229                 check_cookie_url = try_get(
 230                     tfa_results, lambda x: x[0][-1][2], compat_str)
 231             else:
 232                 CHALLENGES = {
 233                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 234                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 235                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 236                 }
 237                 challenge = CHALLENGES.get(
 238                     challenge_str,
 239                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 240                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 241                 return False
 242         else:
 243             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 244
 245         if not check_cookie_url:
 246             warn('Unable to extract CheckCookie URL')
 247             return False
 248
 249         check_cookie_results = self._download_webpage(
 250             check_cookie_url, None, 'Checking cookie', fatal=False)
 251
 252         if check_cookie_results is False:
 253             return False
 254
 255         if 'https://myaccount.google.com/' not in check_cookie_results:
 256             warn('Unable to log in')
 257             return False
 258
 259         return True
 260
 261     def _download_webpage_handle(self, *args, **kwargs):
 262         kwargs.setdefault('query', {})['disable_polymer'] = 'true'
 263         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 264             *args, **compat_kwargs(kwargs))
 265
 266     def _real_initialize(self):
 267         if self._downloader is None:
 268             return
 269         self._set_language()
 270         if not self._login():
 271             return
 272
 273
 274 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 275     # Extract entries from page with "Load more" button
 276     def _entries(self, page, playlist_id):
 277         more_widget_html = content_html = page
 278         for page_num in itertools.count(1):
 279             for entry in self._process_page(content_html):
 280                 yield entry
 281
 282             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 283             if not mobj:
 284                 break
 285
 286             more = self._download_json(
 287                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 288                 'Downloading page #%s' % page_num,
 289                 transform_source=uppercase_escape)
 290             content_html = more['content_html']
 291             if not content_html.strip():
 292                 # Some webpages show a "Load more" button but they don't
 293                 # have more videos
 294                 break
 295             more_widget_html = more['load_more_widget_html']
 296
 297
 298 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 299     def _process_page(self, content):
 300         for video_id, video_title in self.extract_videos_from_page(content):
 301             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 302
 303     def extract_videos_from_page(self, page):
 304         ids_in_page = []
 305         titles_in_page = []
 306         for mobj in re.finditer(self._VIDEO_RE, page):
 307             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 308             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 309                 continue
 310             video_id = mobj.group('id')
 311             video_title = unescapeHTML(mobj.group('title'))
 312             if video_title:
 313                 video_title = video_title.strip()
 314             try:
 315                 idx = ids_in_page.index(video_id)
 316                 if video_title and not titles_in_page[idx]:
 317                     titles_in_page[idx] = video_title
 318             except ValueError:
 319                 ids_in_page.append(video_id)
 320                 titles_in_page.append(video_title)
 321         return zip(ids_in_page, titles_in_page)
 322
 323
 324 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 325     def _process_page(self, content):
 326         for playlist_id in orderedSet(re.findall(
 327                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 328                 content)):
 329             yield self.url_result(
 330                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 331
 332     def _real_extract(self, url):
 333         playlist_id = self._match_id(url)
 334         webpage = self._download_webpage(url, playlist_id)
 335         title = self._og_search_title(webpage, fatal=False)
 336         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 337
 338
 339 class YoutubeIE(YoutubeBaseInfoExtractor):
 340     IE_DESC = 'YouTube.com'
 341     _VALID_URL = r"""(?x)^
 342                      (
 343                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 344                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 345                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 346                             (?:www\.)?pwnyoutube\.com/|
 347                             (?:www\.)?hooktube\.com/|
 348                             (?:www\.)?yourepeat\.com/|
 349                             tube\.majestyc\.net/|
 350                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 351                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 352                          (?:                                                  # the various things that can precede the ID:
 353                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 354                              |(?:                                             # or the v= param in all its forms
 355                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 356                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 357                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 358                                  v=
 359                              )
 360                          ))
 361                          |(?:
 362                             youtu\.be|                                        # just youtu.be/xxxx
 363                             vid\.plus|                                        # or vid.plus/xxxx
 364                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 365                          )/
 366                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 367                          )
 368                      )?                                                       # all until now is optional -> you can pass the naked ID
 369                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 370                      (?!.*?\blist=
 371                         (?:
 372                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 373                             WL                                                # WL are handled by the watch later IE
 374                         )
 375                      )
 376                      (?(1).+)?                                                # if we found the ID, everything can follow
 377                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 378     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 379     _formats = {
 380         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 381         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 382         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 383         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 384         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 385         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 386         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 387         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 388         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 389         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 390         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 391         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 392         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 393         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 394         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 395         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 396         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 397         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 398
 399
 400         # 3D videos
 401         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 402         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 403         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 404         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 405         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 406         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 407         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 408
 409         # Apple HTTP Live Streaming
 410         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 411         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 412         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 413         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 414         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 415         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 416         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 417         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 418
 419         # DASH mp4 video
 420         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 421         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 422         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 423         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 424         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 425         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 426         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 427         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 428         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 429         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 430         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 431         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 432
 433         # Dash mp4 audio
 434         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 435         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 436         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 437         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 438         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 439         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 440         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 441
 442         # Dash webm
 443         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 444         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 445         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 446         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 447         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 448         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 449         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 450         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 451         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 452         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 453         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 454         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 455         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 456         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 457         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 458         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 459         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 460         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 461         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 462         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 463         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 464         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 465
 466         # Dash webm audio
 467         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 468         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 469
 470         # Dash webm audio with opus inside
 471         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 472         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 473         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 474
 475         # RTMP (unnamed)
 476         '_rtmp': {'protocol': 'rtmp'},
 477     }
 478     _SUBTITLE_FORMATS = ('ttml', 'vtt')
 479
 480     _GEO_BYPASS = False
 481
 482     IE_NAME = 'youtube'
 483     _TESTS = [
 484         {
 485             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 486             'info_dict': {
 487                 'id': 'BaW_jenozKc',
 488                 'ext': 'mp4',
 489                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 490                 'uploader': 'Philipp Hagemeister',
 491                 'uploader_id': 'phihag',
 492                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 493                 'upload_date': '20121002',
 494                 'license': 'Standard YouTube License',
 495                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 496                 'categories': ['Science & Technology'],
 497                 'tags': ['youtube-dl'],
 498                 'duration': 10,
 499                 'like_count': int,
 500                 'dislike_count': int,
 501                 'start_time': 1,
 502                 'end_time': 9,
 503             }
 504         },
 505         {
 506             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 507             'note': 'Test generic use_cipher_signature video (#897)',
 508             'info_dict': {
 509                 'id': 'UxxajLWwzqY',
 510                 'ext': 'mp4',
 511                 'upload_date': '20120506',
 512                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 513                 'alt_title': 'I Love It (feat. Charli XCX)',
 514                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 515                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 516                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 517                          'iconic ep', 'iconic', 'love', 'it'],
 518                 'duration': 180,
 519                 'uploader': 'Icona Pop',
 520                 'uploader_id': 'IconaPop',
 521                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 522                 'license': 'Standard YouTube License',
 523                 'creator': 'Icona Pop',
 524                 'track': 'I Love It (feat. Charli XCX)',
 525                 'artist': 'Icona Pop',
 526             }
 527         },
 528         {
 529             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 530             'note': 'Test VEVO video with age protection (#956)',
 531             'info_dict': {
 532                 'id': '07FYdnEawAQ',
 533                 'ext': 'mp4',
 534                 'upload_date': '20130703',
 535                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 536                 'alt_title': 'Tunnel Vision',
 537                 'description': 'md5:64249768eec3bc4276236606ea996373',
 538                 'duration': 419,
 539                 'uploader': 'justintimberlakeVEVO',
 540                 'uploader_id': 'justintimberlakeVEVO',
 541                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 542                 'license': 'Standard YouTube License',
 543                 'creator': 'Justin Timberlake',
 544                 'track': 'Tunnel Vision',
 545                 'artist': 'Justin Timberlake',
 546                 'age_limit': 18,
 547             }
 548         },
 549         {
 550             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 551             'note': 'Embed-only video (#1746)',
 552             'info_dict': {
 553                 'id': 'yZIXLfi8CZQ',
 554                 'ext': 'mp4',
 555                 'upload_date': '20120608',
 556                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 557                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 558                 'uploader': 'SET India',
 559                 'uploader_id': 'setindia',
 560                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 561                 'license': 'Standard YouTube License',
 562                 'age_limit': 18,
 563             }
 564         },
 565         {
 566             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 567             'note': 'Use the first video ID in the URL',
 568             'info_dict': {
 569                 'id': 'BaW_jenozKc',
 570                 'ext': 'mp4',
 571                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 572                 'uploader': 'Philipp Hagemeister',
 573                 'uploader_id': 'phihag',
 574                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 575                 'upload_date': '20121002',
 576                 'license': 'Standard YouTube License',
 577                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 578                 'categories': ['Science & Technology'],
 579                 'tags': ['youtube-dl'],
 580                 'duration': 10,
 581                 'like_count': int,
 582                 'dislike_count': int,
 583             },
 584             'params': {
 585                 'skip_download': True,
 586             },
 587         },
 588         {
 589             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 590             'note': '256k DASH audio (format 141) via DASH manifest',
 591             'info_dict': {
 592                 'id': 'a9LDPn-MO4I',
 593                 'ext': 'm4a',
 594                 'upload_date': '20121002',
 595                 'uploader_id': '8KVIDEO',
 596                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 597                 'description': '',
 598                 'uploader': '8KVIDEO',
 599                 'license': 'Standard YouTube License',
 600                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 601             },
 602             'params': {
 603                 'youtube_include_dash_manifest': True,
 604                 'format': '141',
 605             },
 606             'skip': 'format 141 not served anymore',
 607         },
 608         # DASH manifest with encrypted signature
 609         {
 610             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 611             'info_dict': {
 612                 'id': 'IB3lcPjvWLA',
 613                 'ext': 'm4a',
 614                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 615                 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
 616                 'duration': 244,
 617                 'uploader': 'AfrojackVEVO',
 618                 'uploader_id': 'AfrojackVEVO',
 619                 'upload_date': '20131011',
 620                 'license': 'Standard YouTube License',
 621             },
 622             'params': {
 623                 'youtube_include_dash_manifest': True,
 624                 'format': '141/bestaudio[ext=m4a]',
 625             },
 626         },
 627         # JS player signature function name containing $
 628         {
 629             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 630             'info_dict': {
 631                 'id': 'nfWlot6h_JM',
 632                 'ext': 'm4a',
 633                 'title': 'Taylor Swift - Shake It Off',
 634                 'alt_title': 'Shake It Off',
 635                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
 636                 'duration': 242,
 637                 'uploader': 'TaylorSwiftVEVO',
 638                 'uploader_id': 'TaylorSwiftVEVO',
 639                 'upload_date': '20140818',
 640                 'license': 'Standard YouTube License',
 641                 'creator': 'Taylor Swift',
 642             },
 643             'params': {
 644                 'youtube_include_dash_manifest': True,
 645                 'format': '141/bestaudio[ext=m4a]',
 646             },
 647         },
 648         # Controversy video
 649         {
 650             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 651             'info_dict': {
 652                 'id': 'T4XJQO3qol8',
 653                 'ext': 'mp4',
 654                 'duration': 219,
 655                 'upload_date': '20100909',
 656                 'uploader': 'TJ Kirk',
 657                 'uploader_id': 'TheAmazingAtheist',
 658                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 659                 'license': 'Standard YouTube License',
 660                 'title': 'Burning Everyone\'s Koran',
 661                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 662             }
 663         },
 664         # Normal age-gate video (No vevo, embed allowed)
 665         {
 666             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 667             'info_dict': {
 668                 'id': 'HtVdAasjOgU',
 669                 'ext': 'mp4',
 670                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 671                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 672                 'duration': 142,
 673                 'uploader': 'The Witcher',
 674                 'uploader_id': 'WitcherGame',
 675                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 676                 'upload_date': '20140605',
 677                 'license': 'Standard YouTube License',
 678                 'age_limit': 18,
 679             },
 680         },
 681         # Age-gate video with encrypted signature
 682         {
 683             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 684             'info_dict': {
 685                 'id': '6kLq3WMV1nU',
 686                 'ext': 'webm',
 687                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 688                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 689                 'duration': 246,
 690                 'uploader': 'LloydVEVO',
 691                 'uploader_id': 'LloydVEVO',
 692                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 693                 'upload_date': '20110629',
 694                 'license': 'Standard YouTube License',
 695                 'age_limit': 18,
 696             },
 697         },
 698         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 699         # YouTube Red ad is not captured for creator
 700         {
 701             'url': '__2ABJjxzNo',
 702             'info_dict': {
 703                 'id': '__2ABJjxzNo',
 704                 'ext': 'mp4',
 705                 'duration': 266,
 706                 'upload_date': '20100430',
 707                 'uploader_id': 'deadmau5',
 708                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 709                 'creator': 'deadmau5',
 710                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 711                 'uploader': 'deadmau5',
 712                 'license': 'Standard YouTube License',
 713                 'title': 'Deadmau5 - Some Chords (HD)',
 714                 'alt_title': 'Some Chords',
 715             },
 716             'expected_warnings': [
 717                 'DASH manifest missing',
 718             ]
 719         },
 720         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 721         {
 722             'url': 'lqQg6PlCWgI',
 723             'info_dict': {
 724                 'id': 'lqQg6PlCWgI',
 725                 'ext': 'mp4',
 726                 'duration': 6085,
 727                 'upload_date': '20150827',
 728                 'uploader_id': 'olympic',
 729                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 730                 'license': 'Standard YouTube License',
 731                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 732                 'uploader': 'Olympic',
 733                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 734             },
 735             'params': {
 736                 'skip_download': 'requires avconv',
 737             }
 738         },
 739         # Non-square pixels
 740         {
 741             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 742             'info_dict': {
 743                 'id': '_b-2C3KPAM0',
 744                 'ext': 'mp4',
 745                 'stretched_ratio': 16 / 9.,
 746                 'duration': 85,
 747                 'upload_date': '20110310',
 748                 'uploader_id': 'AllenMeow',
 749                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 750                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 751                 'uploader': '孫ᄋᄅ',
 752                 'license': 'Standard YouTube License',
 753                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 754             },
 755         },
 756         # url_encoded_fmt_stream_map is empty string
 757         {
 758             'url': 'qEJwOuvDf7I',
 759             'info_dict': {
 760                 'id': 'qEJwOuvDf7I',
 761                 'ext': 'webm',
 762                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 763                 'description': '',
 764                 'upload_date': '20150404',
 765                 'uploader_id': 'spbelect',
 766                 'uploader': 'Наблюдатели Петербурга',
 767             },
 768             'params': {
 769                 'skip_download': 'requires avconv',
 770             },
 771             'skip': 'This live event has ended.',
 772         },
 773         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
 774         {
 775             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 776             'info_dict': {
 777                 'id': 'FIl7x6_3R5Y',
 778                 'ext': 'webm',
 779                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 780                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 781                 'duration': 220,
 782                 'upload_date': '20150625',
 783                 'uploader_id': 'dorappi2000',
 784                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 785                 'uploader': 'dorappi2000',
 786                 'license': 'Standard YouTube License',
 787                 'formats': 'mincount:31',
 788             },
 789             'skip': 'not actual anymore',
 790         },
 791         # DASH manifest with segment_list
 792         {
 793             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 794             'md5': '8ce563a1d667b599d21064e982ab9e31',
 795             'info_dict': {
 796                 'id': 'CsmdDsKjzN8',
 797                 'ext': 'mp4',
 798                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 799                 'uploader': 'Airtek',
 800                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 801                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 802                 'license': 'Standard YouTube License',
 803                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 804             },
 805             'params': {
 806                 'youtube_include_dash_manifest': True,
 807                 'format': '135',  # bestvideo
 808             },
 809             'skip': 'This live event has ended.',
 810         },
 811         {
 812             # Multifeed videos (multiple cameras), URL is for Main Camera
 813             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 814             'info_dict': {
 815                 'id': 'jqWvoWXjCVs',
 816                 'title': 'teamPGP: Rocket League Noob Stream',
 817                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 818             },
 819             'playlist': [{
 820                 'info_dict': {
 821                     'id': 'jqWvoWXjCVs',
 822                     'ext': 'mp4',
 823                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 824                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 825                     'duration': 7335,
 826                     'upload_date': '20150721',
 827                     'uploader': 'Beer Games Beer',
 828                     'uploader_id': 'beergamesbeer',
 829                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 830                     'license': 'Standard YouTube License',
 831                 },
 832             }, {
 833                 'info_dict': {
 834                     'id': '6h8e8xoXJzg',
 835                     'ext': 'mp4',
 836                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 837                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 838                     'duration': 7337,
 839                     'upload_date': '20150721',
 840                     'uploader': 'Beer Games Beer',
 841                     'uploader_id': 'beergamesbeer',
 842                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 843                     'license': 'Standard YouTube License',
 844                 },
 845             }, {
 846                 'info_dict': {
 847                     'id': 'PUOgX5z9xZw',
 848                     'ext': 'mp4',
 849                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 850                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 851                     'duration': 7337,
 852                     'upload_date': '20150721',
 853                     'uploader': 'Beer Games Beer',
 854                     'uploader_id': 'beergamesbeer',
 855                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 856                     'license': 'Standard YouTube License',
 857                 },
 858             }, {
 859                 'info_dict': {
 860                     'id': 'teuwxikvS5k',
 861                     'ext': 'mp4',
 862                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 863                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 864                     'duration': 7334,
 865                     'upload_date': '20150721',
 866                     'uploader': 'Beer Games Beer',
 867                     'uploader_id': 'beergamesbeer',
 868                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 869                     'license': 'Standard YouTube License',
 870                 },
 871             }],
 872             'params': {
 873                 'skip_download': True,
 874             },
 875         },
 876         {
 877             # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
 878             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 879             'info_dict': {
 880                 'id': 'gVfLd0zydlo',
 881                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 882             },
 883             'playlist_count': 2,
 884             'skip': 'Not multifeed anymore',
 885         },
 886         {
 887             'url': 'https://vid.plus/FlRa-iH7PGw',
 888             'only_matching': True,
 889         },
 890         {
 891             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 892             'only_matching': True,
 893         },
 894         {
 895             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
 896             # Also tests cut-off URL expansion in video description (see
 897             # https://github.com/rg3/youtube-dl/issues/1892,
 898             # https://github.com/rg3/youtube-dl/issues/8164)
 899             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 900             'info_dict': {
 901                 'id': 'lsguqyKfVQg',
 902                 'ext': 'mp4',
 903                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 904                 'alt_title': 'Dark Walk - Position Music',
 905                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 906                 'duration': 133,
 907                 'upload_date': '20151119',
 908                 'uploader_id': 'IronSoulElf',
 909                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 910                 'uploader': 'IronSoulElf',
 911                 'license': 'Standard YouTube License',
 912                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 913                 'track': 'Dark Walk - Position Music',
 914                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 915             },
 916             'params': {
 917                 'skip_download': True,
 918             },
 919         },
 920         {
 921             # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
 922             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 923             'only_matching': True,
 924         },
 925         {
 926             # Video with yt:stretch=17:0
 927             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 928             'info_dict': {
 929                 'id': 'Q39EVAstoRM',
 930                 'ext': 'mp4',
 931                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 932                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 933                 'upload_date': '20151107',
 934                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 935                 'uploader': 'CH GAMER DROID',
 936             },
 937             'params': {
 938                 'skip_download': True,
 939             },
 940             'skip': 'This video does not exist.',
 941         },
 942         {
 943             # Video licensed under Creative Commons
 944             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 945             'info_dict': {
 946                 'id': 'M4gD1WSo5mA',
 947                 'ext': 'mp4',
 948                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 949                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 950                 'duration': 721,
 951                 'upload_date': '20150127',
 952                 'uploader_id': 'BerkmanCenter',
 953                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 954                 'uploader': 'The Berkman Klein Center for Internet & Society',
 955                 'license': 'Creative Commons Attribution license (reuse allowed)',
 956             },
 957             'params': {
 958                 'skip_download': True,
 959             },
 960         },
 961         {
 962             # Channel-like uploader_url
 963             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 964             'info_dict': {
 965                 'id': 'eQcmzGIKrzg',
 966                 'ext': 'mp4',
 967                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 968                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 969                 'duration': 4060,
 970                 'upload_date': '20151119',
 971                 'uploader': 'Bernie Sanders',
 972                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 973                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 974                 'license': 'Creative Commons Attribution license (reuse allowed)',
 975             },
 976             'params': {
 977                 'skip_download': True,
 978             },
 979         },
 980         {
 981             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 982             'only_matching': True,
 983         },
 984         {
 985             # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
 986             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 987             'only_matching': True,
 988         },
 989         {
 990             # Rental video preview
 991             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
 992             'info_dict': {
 993                 'id': 'uGpuVWrhIzE',
 994                 'ext': 'mp4',
 995                 'title': 'Piku - Trailer',
 996                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
 997                 'upload_date': '20150811',
 998                 'uploader': 'FlixMatrix',
 999                 'uploader_id': 'FlixMatrixKaravan',
1000                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1001                 'license': 'Standard YouTube License',
1002             },
1003             'params': {
1004                 'skip_download': True,
1005             },
1006             'skip': 'This video is not available.',
1007         },
1008         {
1009             # YouTube Red video with episode data
1010             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1011             'info_dict': {
1012                 'id': 'iqKdEhx-dD4',
1013                 'ext': 'mp4',
1014                 'title': 'Isolation - Mind Field (Ep 1)',
1015                 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
1016                 'duration': 2085,
1017                 'upload_date': '20170118',
1018                 'uploader': 'Vsauce',
1019                 'uploader_id': 'Vsauce',
1020                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1021                 'license': 'Standard YouTube License',
1022                 'series': 'Mind Field',
1023                 'season_number': 1,
1024                 'episode_number': 1,
1025             },
1026             'params': {
1027                 'skip_download': True,
1028             },
1029             'expected_warnings': [
1030                 'Skipping DASH manifest',
1031             ],
1032         },
1033         {
1034             # The following content has been identified by the YouTube community
1035             # as inappropriate or offensive to some audiences.
1036             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1037             'info_dict': {
1038                 'id': '6SJNVb0GnPI',
1039                 'ext': 'mp4',
1040                 'title': 'Race Differences in Intelligence',
1041                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1042                 'duration': 965,
1043                 'upload_date': '20140124',
1044                 'uploader': 'New Century Foundation',
1045                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1046                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1047                 'license': 'Standard YouTube License',
1048             },
1049             'params': {
1050                 'skip_download': True,
1051             },
1052         },
1053         {
1054             # itag 212
1055             'url': '1t24XAntNCY',
1056             'only_matching': True,
1057         },
1058         {
1059             # geo restricted to JP
1060             'url': 'sJL6WA-aGkQ',
1061             'only_matching': True,
1062         },
1063         {
1064             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1065             'only_matching': True,
1066         },
1067     ]
1068
1069     def __init__(self, *args, **kwargs):
1070         super(YoutubeIE, self).__init__(*args, **kwargs)
1071         self._player_cache = {}
1072
1073     def report_video_info_webpage_download(self, video_id):
1074         """Report attempt to download video info webpage."""
1075         self.to_screen('%s: Downloading video info webpage' % video_id)
1076
1077     def report_information_extraction(self, video_id):
1078         """Report attempt to extract video information."""
1079         self.to_screen('%s: Extracting video information' % video_id)
1080
1081     def report_unavailable_format(self, video_id, format):
1082         """Report extracted video URL."""
1083         self.to_screen('%s: Format %s not available' % (video_id, format))
1084
1085     def report_rtmp_download(self):
1086         """Indicate the download will use the RTMP protocol."""
1087         self.to_screen('RTMP download detected')
1088
1089     def _signature_cache_id(self, example_sig):
1090         """ Return a string representation of a signature """
1091         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1092
1093     def _extract_signature_function(self, video_id, player_url, example_sig):
1094         id_m = re.match(
1095             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1096             player_url)
1097         if not id_m:
1098             raise ExtractorError('Cannot identify player %r' % player_url)
1099         player_type = id_m.group('ext')
1100         player_id = id_m.group('id')
1101
1102         # Read from filesystem cache
1103         func_id = '%s_%s_%s' % (
1104             player_type, player_id, self._signature_cache_id(example_sig))
1105         assert os.path.basename(func_id) == func_id
1106
1107         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1108         if cache_spec is not None:
1109             return lambda s: ''.join(s[i] for i in cache_spec)
1110
1111         download_note = (
1112             'Downloading player %s' % player_url
1113             if self._downloader.params.get('verbose') else
1114             'Downloading %s player %s' % (player_type, player_id)
1115         )
1116         if player_type == 'js':
1117             code = self._download_webpage(
1118                 player_url, video_id,
1119                 note=download_note,
1120                 errnote='Download of %s failed' % player_url)
1121             res = self._parse_sig_js(code)
1122         elif player_type == 'swf':
1123             urlh = self._request_webpage(
1124                 player_url, video_id,
1125                 note=download_note,
1126                 errnote='Download of %s failed' % player_url)
1127             code = urlh.read()
1128             res = self._parse_sig_swf(code)
1129         else:
1130             assert False, 'Invalid player type %r' % player_type
1131
1132         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1133         cache_res = res(test_string)
1134         cache_spec = [ord(c) for c in cache_res]
1135
1136         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1137         return res
1138
1139     def _print_sig_code(self, func, example_sig):
1140         def gen_sig_code(idxs):
1141             def _genslice(start, end, step):
1142                 starts = '' if start == 0 else str(start)
1143                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1144                 steps = '' if step == 1 else (':%d' % step)
1145                 return 's[%s%s%s]' % (starts, ends, steps)
1146
1147             step = None
1148             # Quelch pyflakes warnings - start will be set when step is set
1149             start = '(Never used)'
1150             for i, prev in zip(idxs[1:], idxs[:-1]):
1151                 if step is not None:
1152                     if i - prev == step:
1153                         continue
1154                     yield _genslice(start, prev, step)
1155                     step = None
1156                     continue
1157                 if i - prev in [-1, 1]:
1158                     step = i - prev
1159                     start = prev
1160                     continue
1161                 else:
1162                     yield 's[%d]' % prev
1163             if step is None:
1164                 yield 's[%d]' % i
1165             else:
1166                 yield _genslice(start, i, step)
1167
1168         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1169         cache_res = func(test_string)
1170         cache_spec = [ord(c) for c in cache_res]
1171         expr_code = ' + '.join(gen_sig_code(cache_spec))
1172         signature_id_tuple = '(%s)' % (
1173             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1174         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1175                 '    return %s\n') % (signature_id_tuple, expr_code)
1176         self.to_screen('Extracted signature function:\n' + code)
1177
1178     def _parse_sig_js(self, jscode):
1179         funcname = self._search_regex(
1180             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1181              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
1182             jscode, 'Initial JS player signature function name', group='sig')
1183
1184         jsi = JSInterpreter(jscode)
1185         initial_function = jsi.extract_function(funcname)
1186         return lambda s: initial_function([s])
1187
1188     def _parse_sig_swf(self, file_contents):
1189         swfi = SWFInterpreter(file_contents)
1190         TARGET_CLASSNAME = 'SignatureDecipher'
1191         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1192         initial_function = swfi.extract_function(searched_class, 'decipher')
1193         return lambda s: initial_function([s])
1194
1195     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1196         """Turn the encrypted s field into a working signature"""
1197
1198         if player_url is None:
1199             raise ExtractorError('Cannot decrypt signature without player_url')
1200
1201         if player_url.startswith('//'):
1202             player_url = 'https:' + player_url
1203         elif not re.match(r'https?://', player_url):
1204             player_url = compat_urlparse.urljoin(
1205                 'https://www.youtube.com', player_url)
1206         try:
1207             player_id = (player_url, self._signature_cache_id(s))
1208             if player_id not in self._player_cache:
1209                 func = self._extract_signature_function(
1210                     video_id, player_url, s
1211                 )
1212                 self._player_cache[player_id] = func
1213             func = self._player_cache[player_id]
1214             if self._downloader.params.get('youtube_print_sig_code'):
1215                 self._print_sig_code(func, s)
1216             return func(s)
1217         except Exception as e:
1218             tb = traceback.format_exc()
1219             raise ExtractorError(
1220                 'Signature extraction failed: ' + tb, cause=e)
1221
1222     def _get_subtitles(self, video_id, webpage):
1223         try:
1224             subs_doc = self._download_xml(
1225                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1226                 video_id, note=False)
1227         except ExtractorError as err:
1228             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1229             return {}
1230
1231         sub_lang_list = {}
1232         for track in subs_doc.findall('track'):
1233             lang = track.attrib['lang_code']
1234             if lang in sub_lang_list:
1235                 continue
1236             sub_formats = []
1237             for ext in self._SUBTITLE_FORMATS:
1238                 params = compat_urllib_parse_urlencode({
1239                     'lang': lang,
1240                     'v': video_id,
1241                     'fmt': ext,
1242                     'name': track.attrib['name'].encode('utf-8'),
1243                 })
1244                 sub_formats.append({
1245                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1246                     'ext': ext,
1247                 })
1248             sub_lang_list[lang] = sub_formats
1249         if not sub_lang_list:
1250             self._downloader.report_warning('video doesn\'t have subtitles')
1251             return {}
1252         return sub_lang_list
1253
1254     def _get_ytplayer_config(self, video_id, webpage):
1255         patterns = (
1256             # User data may contain arbitrary character sequences that may affect
1257             # JSON extraction with regex, e.g. when '};' is contained the second
1258             # regex won't capture the whole JSON. Yet working around by trying more
1259             # concrete regex first keeping in mind proper quoted string handling
1260             # to be implemented in future that will replace this workaround (see
1261             # https://github.com/rg3/youtube-dl/issues/7468,
1262             # https://github.com/rg3/youtube-dl/pull/7599)
1263             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1264             r';ytplayer\.config\s*=\s*({.+?});',
1265         )
1266         config = self._search_regex(
1267             patterns, webpage, 'ytplayer.config', default=None)
1268         if config:
1269             return self._parse_json(
1270                 uppercase_escape(config), video_id, fatal=False)
1271
1272     def _get_automatic_captions(self, video_id, webpage):
1273         """We need the webpage for getting the captions url, pass it as an
1274            argument to speed up the process."""
1275         self.to_screen('%s: Looking for automatic captions' % video_id)
1276         player_config = self._get_ytplayer_config(video_id, webpage)
1277         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1278         if not player_config:
1279             self._downloader.report_warning(err_msg)
1280             return {}
1281         try:
1282             args = player_config['args']
1283             caption_url = args.get('ttsurl')
1284             if caption_url:
1285                 timestamp = args['timestamp']
1286                 # We get the available subtitles
1287                 list_params = compat_urllib_parse_urlencode({
1288                     'type': 'list',
1289                     'tlangs': 1,
1290                     'asrs': 1,
1291                 })
1292                 list_url = caption_url + '&' + list_params
1293                 caption_list = self._download_xml(list_url, video_id)
1294                 original_lang_node = caption_list.find('track')
1295                 if original_lang_node is None:
1296                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1297                     return {}
1298                 original_lang = original_lang_node.attrib['lang_code']
1299                 caption_kind = original_lang_node.attrib.get('kind', '')
1300
1301                 sub_lang_list = {}
1302                 for lang_node in caption_list.findall('target'):
1303                     sub_lang = lang_node.attrib['lang_code']
1304                     sub_formats = []
1305                     for ext in self._SUBTITLE_FORMATS:
1306                         params = compat_urllib_parse_urlencode({
1307                             'lang': original_lang,
1308                             'tlang': sub_lang,
1309                             'fmt': ext,
1310                             'ts': timestamp,
1311                             'kind': caption_kind,
1312                         })
1313                         sub_formats.append({
1314                             'url': caption_url + '&' + params,
1315                             'ext': ext,
1316                         })
1317                     sub_lang_list[sub_lang] = sub_formats
1318                 return sub_lang_list
1319
1320             def make_captions(sub_url, sub_langs):
1321                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1322                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1323                 captions = {}
1324                 for sub_lang in sub_langs:
1325                     sub_formats = []
1326                     for ext in self._SUBTITLE_FORMATS:
1327                         caption_qs.update({
1328                             'tlang': [sub_lang],
1329                             'fmt': [ext],
1330                         })
1331                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1332                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1333                         sub_formats.append({
1334                             'url': sub_url,
1335                             'ext': ext,
1336                         })
1337                     captions[sub_lang] = sub_formats
1338                 return captions
1339
1340             # New captions format as of 22.06.2017
1341             player_response = args.get('player_response')
1342             if player_response and isinstance(player_response, compat_str):
1343                 player_response = self._parse_json(
1344                     player_response, video_id, fatal=False)
1345                 if player_response:
1346                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1347                     base_url = renderer['captionTracks'][0]['baseUrl']
1348                     sub_lang_list = []
1349                     for lang in renderer['translationLanguages']:
1350                         lang_code = lang.get('languageCode')
1351                         if lang_code:
1352                             sub_lang_list.append(lang_code)
1353                     return make_captions(base_url, sub_lang_list)
1354
1355             # Some videos don't provide ttsurl but rather caption_tracks and
1356             # caption_translation_languages (e.g. 20LmZk1hakA)
1357             # Does not used anymore as of 22.06.2017
1358             caption_tracks = args['caption_tracks']
1359             caption_translation_languages = args['caption_translation_languages']
1360             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1361             sub_lang_list = []
1362             for lang in caption_translation_languages.split(','):
1363                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1364                 sub_lang = lang_qs.get('lc', [None])[0]
1365                 if sub_lang:
1366                     sub_lang_list.append(sub_lang)
1367             return make_captions(caption_url, sub_lang_list)
1368         # An extractor error can be raise by the download process if there are
1369         # no automatic captions but there are subtitles
1370         except (KeyError, IndexError, ExtractorError):
1371             self._downloader.report_warning(err_msg)
1372             return {}
1373
1374     def _mark_watched(self, video_id, video_info):
1375         playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1376         if not playback_url:
1377             return
1378         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1379         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1380
1381         # cpn generation algorithm is reverse engineered from base.js.
1382         # In fact it works even with dummy cpn.
1383         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1384         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1385
1386         qs.update({
1387             'ver': ['2'],
1388             'cpn': [cpn],
1389         })
1390         playback_url = compat_urlparse.urlunparse(
1391             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1392
1393         self._download_webpage(
1394             playback_url, video_id, 'Marking watched',
1395             'Unable to mark watched', fatal=False)
1396
1397     @staticmethod
1398     def _extract_urls(webpage):
1399         # Embedded YouTube player
1400         entries = [
1401             unescapeHTML(mobj.group('url'))
1402             for mobj in re.finditer(r'''(?x)
1403             (?:
1404                 <iframe[^>]+?src=|
1405                 data-video-url=|
1406                 <embed[^>]+?src=|
1407                 embedSWF\(?:\s*|
1408                 <object[^>]+data=|
1409                 new\s+SWFObject\(
1410             )
1411             (["\'])
1412                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1413                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1414             \1''', webpage)]
1415
1416         # lazyYT YouTube embed
1417         entries.extend(list(map(
1418             unescapeHTML,
1419             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1420
1421         # Wordpress "YouTube Video Importer" plugin
1422         matches = re.findall(r'''(?x)<div[^>]+
1423             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1424             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1425         entries.extend(m[-1] for m in matches)
1426
1427         return entries
1428
1429     @staticmethod
1430     def _extract_url(webpage):
1431         urls = YoutubeIE._extract_urls(webpage)
1432         return urls[0] if urls else None
1433
1434     @classmethod
1435     def extract_id(cls, url):
1436         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1437         if mobj is None:
1438             raise ExtractorError('Invalid URL: %s' % url)
1439         video_id = mobj.group(2)
1440         return video_id
1441
1442     def _extract_annotations(self, video_id):
1443         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1444         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1445
1446     @staticmethod
1447     def _extract_chapters(description, duration):
1448         if not description:
1449             return None
1450         chapter_lines = re.findall(
1451             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1452             description)
1453         if not chapter_lines:
1454             return None
1455         chapters = []
1456         for next_num, (chapter_line, time_point) in enumerate(
1457                 chapter_lines, start=1):
1458             start_time = parse_duration(time_point)
1459             if start_time is None:
1460                 continue
1461             if start_time > duration:
1462                 break
1463             end_time = (duration if next_num == len(chapter_lines)
1464                         else parse_duration(chapter_lines[next_num][1]))
1465             if end_time is None:
1466                 continue
1467             if end_time > duration:
1468                 end_time = duration
1469             if start_time > end_time:
1470                 break
1471             chapter_title = re.sub(
1472                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1473             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1474             chapters.append({
1475                 'start_time': start_time,
1476                 'end_time': end_time,
1477                 'title': chapter_title,
1478             })
1479         return chapters
1480
1481     def _real_extract(self, url):
1482         url, smuggled_data = unsmuggle_url(url, {})
1483
1484         proto = (
1485             'http' if self._downloader.params.get('prefer_insecure', False)
1486             else 'https')
1487
1488         start_time = None
1489         end_time = None
1490         parsed_url = compat_urllib_parse_urlparse(url)
1491         for component in [parsed_url.fragment, parsed_url.query]:
1492             query = compat_parse_qs(component)
1493             if start_time is None and 't' in query:
1494                 start_time = parse_duration(query['t'][0])
1495             if start_time is None and 'start' in query:
1496                 start_time = parse_duration(query['start'][0])
1497             if end_time is None and 'end' in query:
1498                 end_time = parse_duration(query['end'][0])
1499
1500         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1501         mobj = re.search(self._NEXT_URL_RE, url)
1502         if mobj:
1503             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1504         video_id = self.extract_id(url)
1505
1506         # Get video webpage
1507         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1508         video_webpage = self._download_webpage(url, video_id)
1509
1510         # Attempt to extract SWF player URL
1511         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1512         if mobj is not None:
1513             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1514         else:
1515             player_url = None
1516
1517         dash_mpds = []
1518
1519         def add_dash_mpd(video_info):
1520             dash_mpd = video_info.get('dashmpd')
1521             if dash_mpd and dash_mpd[0] not in dash_mpds:
1522                 dash_mpds.append(dash_mpd[0])
1523
1524         is_live = None
1525         view_count = None
1526
1527         def extract_view_count(v_info):
1528             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1529
1530         # Get video info
1531         embed_webpage = None
1532         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1533             age_gate = True
1534             # We simulate the access to the video from www.youtube.com/v/{video_id}
1535             # this can be viewed without login into Youtube
1536             url = proto + '://www.youtube.com/embed/%s' % video_id
1537             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1538             data = compat_urllib_parse_urlencode({
1539                 'video_id': video_id,
1540                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1541                 'sts': self._search_regex(
1542                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1543             })
1544             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1545             video_info_webpage = self._download_webpage(
1546                 video_info_url, video_id,
1547                 note='Refetching age-gated info webpage',
1548                 errnote='unable to download video info webpage')
1549             video_info = compat_parse_qs(video_info_webpage)
1550             add_dash_mpd(video_info)
1551         else:
1552             age_gate = False
1553             video_info = None
1554             sts = None
1555             # Try looking directly into the video webpage
1556             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1557             if ytplayer_config:
1558                 args = ytplayer_config['args']
1559                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1560                     # Convert to the same format returned by compat_parse_qs
1561                     video_info = dict((k, [v]) for k, v in args.items())
1562                     add_dash_mpd(video_info)
1563                 # Rental video is not rented but preview is available (e.g.
1564                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1565                 # https://github.com/rg3/youtube-dl/issues/10532)
1566                 if not video_info and args.get('ypc_vid'):
1567                     return self.url_result(
1568                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1569                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1570                     is_live = True
1571                 sts = ytplayer_config.get('sts')
1572             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1573                 # We also try looking in get_video_info since it may contain different dashmpd
1574                 # URL that points to a DASH manifest with possibly different itag set (some itags
1575                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1576                 # manifest pointed by get_video_info's dashmpd).
1577                 # The general idea is to take a union of itags of both DASH manifests (for example
1578                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1579                 self.report_video_info_webpage_download(video_id)
1580                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1581                     query = {
1582                         'video_id': video_id,
1583                         'ps': 'default',
1584                         'eurl': '',
1585                         'gl': 'US',
1586                         'hl': 'en',
1587                     }
1588                     if el:
1589                         query['el'] = el
1590                     if sts:
1591                         query['sts'] = sts
1592                     video_info_webpage = self._download_webpage(
1593                         '%s://www.youtube.com/get_video_info' % proto,
1594                         video_id, note=False,
1595                         errnote='unable to download video info webpage',
1596                         fatal=False, query=query)
1597                     if not video_info_webpage:
1598                         continue
1599                     get_video_info = compat_parse_qs(video_info_webpage)
1600                     add_dash_mpd(get_video_info)
1601                     if view_count is None:
1602                         view_count = extract_view_count(get_video_info)
1603                     if not video_info:
1604                         video_info = get_video_info
1605                     if 'token' in get_video_info:
1606                         # Different get_video_info requests may report different results, e.g.
1607                         # some may report video unavailability, but some may serve it without
1608                         # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1609                         # the original webpage as well as el=info and el=embedded get_video_info
1610                         # requests report video unavailability due to geo restriction while
1611                         # el=detailpage succeeds and returns valid data). This is probably
1612                         # due to YouTube measures against IP ranges of hosting providers.
1613                         # Working around by preferring the first succeeded video_info containing
1614                         # the token if no such video_info yet was found.
1615                         if 'token' not in video_info:
1616                             video_info = get_video_info
1617                         break
1618
1619         def extract_unavailable_message():
1620             return self._html_search_regex(
1621                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1622                 video_webpage, 'unavailable message', default=None)
1623
1624         if 'token' not in video_info:
1625             if 'reason' in video_info:
1626                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1627                     regions_allowed = self._html_search_meta(
1628                         'regionsAllowed', video_webpage, default=None)
1629                     countries = regions_allowed.split(',') if regions_allowed else None
1630                     self.raise_geo_restricted(
1631                         msg=video_info['reason'][0], countries=countries)
1632                 reason = video_info['reason'][0]
1633                 if 'Invalid parameters' in reason:
1634                     unavailable_message = extract_unavailable_message()
1635                     if unavailable_message:
1636                         reason = unavailable_message
1637                 raise ExtractorError(
1638                     'YouTube said: %s' % reason,
1639                     expected=True, video_id=video_id)
1640             else:
1641                 raise ExtractorError(
1642                     '"token" parameter not in video info for unknown reason',
1643                     video_id=video_id)
1644
1645         # title
1646         if 'title' in video_info:
1647             video_title = video_info['title'][0]
1648         else:
1649             self._downloader.report_warning('Unable to extract video title')
1650             video_title = '_'
1651
1652         # description
1653         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1654         if video_description:
1655
1656             def replace_url(m):
1657                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1658                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1659                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1660                     qs = compat_parse_qs(parsed_redir_url.query)
1661                     q = qs.get('q')
1662                     if q and q[0]:
1663                         return q[0]
1664                 return redir_url
1665
1666             description_original = video_description = re.sub(r'''(?x)
1667                 <a\s+
1668                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1669                     (?:title|href)="([^"]+)"\s+
1670                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1671                     class="[^"]*"[^>]*>
1672                 [^<]+\.{3}\s*
1673                 </a>
1674             ''', replace_url, video_description)
1675             video_description = clean_html(video_description)
1676         else:
1677             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1678             if fd_mobj:
1679                 video_description = unescapeHTML(fd_mobj.group(1))
1680             else:
1681                 video_description = ''
1682
1683         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1684             if not self._downloader.params.get('noplaylist'):
1685                 entries = []
1686                 feed_ids = []
1687                 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1688                 for feed in multifeed_metadata_list.split(','):
1689                     # Unquote should take place before split on comma (,) since textual
1690                     # fields may contain comma as well (see
1691                     # https://github.com/rg3/youtube-dl/issues/8536)
1692                     feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1693                     entries.append({
1694                         '_type': 'url_transparent',
1695                         'ie_key': 'Youtube',
1696                         'url': smuggle_url(
1697                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1698                             {'force_singlefeed': True}),
1699                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1700                     })
1701                     feed_ids.append(feed_data['id'][0])
1702                 self.to_screen(
1703                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1704                     % (', '.join(feed_ids), video_id))
1705                 return self.playlist_result(entries, video_id, video_title, video_description)
1706             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1707
1708         if view_count is None:
1709             view_count = extract_view_count(video_info)
1710
1711         # Check for "rental" videos
1712         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1713             raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
1714
1715         def _extract_filesize(media_url):
1716             return int_or_none(self._search_regex(
1717                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1718
1719         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1720             self.report_rtmp_download()
1721             formats = [{
1722                 'format_id': '_rtmp',
1723                 'protocol': 'rtmp',
1724                 'url': video_info['conn'][0],
1725                 'player_url': player_url,
1726             }]
1727         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1728             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1729             if 'rtmpe%3Dyes' in encoded_url_map:
1730                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1731             formats_spec = {}
1732             fmt_list = video_info.get('fmt_list', [''])[0]
1733             if fmt_list:
1734                 for fmt in fmt_list.split(','):
1735                     spec = fmt.split('/')
1736                     if len(spec) > 1:
1737                         width_height = spec[1].split('x')
1738                         if len(width_height) == 2:
1739                             formats_spec[spec[0]] = {
1740                                 'resolution': spec[1],
1741                                 'width': int_or_none(width_height[0]),
1742                                 'height': int_or_none(width_height[1]),
1743                             }
1744             q = qualities(['small', 'medium', 'hd720'])
1745             formats = []
1746             for url_data_str in encoded_url_map.split(','):
1747                 url_data = compat_parse_qs(url_data_str)
1748                 if 'itag' not in url_data or 'url' not in url_data:
1749                     continue
1750                 format_id = url_data['itag'][0]
1751                 url = url_data['url'][0]
1752
1753                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1754                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1755                     jsplayer_url_json = self._search_regex(
1756                         ASSETS_RE,
1757                         embed_webpage if age_gate else video_webpage,
1758                         'JS player URL (1)', default=None)
1759                     if not jsplayer_url_json and not age_gate:
1760                         # We need the embed website after all
1761                         if embed_webpage is None:
1762                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1763                             embed_webpage = self._download_webpage(
1764                                 embed_url, video_id, 'Downloading embed webpage')
1765                         jsplayer_url_json = self._search_regex(
1766                             ASSETS_RE, embed_webpage, 'JS player URL')
1767
1768                     player_url = json.loads(jsplayer_url_json)
1769                     if player_url is None:
1770                         player_url_json = self._search_regex(
1771                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1772                             video_webpage, 'age gate player URL')
1773                         player_url = json.loads(player_url_json)
1774
1775                 if 'sig' in url_data:
1776                     url += '&signature=' + url_data['sig'][0]
1777                 elif 's' in url_data:
1778                     encrypted_sig = url_data['s'][0]
1779
1780                     if self._downloader.params.get('verbose'):
1781                         if player_url is None:
1782                             player_version = 'unknown'
1783                             player_desc = 'unknown'
1784                         else:
1785                             if player_url.endswith('swf'):
1786                                 player_version = self._search_regex(
1787                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1788                                     'flash player', fatal=False)
1789                                 player_desc = 'flash player %s' % player_version
1790                             else:
1791                                 player_version = self._search_regex(
1792                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1793                                      r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
1794                                     player_url,
1795                                     'html5 player', fatal=False)
1796                                 player_desc = 'html5 player %s' % player_version
1797
1798                         parts_sizes = self._signature_cache_id(encrypted_sig)
1799                         self.to_screen('{%s} signature length %s, %s' %
1800                                        (format_id, parts_sizes, player_desc))
1801
1802                     signature = self._decrypt_signature(
1803                         encrypted_sig, video_id, player_url, age_gate)
1804                     url += '&signature=' + signature
1805                 if 'ratebypass' not in url:
1806                     url += '&ratebypass=yes'
1807
1808                 dct = {
1809                     'format_id': format_id,
1810                     'url': url,
1811                     'player_url': player_url,
1812                 }
1813                 if format_id in self._formats:
1814                     dct.update(self._formats[format_id])
1815                 if format_id in formats_spec:
1816                     dct.update(formats_spec[format_id])
1817
1818                 # Some itags are not included in DASH manifest thus corresponding formats will
1819                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1820                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1821                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1822                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1823
1824                 filesize = int_or_none(url_data.get(
1825                     'clen', [None])[0]) or _extract_filesize(url)
1826
1827                 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1828
1829                 more_fields = {
1830                     'filesize': filesize,
1831                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1832                     'width': width,
1833                     'height': height,
1834                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1835                     'format_note': quality,
1836                     'quality': q(quality),
1837                 }
1838                 for key, value in more_fields.items():
1839                     if value:
1840                         dct[key] = value
1841                 type_ = url_data.get('type', [None])[0]
1842                 if type_:
1843                     type_split = type_.split(';')
1844                     kind_ext = type_split[0].split('/')
1845                     if len(kind_ext) == 2:
1846                         kind, _ = kind_ext
1847                         dct['ext'] = mimetype2ext(type_split[0])
1848                         if kind in ('audio', 'video'):
1849                             codecs = None
1850                             for mobj in re.finditer(
1851                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1852                                 if mobj.group('key') == 'codecs':
1853                                     codecs = mobj.group('val')
1854                                     break
1855                             if codecs:
1856                                 dct.update(parse_codecs(codecs))
1857                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1858                     dct['downloader_options'] = {
1859                         # Youtube throttles chunks >~10M
1860                         'http_chunk_size': 10485760,
1861                     }
1862                 formats.append(dct)
1863         elif video_info.get('hlsvp'):
1864             manifest_url = video_info['hlsvp'][0]
1865             formats = []
1866             m3u8_formats = self._extract_m3u8_formats(
1867                 manifest_url, video_id, 'mp4', fatal=False)
1868             for a_format in m3u8_formats:
1869                 itag = self._search_regex(
1870                     r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1871                 if itag:
1872                     a_format['format_id'] = itag
1873                     if itag in self._formats:
1874                         dct = self._formats[itag].copy()
1875                         dct.update(a_format)
1876                         a_format = dct
1877                 a_format['player_url'] = player_url
1878                 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1879                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1880                 formats.append(a_format)
1881         else:
1882             error_message = clean_html(video_info.get('reason', [None])[0])
1883             if not error_message:
1884                 error_message = extract_unavailable_message()
1885             if error_message:
1886                 raise ExtractorError(error_message, expected=True)
1887             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1888
1889         # uploader
1890         video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1891         if video_uploader:
1892             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1893         else:
1894             self._downloader.report_warning('unable to extract uploader name')
1895
1896         # uploader_id
1897         video_uploader_id = None
1898         video_uploader_url = None
1899         mobj = re.search(
1900             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1901             video_webpage)
1902         if mobj is not None:
1903             video_uploader_id = mobj.group('uploader_id')
1904             video_uploader_url = mobj.group('uploader_url')
1905         else:
1906             self._downloader.report_warning('unable to extract uploader nickname')
1907
1908         # thumbnail image
1909         # We try first to get a high quality image:
1910         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1911                             video_webpage, re.DOTALL)
1912         if m_thumb is not None:
1913             video_thumbnail = m_thumb.group(1)
1914         elif 'thumbnail_url' not in video_info:
1915             self._downloader.report_warning('unable to extract video thumbnail')
1916             video_thumbnail = None
1917         else:   # don't panic if we can't find it
1918             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1919
1920         # upload date
1921         upload_date = self._html_search_meta(
1922             'datePublished', video_webpage, 'upload date', default=None)
1923         if not upload_date:
1924             upload_date = self._search_regex(
1925                 [r'(?s)id="eow-date.*?>(.*?)</span>',
1926                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1927                 video_webpage, 'upload date', default=None)
1928         upload_date = unified_strdate(upload_date)
1929
1930         video_license = self._html_search_regex(
1931             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1932             video_webpage, 'license', default=None)
1933
1934         m_music = re.search(
1935             r'''(?x)
1936                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1937                 <ul[^>]*>\s*
1938                 <li>(?P<title>.+?)
1939                 by (?P<creator>.+?)
1940                 (?:
1941                     \(.+?\)|
1942                     <a[^>]*
1943                         (?:
1944                             \bhref=["\']/red[^>]*>|             # drop possible
1945                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
1946                         )
1947                     .*?
1948                 )?</li
1949             ''',
1950             video_webpage)
1951         if m_music:
1952             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1953             video_creator = clean_html(m_music.group('creator'))
1954         else:
1955             video_alt_title = video_creator = None
1956
1957         def extract_meta(field):
1958             return self._html_search_regex(
1959                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1960                 video_webpage, field, default=None)
1961
1962         track = extract_meta('Song')
1963         artist = extract_meta('Artist')
1964
1965         m_episode = re.search(
1966             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1967             video_webpage)
1968         if m_episode:
1969             series = m_episode.group('series')
1970             season_number = int(m_episode.group('season'))
1971             episode_number = int(m_episode.group('episode'))
1972         else:
1973             series = season_number = episode_number = None
1974
1975         m_cat_container = self._search_regex(
1976             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1977             video_webpage, 'categories', default=None)
1978         if m_cat_container:
1979             category = self._html_search_regex(
1980                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1981                 default=None)
1982             video_categories = None if category is None else [category]
1983         else:
1984             video_categories = None
1985
1986         video_tags = [
1987             unescapeHTML(m.group('content'))
1988             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1989
1990         def _extract_count(count_name):
1991             return str_to_int(self._search_regex(
1992                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1993                 % re.escape(count_name),
1994                 video_webpage, count_name, default=None))
1995
1996         like_count = _extract_count('like')
1997         dislike_count = _extract_count('dislike')
1998
1999         # subtitles
2000         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2001         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2002
2003         video_duration = try_get(
2004             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2005         if not video_duration:
2006             video_duration = parse_duration(self._html_search_meta(
2007                 'duration', video_webpage, 'video duration'))
2008
2009         # annotations
2010         video_annotations = None
2011         if self._downloader.params.get('writeannotations', False):
2012             video_annotations = self._extract_annotations(video_id)
2013
2014         chapters = self._extract_chapters(description_original, video_duration)
2015
2016         # Look for the DASH manifest
2017         if self._downloader.params.get('youtube_include_dash_manifest', True):
2018             dash_mpd_fatal = True
2019             for mpd_url in dash_mpds:
2020                 dash_formats = {}
2021                 try:
2022                     def decrypt_sig(mobj):
2023                         s = mobj.group(1)
2024                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2025                         return '/signature/%s' % dec_s
2026
2027                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2028
2029                     for df in self._extract_mpd_formats(
2030                             mpd_url, video_id, fatal=dash_mpd_fatal,
2031                             formats_dict=self._formats):
2032                         if not df.get('filesize'):
2033                             df['filesize'] = _extract_filesize(df['url'])
2034                         # Do not overwrite DASH format found in some previous DASH manifest
2035                         if df['format_id'] not in dash_formats:
2036                             dash_formats[df['format_id']] = df
                        # Additional DASH manifests may end up in HTTP Error 403, therefore
                        # allow them to fail without the bug report message once a previous
                        # DASH manifest has succeeded. This is a temporary workaround to
                        # reduce the burst of bug reports until we figure out the reason
                        # and whether it can be fixed at all.
2042                         dash_mpd_fatal = False
2043                 except (ExtractorError, KeyError) as e:
2044                     self.report_warning(
2045                         'Skipping DASH manifest: %r' % e, video_id)
2046                 if dash_formats:
2047                     # Remove the formats we found through non-DASH, they
2048                     # contain less info and it can be wrong, because we use
2049                     # fixed values (for example the resolution). See
2050                     # https://github.com/rg3/youtube-dl/issues/5774 for an
2051                     # example.
2052                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2053                     formats.extend(dash_formats.values())
2054
2055         # Check for malformed aspect ratio
2056         stretched_m = re.search(
2057             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2058             video_webpage)
2059         if stretched_m:
2060             w = float(stretched_m.group('w'))
2061             h = float(stretched_m.group('h'))
2062             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2063             # We will only process correct ratios.
2064             if w > 0 and h > 0:
2065                 ratio = w / h
2066                 for f in formats:
2067                     if f.get('vcodec') != 'none':
2068                         f['stretched_ratio'] = ratio
2069
2070         self._sort_formats(formats)
2071
2072         self.mark_watched(video_id, video_info)
2073
2074         return {
2075             'id': video_id,
2076             'uploader': video_uploader,
2077             'uploader_id': video_uploader_id,
2078             'uploader_url': video_uploader_url,
2079             'upload_date': upload_date,
2080             'license': video_license,
2081             'creator': video_creator or artist,
2082             'title': video_title,
2083             'alt_title': video_alt_title or track,
2084             'thumbnail': video_thumbnail,
2085             'description': video_description,
2086             'categories': video_categories,
2087             'tags': video_tags,
2088             'subtitles': video_subtitles,
2089             'automatic_captions': automatic_captions,
2090             'duration': video_duration,
2091             'age_limit': 18 if age_gate else 0,
2092             'annotations': video_annotations,
2093             'chapters': chapters,
2094             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2095             'view_count': view_count,
2096             'like_count': like_count,
2097             'dislike_count': dislike_count,
2098             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2099             'formats': formats,
2100             'is_live': is_live,
2101             'start_time': start_time,
2102             'end_time': end_time,
2103             'series': series,
2104             'season_number': season_number,
2105             'episode_number': episode_number,
2106             'track': track,
2107             'artist': artist,
2108         }
2109
2110
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlist URLs, bare playlist ids and mixes."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2017 華語最新單曲 (2/24更新)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Initialization hook: run the inherited login step."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix by walking its watch pages.

        Mixes are generated from a single video and the playlist id is just
        'RD' + video_id, so the last 11 characters of ``playlist_id`` seed
        the first request.  Each following request is seeded with the last
        video id collected so far.
        """
        # The pattern depends only on the playlist id, so build it once.
        mix_entry_re = r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        collected_ids = []
        seed_id = playlist_id[-11:]
        page_num = 0
        while True:
            page_num += 1
            webpage = self._download_webpage(
                'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id),
                playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            unseen_ids = [
                candidate
                for candidate in orderedSet(re.findall(mix_entry_re, webpage))
                if candidate not in collected_ids]
            if not unseen_ids:
                break
            collected_ids += unseen_ids
            seed_id = collected_ids[-1]

        url_results = self._ids_to_results(collected_ids)

        # The title is looked up in the last downloaded page; the first
        # class name that yields a non-empty element wins.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break

        return self.playlist_result(
            url_results, playlist_id, clean_html(title_span))

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and turn it into a playlist result.

        Returns a ``(has_videos, playlist)`` tuple.  ``has_videos`` is False
        only when the page has no playlist title and yields no entries.

        Raises ExtractorError (expected) when an alert on the page says the
        playlist does not exist, is private, or that the parameters are
        invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
        alerts = re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page)
        for alert in alerts:
            alert = alert.strip()

            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                message_parts = ['This playlist %s' % reason]
                if 'private' in reason:
                    message_parts.append(', use --username or --netrc to access it')
                message_parts.append('.')
                raise ExtractorError(''.join(message_parts), expected=True)

            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)

            # The language chooser is not worth a warning.
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue

            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        # Common prefix of both uploader patterns: the first link in the
        # playlist header details list.
        uploader_anchor = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_anchor,
            page, 'uploader', default=None)

        uploader_id = uploader_url = None
        uploader_link = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_anchor,
            page)
        if uploader_link:
            uploader_id = uploader_link.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_link.group('path'))

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            # so probe for at least one entry.
            no_entry = object()
            has_videos = next(self._entries(page, playlist_id), no_entry) is not no_entry

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist['uploader'] = uploader
        playlist['uploader_id'] = uploader_id
        playlist['uploader_url'] = uploader_url

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether ``url`` should yield a single video, not a playlist.

        Returns ``(video_id, result)``.  ``video_id`` is None when the URL
        carries no video id.  ``result`` is a url result for the video only
        when the 'noplaylist' option is set, otherwise None.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0]
        if not video_id:
            video_id = self._search_regex(
                r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
                'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Extract a playlist, a mix, or a single video from ``url``.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
        """
        # Extract playlist id
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        # Mixes require a custom extraction process
        if playlist_id.startswith(('RD', 'UL', 'PU')):
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/rg3/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
2402
2403
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube channel URLs (``/channel/<id>``)."""
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs claimed by the playlists or live extractors.

        Any other URL is judged by the inherited ``suitable``.
        """
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the channel videos URL for ``channel_id``.

        ``url`` is not used by this implementation.
        """
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract the videos of a channel.

        If a channel id starting with 'UC' can be found, the result is a
        url result pointing at the matching 'UU' playlist.  Otherwise the
        channel's video pages are used.

        Raises ExtractorError (expected) when the channel page yields no
        entries and carries an alert message.
        """
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: try the app link meta tags instead.
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to get the playlist id.
            playlist_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; an empty channel may explain itself
        # through an alert message on the page.
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2493
2494
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user pages.

    Matches ``/user/<name>``, ``/c/<name>`` and bare ``/<name>`` URLs on
    youtube.com as well as the ``ytuser:<name>`` keyword.
    """
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    # Filled with (<'user' or 'c'>, <user id>)
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/TheLinuxFoundation',
            'playlist_mincount': 320,
            'info_dict': {
                'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
                'title': 'Uploads from The Linux Foundation',
            },
        },
        {
            # Only available via https://www.youtube.com/c/12minuteathlete/videos
            # but not https://www.youtube.com/user/12minuteathlete/videos
            'url': 'https://www.youtube.com/c/12minuteathlete/videos',
            'playlist_mincount': 249,
            'info_dict': {
                'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
                'title': 'Uploads from 12 Minute Athlete',
            },
        },
        {
            'url': 'ytuser:phihag',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/c/gametrailers',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/gametrailers',
            'only_matching': True,
        },
        {
            # This channel is not available, geo restricted to JP
            'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
            'only_matching': True,
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Return True only when no other Youtube*IE in this module accepts url."""
        # _VALID_URL is too permissive and would also match URLs that belong
        # to the other youtube extractors, so give each of them the first
        # chance to claim the URL.
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Build the ``/videos`` page URL for the user named in url.

        The path prefix is the ``user`` group of _VALID_URL ('user' or 'c'),
        falling back to 'user' when that group did not match. channel_id is
        not used here.
        """
        match = re.match(self._VALID_URL, url)
        prefix = match.group('user')
        if not prefix:
            prefix = 'user'
        return self._TEMPLATE_URL % (prefix, match.group('id'))
2545
2546
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of a user or channel."""
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/TheYoungTurks/live',
            'info_dict': {
                'id': 'a48o2S1cPoo',
                'ext': 'mp4',
                'title': 'The Young Turks - Live Main Show',
                'uploader': 'The Young Turks',
                'uploader_id': 'TheYoungTurks',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
                'upload_date': '20150715',
                'license': 'Standard YouTube License',
                'description': 'md5:438179573adcdff3c97ebb1ee632b891',
                'categories': ['News & Politics'],
                'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
                'like_count': int,
                'dislike_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/TheYoungTurks/live',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to the video named by the live page, else to the base URL.

        The page download is non-fatal. When the page reports an og:type
        starting with 'video' and a well-formed 11-character ``videoId`` meta
        value, that video is handed to YoutubeIE; in every other case the URL
        without the trailing ``/live`` is returned for further resolution.
        """
        match = re.match(self._VALID_URL, url)
        base_url, channel_id = match.group('base_url', 'id')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2597
2598
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` page of a user or channel.

    Only declares matching data; extraction is inherited from the base class.
    """
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
        },
        {
            # with "Load more" button
            'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'playlist_mincount': 17,
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
        },
    ]
2627
2628
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base of the search extractors; only overrides _VIDEO_RE."""
    # Captures the 11-character video id from a /watch link and, when the
    # same tag carries one, its title attribute.
    _VIDEO_RE = (
        r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
        r'(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?')
2631
2632
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor for the ``ytsearch`` search keyword."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra URL parameters merged into the search query
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another, following the "Next"
        link of each page, until n entries have been collected, a page
        yields no entries or no further page is linked.

        query -- the search terms
        n     -- maximum number of entries to return (may be float('inf'))

        Returns a playlist result titled after the query. Raises
        ExtractorError (expected) when a page carries a search message
        instead of results.
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough entries are available: fetching another
            # page once exactly n entries are collected would only produce
            # results that get truncated away below.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # n may be float('inf'), which is not a valid slice bound, so only
        # slice when there really is a surplus.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2681
2682
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search variant behind the ``ytsearchdate`` keyword."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the search URL parameters by the inherited search code
    _EXTRA_QUERY_ARGS = {
        'search_sort': 'video_date_uploaded',
    }
2688
2689
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for ``/results?search_query=...`` (or ``q=...``) URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the entries of the given results page as a playlist.

        The playlist title is the URL-decoded query from the URL.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
2710
2711
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` URLs."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            },
        },
    ]

    def _real_extract(self, url):
        """Run the inherited extraction on the show's ``/playlists`` page."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2729
2730
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name ('youtube:<feed>')."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield one result per distinct video linked from the feed.

        page is the HTML of the first feed page. Further portions are
        fetched through the "load more" link for as long as one is present
        and the latest portion contributed at least one unseen video id.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = set()  # set membership keeps de-duplication linear
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its entries as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
2782
2783
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the 'WL' (watch later) playlist."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single-video result if there is one, else the 'WL' playlist."""
        _, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _, watch_later = self._extract_playlist('WL')
            return watch_later
        return single_video
2803
2804
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the ``/my_favorites`` page and the ``:ytfav`` keyword."""
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the playlist id on the favourites page and delegate to it."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
2815
2816
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/recommended`` and the ``:ytrec`` keyword."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
2822
2823
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/subscriptions`` and the ``:ytsubs`` keyword."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
2829
2830
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/history`` and the ``:ythistory`` keyword."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
2836
2837
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs that carry no video id and reports them.

    Extraction always fails with a hint about quoting the URL in the shell.
    """
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
2885
2886
class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose video id is 1 to 10 characters long and reports them."""
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)