youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import re
  10 import time
  11 import traceback
  12
  13 from .common import InfoExtractor, SearchInfoExtractor
  14 from ..jsinterp import JSInterpreter
  15 from ..swfinterp import SWFInterpreter
  16 from ..compat import (
  17     compat_chr,
  18     compat_parse_qs,
  19     compat_urllib_parse,
  20     compat_urllib_parse_unquote,
  21     compat_urllib_parse_unquote_plus,
  22     compat_urllib_parse_urlparse,
  23     compat_urllib_request,
  24     compat_urlparse,
  25     compat_str,
  26 )
  27 from ..utils import (
  28     clean_html,
  29     encode_dict,
  30     ExtractorError,
  31     float_or_none,
  32     get_element_by_attribute,
  33     get_element_by_id,
  34     int_or_none,
  35     orderedSet,
  36     parse_duration,
  37     remove_start,
  38     smuggle_url,
  39     str_to_int,
  40     unescapeHTML,
  41     unified_strdate,
  42     unsmuggle_url,
  43     uppercase_escape,
  44     ISO3166Utils,
  45 )
  46
  47
  48 class YoutubeBaseInfoExtractor(InfoExtractor):
  49     """Provide base functions for Youtube extractors"""
  50     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  51     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  52     _NETRC_MACHINE = 'youtube'
  53     # If True it will raise an error if no login info is provided
  54     _LOGIN_REQUIRED = False
  55
  56     def _set_language(self):
  57         self._set_cookie(
  58             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  59             # YouTube sets the expire time to about two months
  60             expire_time=time.time() + 2 * 30 * 24 * 3600)
  61
  62     def _ids_to_results(self, ids):
  63         return [
  64             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  65             for vid_id in ids]
  66
  67     def _login(self):
  68         """
  69         Attempt to log in to YouTube.
  70         True is returned if successful or skipped.
  71         False is returned if login failed.
  72
  73         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  74         """
  75         (username, password) = self._get_login_info()
  76         # No authentication to be performed
  77         if username is None:
  78             if self._LOGIN_REQUIRED:
  79                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  80             return True
  81
  82         login_page = self._download_webpage(
  83             self._LOGIN_URL, None,
  84             note='Downloading login page',
  85             errnote='unable to fetch login page', fatal=False)
  86         if login_page is False:
  87             return
  88
  89         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
  90                                   login_page, 'Login GALX parameter')
  91
  92         # Log in
  93         login_form_strs = {
  94             'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  95             'Email': username,
  96             'GALX': galx,
  97             'Passwd': password,
  98
  99             'PersistentCookie': 'yes',
 100             '_utf8': '霱',
 101             'bgresponse': 'js_disabled',
 102             'checkConnection': '',
 103             'checkedDomains': 'youtube',
 104             'dnConn': '',
 105             'pstMsg': '0',
 106             'rmShown': '1',
 107             'secTok': '',
 108             'signIn': 'Sign in',
 109             'timeStmp': '',
 110             'service': 'youtube',
 111             'uilel': '3',
 112             'hl': 'en_US',
 113         }
 114
 115         login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
 116
 117         req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 118         login_results = self._download_webpage(
 119             req, None,
 120             note='Logging in', errnote='unable to log in', fatal=False)
 121         if login_results is False:
 122             return False
 123
 124         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
 125             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
 126
 127         # Two-Factor
 128         # TODO add SMS and phone call support - these require making a request and then prompting the user
 129
 130         if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
 131             tfa_code = self._get_tfa_info('2-step verification code')
 132
 133             if not tfa_code:
 134                 self._downloader.report_warning(
 135                     'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 136                     '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 137                 return False
 138
 139             tfa_code = remove_start(tfa_code, 'G-')
 140
 141             tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
 142
 143             tfa_form_strs.update({
 144                 'Pin': tfa_code,
 145                 'TrustDevice': 'on',
 146             })
 147
 148             tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
 149
 150             tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
 151             tfa_results = self._download_webpage(
 152                 tfa_req, None,
 153                 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
 154
 155             if tfa_results is False:
 156                 return False
 157
 158             if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
 159                 self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
 160                 return False
 161             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
 162                 self._downloader.report_warning('unable to log in - did the page structure change?')
 163                 return False
 164             if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
 165                 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
 166                 return False
 167
 168         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 169             self._downloader.report_warning('unable to log in: bad username or password')
 170             return False
 171         return True
 172
 173     def _real_initialize(self):
 174         if self._downloader is None:
 175             return
 176         self._set_language()
 177         if not self._login():
 178             return
 179
 180
 181 class YoutubeEntryListBaseInfoExtractor(InfoExtractor):
 182     # Extract entries from page with "Load more" button
 183     def _entries(self, page, playlist_id):
 184         more_widget_html = content_html = page
 185         for page_num in itertools.count(1):
 186             for entry in self._process_page(content_html):
 187                 yield entry
 188
 189             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 190             if not mobj:
 191                 break
 192
 193             more = self._download_json(
 194                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 195                 'Downloading page #%s' % page_num,
 196                 transform_source=uppercase_escape)
 197             content_html = more['content_html']
 198             if not content_html.strip():
 199                 # Some webpages show a "Load more" button but they don't
 200                 # have more videos
 201                 break
 202             more_widget_html = more['load_more_widget_html']
 203
 204
 205 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 206     def _process_page(self, content):
 207         for video_id, video_title in self.extract_videos_from_page(content):
 208             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 209
 210     def extract_videos_from_page(self, page):
 211         ids_in_page = []
 212         titles_in_page = []
 213         for mobj in re.finditer(self._VIDEO_RE, page):
 214             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 215             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 216                 continue
 217             video_id = mobj.group('id')
 218             video_title = unescapeHTML(mobj.group('title'))
 219             if video_title:
 220                 video_title = video_title.strip()
 221             try:
 222                 idx = ids_in_page.index(video_id)
 223                 if video_title and not titles_in_page[idx]:
 224                     titles_in_page[idx] = video_title
 225             except ValueError:
 226                 ids_in_page.append(video_id)
 227                 titles_in_page.append(video_title)
 228         return zip(ids_in_page, titles_in_page)
 229
 230
 231 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 232     def _process_page(self, content):
 233         for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content):
 234             yield self.url_result(
 235                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 236
 237     def _real_extract(self, url):
 238         playlist_id = self._match_id(url)
 239         webpage = self._download_webpage(url, playlist_id)
 240         title = self._og_search_title(webpage, fatal=False)
 241         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 242
 243
 244 class YoutubeIE(YoutubeBaseInfoExtractor):
 245     IE_DESC = 'YouTube.com'
 246     _VALID_URL = r"""(?x)^
 247                      (
 248                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 249                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 250                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 251                             (?:www\.)?pwnyoutube\.com/|
 252                             (?:www\.)?yourepeat\.com/|
 253                             tube\.majestyc\.net/|
 254                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 255                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 256                          (?:                                                  # the various things that can precede the ID:
 257                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 258                              |(?:                                             # or the v= param in all its forms
 259                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 260                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 261                                  (?:.*?&)??                                   # any other preceding param (like /?s=tuff&v=xxxx)
 262                                  v=
 263                              )
 264                          ))
 265                          |(?:
 266                             youtu\.be|                                        # just youtu.be/xxxx
 267                             vid\.plus                                         # or vid.plus/xxxx
 268                          )/
 269                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 270                          )
 271                      )?                                                       # all until now is optional -> you can pass the naked ID
 272                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 273                      (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
 274                      (?(1).+)?                                                # if we found the ID, everything can follow
 275                      $"""
 276     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 277     _formats = {
 278         '5': {'ext': 'flv', 'width': 400, 'height': 240},
 279         '6': {'ext': 'flv', 'width': 450, 'height': 270},
 280         '13': {'ext': '3gp'},
 281         '17': {'ext': '3gp', 'width': 176, 'height': 144},
 282         '18': {'ext': 'mp4', 'width': 640, 'height': 360},
 283         '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
 284         '34': {'ext': 'flv', 'width': 640, 'height': 360},
 285         '35': {'ext': 'flv', 'width': 854, 'height': 480},
 286         '36': {'ext': '3gp', 'width': 320, 'height': 240},
 287         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
 288         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
 289         '43': {'ext': 'webm', 'width': 640, 'height': 360},
 290         '44': {'ext': 'webm', 'width': 854, 'height': 480},
 291         '45': {'ext': 'webm', 'width': 1280, 'height': 720},
 292         '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
 293         '59': {'ext': 'mp4', 'width': 854, 'height': 480},
 294         '78': {'ext': 'mp4', 'width': 854, 'height': 480},
 295
 296
 297         # 3d videos
 298         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
 299         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
 300         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
 301         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
 302         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
 303         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
 304         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
 305
 306         # Apple HTTP Live Streaming
 307         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
 308         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
 309         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
 310         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
 311         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
 312         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
 313         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
 314
 315         # DASH mp4 video
 316         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 317         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 318         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 319         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 320         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 321         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 322         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 323         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 324         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
 325         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
 326         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
 327
 328         # Dash mp4 audio
 329         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
 330         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
 331         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
 332
 333         # Dash webm
 334         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 335         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 336         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 337         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 338         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 339         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 340         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
 341         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 342         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 343         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 344         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 345         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 346         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 347         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 348         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 349         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 350         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
 351         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
 352         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
 353         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
 354         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
 355
 356         # Dash webm audio
 357         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
 358         '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
 359
 360         # Dash webm audio with opus inside
 361         '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
 362         '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
 363         '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
 364
 365         # RTMP (unnamed)
 366         '_rtmp': {'protocol': 'rtmp'},
 367     }
 368
 369     IE_NAME = 'youtube'
 370     _TESTS = [
 371         {
 372             'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
 373             'info_dict': {
 374                 'id': 'BaW_jenozKc',
 375                 'ext': 'mp4',
 376                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 377                 'uploader': 'Philipp Hagemeister',
 378                 'uploader_id': 'phihag',
 379                 'upload_date': '20121002',
 380                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 381                 'categories': ['Science & Technology'],
 382                 'tags': ['youtube-dl'],
 383                 'like_count': int,
 384                 'dislike_count': int,
 385                 'start_time': 1,
 386                 'end_time': 9,
 387             }
 388         },
 389         {
 390             'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
 391             'note': 'Test generic use_cipher_signature video (#897)',
 392             'info_dict': {
 393                 'id': 'UxxajLWwzqY',
 394                 'ext': 'mp4',
 395                 'upload_date': '20120506',
 396                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 397                 'description': 'md5:782e8651347686cba06e58f71ab51773',
 398                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 399                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 400                          'iconic ep', 'iconic', 'love', 'it'],
 401                 'uploader': 'Icona Pop',
 402                 'uploader_id': 'IconaPop',
 403             }
 404         },
 405         {
 406             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 407             'note': 'Test VEVO video with age protection (#956)',
 408             'info_dict': {
 409                 'id': '07FYdnEawAQ',
 410                 'ext': 'mp4',
 411                 'upload_date': '20130703',
 412                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 413                 'description': 'md5:64249768eec3bc4276236606ea996373',
 414                 'uploader': 'justintimberlakeVEVO',
 415                 'uploader_id': 'justintimberlakeVEVO',
 416                 'age_limit': 18,
 417             }
 418         },
 419         {
 420             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 421             'note': 'Embed-only video (#1746)',
 422             'info_dict': {
 423                 'id': 'yZIXLfi8CZQ',
 424                 'ext': 'mp4',
 425                 'upload_date': '20120608',
 426                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 427                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 428                 'uploader': 'SET India',
 429                 'uploader_id': 'setindia'
 430             }
 431         },
 432         {
 433             'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
 434             'note': 'Use the first video ID in the URL',
 435             'info_dict': {
 436                 'id': 'BaW_jenozKc',
 437                 'ext': 'mp4',
 438                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 439                 'uploader': 'Philipp Hagemeister',
 440                 'uploader_id': 'phihag',
 441                 'upload_date': '20121002',
 442                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 443                 'categories': ['Science & Technology'],
 444                 'tags': ['youtube-dl'],
 445                 'like_count': int,
 446                 'dislike_count': int,
 447             },
 448             'params': {
 449                 'skip_download': True,
 450             },
 451         },
 452         {
 453             'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
 454             'note': '256k DASH audio (format 141) via DASH manifest',
 455             'info_dict': {
 456                 'id': 'a9LDPn-MO4I',
 457                 'ext': 'm4a',
 458                 'upload_date': '20121002',
 459                 'uploader_id': '8KVIDEO',
 460                 'description': '',
 461                 'uploader': '8KVIDEO',
 462                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 463             },
 464             'params': {
 465                 'youtube_include_dash_manifest': True,
 466                 'format': '141',
 467             },
 468         },
 469         # DASH manifest with encrypted signature
 470         {
 471             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 472             'info_dict': {
 473                 'id': 'IB3lcPjvWLA',
 474                 'ext': 'm4a',
 475                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 476                 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
 477                 'uploader': 'AfrojackVEVO',
 478                 'uploader_id': 'AfrojackVEVO',
 479                 'upload_date': '20131011',
 480             },
 481             'params': {
 482                 'youtube_include_dash_manifest': True,
 483                 'format': '141',
 484             },
 485         },
 486         # JS player signature function name containing $
 487         {
 488             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 489             'info_dict': {
 490                 'id': 'nfWlot6h_JM',
 491                 'ext': 'm4a',
 492                 'title': 'Taylor Swift - Shake It Off',
 493                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
 494                 'uploader': 'TaylorSwiftVEVO',
 495                 'uploader_id': 'TaylorSwiftVEVO',
 496                 'upload_date': '20140818',
 497             },
 498             'params': {
 499                 'youtube_include_dash_manifest': True,
 500                 'format': '141',
 501             },
 502         },
 503         # Controversy video
 504         {
 505             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 506             'info_dict': {
 507                 'id': 'T4XJQO3qol8',
 508                 'ext': 'mp4',
 509                 'upload_date': '20100909',
 510                 'uploader': 'The Amazing Atheist',
 511                 'uploader_id': 'TheAmazingAtheist',
 512                 'title': 'Burning Everyone\'s Koran',
 513                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 514             }
 515         },
 516         # Normal age-gate video (No vevo, embed allowed)
 517         {
 518             'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
 519             'info_dict': {
 520                 'id': 'HtVdAasjOgU',
 521                 'ext': 'mp4',
 522                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 523                 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 524                 'uploader': 'The Witcher',
 525                 'uploader_id': 'WitcherGame',
 526                 'upload_date': '20140605',
 527                 'age_limit': 18,
 528             },
 529         },
 530         # Age-gate video with encrypted signature
 531         {
 532             'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
 533             'info_dict': {
 534                 'id': '6kLq3WMV1nU',
 535                 'ext': 'mp4',
 536                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 537                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 538                 'uploader': 'LloydVEVO',
 539                 'uploader_id': 'LloydVEVO',
 540                 'upload_date': '20110629',
 541                 'age_limit': 18,
 542             },
 543         },
 544         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 545         {
 546             'url': '__2ABJjxzNo',
 547             'info_dict': {
 548                 'id': '__2ABJjxzNo',
 549                 'ext': 'mp4',
 550                 'upload_date': '20100430',
 551                 'uploader_id': 'deadmau5',
 552                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 553                 'uploader': 'deadmau5',
 554                 'title': 'Deadmau5 - Some Chords (HD)',
 555             },
 556             'expected_warnings': [
 557                 'DASH manifest missing',
 558             ]
 559         },
 560         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 561         {
 562             'url': 'lqQg6PlCWgI',
 563             'info_dict': {
 564                 'id': 'lqQg6PlCWgI',
 565                 'ext': 'mp4',
 566                 'upload_date': '20120724',
 567                 'uploader_id': 'olympic',
 568                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 569                 'uploader': 'Olympics',
 570                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 571             },
 572             'params': {
 573                 'skip_download': 'requires avconv',
 574             }
 575         },
 576         # Non-square pixels
 577         {
 578             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 579             'info_dict': {
 580                 'id': '_b-2C3KPAM0',
 581                 'ext': 'mp4',
 582                 'stretched_ratio': 16 / 9.,
 583                 'upload_date': '20110310',
 584                 'uploader_id': 'AllenMeow',
 585                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 586                 'uploader': '孫艾倫',
 587                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 588             },
 589         },
 590         # url_encoded_fmt_stream_map is empty string
 591         {
 592             'url': 'qEJwOuvDf7I',
 593             'info_dict': {
 594                 'id': 'qEJwOuvDf7I',
 595                 'ext': 'webm',
 596                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 597                 'description': '',
 598                 'upload_date': '20150404',
 599                 'uploader_id': 'spbelect',
 600                 'uploader': 'Наблюдатели Петербурга',
 601             },
 602             'params': {
 603                 'skip_download': 'requires avconv',
 604             }
 605         },
 606         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
 607         {
 608             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 609             'info_dict': {
 610                 'id': 'FIl7x6_3R5Y',
 611                 'ext': 'mp4',
 612                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 613                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 614                 'upload_date': '20150625',
 615                 'uploader_id': 'dorappi2000',
 616                 'uploader': 'dorappi2000',
 617                 'formats': 'mincount:33',
 618             },
 619         },
 620         # DASH manifest with segment_list
 621         {
 622             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 623             'md5': '8ce563a1d667b599d21064e982ab9e31',
 624             'info_dict': {
 625                 'id': 'CsmdDsKjzN8',
 626                 'ext': 'mp4',
 627                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 628                 'uploader': 'Airtek',
 629                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 630                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 631                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 632             },
 633             'params': {
 634                 'youtube_include_dash_manifest': True,
 635                 'format': '135',  # bestvideo
 636             }
 637         },
 638         {
 639             # Multifeed videos (multiple cameras), URL is for Main Camera
 640             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 641             'info_dict': {
 642                 'id': 'jqWvoWXjCVs',
 643                 'title': 'teamPGP: Rocket League Noob Stream',
 644                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 645             },
 646             'playlist': [{
 647                 'info_dict': {
 648                     'id': 'jqWvoWXjCVs',
 649                     'ext': 'mp4',
 650                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 651                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 652                     'upload_date': '20150721',
 653                     'uploader': 'Beer Games Beer',
 654                     'uploader_id': 'beergamesbeer',
 655                 },
 656             }, {
 657                 'info_dict': {
 658                     'id': '6h8e8xoXJzg',
 659                     'ext': 'mp4',
 660                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 661                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 662                     'upload_date': '20150721',
 663                     'uploader': 'Beer Games Beer',
 664                     'uploader_id': 'beergamesbeer',
 665                 },
 666             }, {
 667                 'info_dict': {
 668                     'id': 'PUOgX5z9xZw',
 669                     'ext': 'mp4',
 670                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 671                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 672                     'upload_date': '20150721',
 673                     'uploader': 'Beer Games Beer',
 674                     'uploader_id': 'beergamesbeer',
 675                 },
 676             }, {
 677                 'info_dict': {
 678                     'id': 'teuwxikvS5k',
 679                     'ext': 'mp4',
 680                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 681                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 682                     'upload_date': '20150721',
 683                     'uploader': 'Beer Games Beer',
 684                     'uploader_id': 'beergamesbeer',
 685                 },
 686             }],
 687             'params': {
 688                 'skip_download': True,
 689             },
 690         },
 691         {
 692             'url': 'http://vid.plus/FlRa-iH7PGw',
 693             'only_matching': True,
 694         },
 695         {
 696             # Title with JS-like syntax "};"
 697             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 698             'info_dict': {
 699                 'id': 'lsguqyKfVQg',
 700                 'ext': 'mp4',
 701                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 702                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 703                 'upload_date': '20151119',
 704                 'uploader_id': 'IronSoulElf',
 705                 'uploader': 'IronSoulElf',
 706             },
 707             'params': {
 708                 'skip_download': True,
 709             },
 710         },
 711     ]
 712
 713     def __init__(self, *args, **kwargs):
 714         super(YoutubeIE, self).__init__(*args, **kwargs)
 715         self._player_cache = {}
 716
 717     def report_video_info_webpage_download(self, video_id):
 718         """Report attempt to download video info webpage."""
 719         self.to_screen('%s: Downloading video info webpage' % video_id)
 720
 721     def report_information_extraction(self, video_id):
 722         """Report attempt to extract video information."""
 723         self.to_screen('%s: Extracting video information' % video_id)
 724
 725     def report_unavailable_format(self, video_id, format):
 726         """Report extracted video URL."""
 727         self.to_screen('%s: Format %s not available' % (video_id, format))
 728
 729     def report_rtmp_download(self):
 730         """Indicate the download will use the RTMP protocol."""
 731         self.to_screen('RTMP download detected')
 732
 733     def _signature_cache_id(self, example_sig):
 734         """ Return a string representation of a signature """
 735         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 736
 737     def _extract_signature_function(self, video_id, player_url, example_sig):
 738         id_m = re.match(
 739             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
 740             player_url)
 741         if not id_m:
 742             raise ExtractorError('Cannot identify player %r' % player_url)
 743         player_type = id_m.group('ext')
 744         player_id = id_m.group('id')
 745
 746         # Read from filesystem cache
 747         func_id = '%s_%s_%s' % (
 748             player_type, player_id, self._signature_cache_id(example_sig))
 749         assert os.path.basename(func_id) == func_id
 750
 751         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
 752         if cache_spec is not None:
 753             return lambda s: ''.join(s[i] for i in cache_spec)
 754
 755         download_note = (
 756             'Downloading player %s' % player_url
 757             if self._downloader.params.get('verbose') else
 758             'Downloading %s player %s' % (player_type, player_id)
 759         )
 760         if player_type == 'js':
 761             code = self._download_webpage(
 762                 player_url, video_id,
 763                 note=download_note,
 764                 errnote='Download of %s failed' % player_url)
 765             res = self._parse_sig_js(code)
 766         elif player_type == 'swf':
 767             urlh = self._request_webpage(
 768                 player_url, video_id,
 769                 note=download_note,
 770                 errnote='Download of %s failed' % player_url)
 771             code = urlh.read()
 772             res = self._parse_sig_swf(code)
 773         else:
 774             assert False, 'Invalid player type %r' % player_type
 775
 776         test_string = ''.join(map(compat_chr, range(len(example_sig))))
 777         cache_res = res(test_string)
 778         cache_spec = [ord(c) for c in cache_res]
 779
 780         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
 781         return res
 782
 783     def _print_sig_code(self, func, example_sig):
 784         def gen_sig_code(idxs):
 785             def _genslice(start, end, step):
 786                 starts = '' if start == 0 else str(start)
 787                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
 788                 steps = '' if step == 1 else (':%d' % step)
 789                 return 's[%s%s%s]' % (starts, ends, steps)
 790
 791             step = None
 792             # Quelch pyflakes warnings - start will be set when step is set
 793             start = '(Never used)'
 794             for i, prev in zip(idxs[1:], idxs[:-1]):
 795                 if step is not None:
 796                     if i - prev == step:
 797                         continue
 798                     yield _genslice(start, prev, step)
 799                     step = None
 800                     continue
 801                 if i - prev in [-1, 1]:
 802                     step = i - prev
 803                     start = prev
 804                     continue
 805                 else:
 806                     yield 's[%d]' % prev
 807             if step is None:
 808                 yield 's[%d]' % i
 809             else:
 810                 yield _genslice(start, i, step)
 811
 812         test_string = ''.join(map(compat_chr, range(len(example_sig))))
 813         cache_res = func(test_string)
 814         cache_spec = [ord(c) for c in cache_res]
 815         expr_code = ' + '.join(gen_sig_code(cache_spec))
 816         signature_id_tuple = '(%s)' % (
 817             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
 818         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
 819                 '    return %s\n') % (signature_id_tuple, expr_code)
 820         self.to_screen('Extracted signature function:\n' + code)
 821
 822     def _parse_sig_js(self, jscode):
 823         funcname = self._search_regex(
 824             r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
 825             'Initial JS player signature function name')
 826
 827         jsi = JSInterpreter(jscode)
 828         initial_function = jsi.extract_function(funcname)
 829         return lambda s: initial_function([s])
 830
 831     def _parse_sig_swf(self, file_contents):
 832         swfi = SWFInterpreter(file_contents)
 833         TARGET_CLASSNAME = 'SignatureDecipher'
 834         searched_class = swfi.extract_class(TARGET_CLASSNAME)
 835         initial_function = swfi.extract_function(searched_class, 'decipher')
 836         return lambda s: initial_function([s])
 837
 838     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
 839         """Turn the encrypted s field into a working signature"""
 840
 841         if player_url is None:
 842             raise ExtractorError('Cannot decrypt signature without player_url')
 843
 844         if player_url.startswith('//'):
 845             player_url = 'https:' + player_url
 846         try:
 847             player_id = (player_url, self._signature_cache_id(s))
 848             if player_id not in self._player_cache:
 849                 func = self._extract_signature_function(
 850                     video_id, player_url, s
 851                 )
 852                 self._player_cache[player_id] = func
 853             func = self._player_cache[player_id]
 854             if self._downloader.params.get('youtube_print_sig_code'):
 855                 self._print_sig_code(func, s)
 856             return func(s)
 857         except Exception as e:
 858             tb = traceback.format_exc()
 859             raise ExtractorError(
 860                 'Signature extraction failed: ' + tb, cause=e)
 861
 862     def _get_subtitles(self, video_id, webpage):
 863         try:
 864             subs_doc = self._download_xml(
 865                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
 866                 video_id, note=False)
 867         except ExtractorError as err:
 868             self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
 869             return {}
 870
 871         sub_lang_list = {}
 872         for track in subs_doc.findall('track'):
 873             lang = track.attrib['lang_code']
 874             if lang in sub_lang_list:
 875                 continue
 876             sub_formats = []
 877             for ext in ['sbv', 'vtt', 'srt']:
 878                 params = compat_urllib_parse.urlencode({
 879                     'lang': lang,
 880                     'v': video_id,
 881                     'fmt': ext,
 882                     'name': track.attrib['name'].encode('utf-8'),
 883                 })
 884                 sub_formats.append({
 885                     'url': 'https://www.youtube.com/api/timedtext?' + params,
 886                     'ext': ext,
 887                 })
 888             sub_lang_list[lang] = sub_formats
 889         if not sub_lang_list:
 890             self._downloader.report_warning('video doesn\'t have subtitles')
 891             return {}
 892         return sub_lang_list
 893
 894     def _get_ytplayer_config(self, webpage):
 895         patterns = [
 896             r';ytplayer\.config\s*=\s*({.*?});ytplayer',
 897             r';ytplayer\.config\s*=\s*({.*?});',
 898         ]
 899         config = self._search_regex(patterns, webpage, 'ytconfig.player', default=None)
 900         if config is not None:
 901             return json.loads(uppercase_escape(config))
 902
 903     def _get_automatic_captions(self, video_id, webpage):
 904         """We need the webpage for getting the captions url, pass it as an
 905            argument to speed up the process."""
 906         self.to_screen('%s: Looking for automatic captions' % video_id)
 907         player_config = self._get_ytplayer_config(webpage)
 908         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
 909         if player_config is None:
 910             self._downloader.report_warning(err_msg)
 911             return {}
 912         try:
 913             args = player_config['args']
 914             caption_url = args['ttsurl']
 915             timestamp = args['timestamp']
 916             # We get the available subtitles
 917             list_params = compat_urllib_parse.urlencode({
 918                 'type': 'list',
 919                 'tlangs': 1,
 920                 'asrs': 1,
 921             })
 922             list_url = caption_url + '&' + list_params
 923             caption_list = self._download_xml(list_url, video_id)
 924             original_lang_node = caption_list.find('track')
 925             if original_lang_node is None:
 926                 self._downloader.report_warning('Video doesn\'t have automatic captions')
 927                 return {}
 928             original_lang = original_lang_node.attrib['lang_code']
 929             caption_kind = original_lang_node.attrib.get('kind', '')
 930
 931             sub_lang_list = {}
 932             for lang_node in caption_list.findall('target'):
 933                 sub_lang = lang_node.attrib['lang_code']
 934                 sub_formats = []
 935                 for ext in ['sbv', 'vtt', 'srt']:
 936                     params = compat_urllib_parse.urlencode({
 937                         'lang': original_lang,
 938                         'tlang': sub_lang,
 939                         'fmt': ext,
 940                         'ts': timestamp,
 941                         'kind': caption_kind,
 942                     })
 943                     sub_formats.append({
 944                         'url': caption_url + '&' + params,
 945                         'ext': ext,
 946                     })
 947                 sub_lang_list[sub_lang] = sub_formats
 948             return sub_lang_list
 949         # An extractor error can be raise by the download process if there are
 950         # no automatic captions but there are subtitles
 951         except (KeyError, ExtractorError):
 952             self._downloader.report_warning(err_msg)
 953             return {}
 954
 955     @classmethod
 956     def extract_id(cls, url):
 957         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
 958         if mobj is None:
 959             raise ExtractorError('Invalid URL: %s' % url)
 960         video_id = mobj.group(2)
 961         return video_id
 962
 963     def _extract_from_m3u8(self, manifest_url, video_id):
 964         url_map = {}
 965
 966         def _get_urls(_manifest):
 967             lines = _manifest.split('\n')
 968             urls = filter(lambda l: l and not l.startswith('#'),
 969                           lines)
 970             return urls
 971         manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
 972         formats_urls = _get_urls(manifest)
 973         for format_url in formats_urls:
 974             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
 975             url_map[itag] = format_url
 976         return url_map
 977
 978     def _extract_annotations(self, video_id):
 979         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
 980         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
 981
 982     def _parse_dash_manifest(
 983             self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
 984         def decrypt_sig(mobj):
 985             s = mobj.group(1)
 986             dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
 987             return '/signature/%s' % dec_s
 988         dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
 989         dash_doc = self._download_xml(
 990             dash_manifest_url, video_id,
 991             note='Downloading DASH manifest',
 992             errnote='Could not download DASH manifest',
 993             fatal=fatal)
 994
 995         if dash_doc is False:
 996             return []
 997
 998         formats = []
 999         for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
1000             mime_type = a.attrib.get('mimeType')
1001             for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
1002                 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
1003                 if url_el is None:
1004                     continue
1005                 if mime_type == 'text/vtt':
1006                     # TODO implement WebVTT downloading
1007                     pass
1008                 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
1009                     segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
1010                     format_id = r.attrib['id']
1011                     video_url = url_el.text
1012                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
1013                     f = {
1014                         'format_id': format_id,
1015                         'url': video_url,
1016                         'width': int_or_none(r.attrib.get('width')),
1017                         'height': int_or_none(r.attrib.get('height')),
1018                         'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
1019                         'asr': int_or_none(r.attrib.get('audioSamplingRate')),
1020                         'filesize': filesize,
1021                         'fps': int_or_none(r.attrib.get('frameRate')),
1022                     }
1023                     if segment_list is not None:
1024                         f.update({
1025                             'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
1026                             'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
1027                             'protocol': 'http_dash_segments',
1028                         })
1029                     try:
1030                         existing_format = next(
1031                             fo for fo in formats
1032                             if fo['format_id'] == format_id)
1033                     except StopIteration:
1034                         full_info = self._formats.get(format_id, {}).copy()
1035                         full_info.update(f)
1036                         codecs = r.attrib.get('codecs')
1037                         if codecs:
1038                             if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
1039                                 full_info['vcodec'] = codecs
1040                             elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
1041                                 full_info['acodec'] = codecs
1042                         formats.append(full_info)
1043                     else:
1044                         existing_format.update(f)
1045                 else:
1046                     self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
1047         return formats
1048
1049     def _real_extract(self, url):
1050         url, smuggled_data = unsmuggle_url(url, {})
1051
1052         proto = (
1053             'http' if self._downloader.params.get('prefer_insecure', False)
1054             else 'https')
1055
1056         start_time = None
1057         end_time = None
1058         parsed_url = compat_urllib_parse_urlparse(url)
1059         for component in [parsed_url.fragment, parsed_url.query]:
1060             query = compat_parse_qs(component)
1061             if start_time is None and 't' in query:
1062                 start_time = parse_duration(query['t'][0])
1063             if start_time is None and 'start' in query:
1064                 start_time = parse_duration(query['start'][0])
1065             if end_time is None and 'end' in query:
1066                 end_time = parse_duration(query['end'][0])
1067
1068         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1069         mobj = re.search(self._NEXT_URL_RE, url)
1070         if mobj:
1071             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1072         video_id = self.extract_id(url)
1073
1074         # Get video webpage
1075         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1076         video_webpage = self._download_webpage(url, video_id)
1077
1078         # Attempt to extract SWF player URL
1079         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1080         if mobj is not None:
1081             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1082         else:
1083             player_url = None
1084
1085         dash_mpds = []
1086
1087         def add_dash_mpd(video_info):
1088             dash_mpd = video_info.get('dashmpd')
1089             if dash_mpd and dash_mpd[0] not in dash_mpds:
1090                 dash_mpds.append(dash_mpd[0])
1091
1092         # Get video info
1093         embed_webpage = None
1094         is_live = None
1095         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1096             age_gate = True
1097             # We simulate the access to the video from www.youtube.com/v/{video_id}
1098             # this can be viewed without login into Youtube
1099             url = proto + '://www.youtube.com/embed/%s' % video_id
1100             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1101             data = compat_urllib_parse.urlencode({
1102                 'video_id': video_id,
1103                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1104                 'sts': self._search_regex(
1105                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1106             })
1107             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1108             video_info_webpage = self._download_webpage(
1109                 video_info_url, video_id,
1110                 note='Refetching age-gated info webpage',
1111                 errnote='unable to download video info webpage')
1112             video_info = compat_parse_qs(video_info_webpage)
1113             add_dash_mpd(video_info)
1114         else:
1115             age_gate = False
1116             video_info = None
1117             # Try looking directly into the video webpage
1118             ytplayer_config = self._get_ytplayer_config(video_webpage)
1119             if ytplayer_config is not None:
1120                 args = ytplayer_config['args']
1121                 if args.get('url_encoded_fmt_stream_map'):
1122                     # Convert to the same format returned by compat_parse_qs
1123                     video_info = dict((k, [v]) for k, v in args.items())
1124                     add_dash_mpd(video_info)
1125                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1126                     is_live = True
1127             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1128                 # We also try looking in get_video_info since it may contain different dashmpd
1129                 # URL that points to a DASH manifest with possibly different itag set (some itags
1130                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1131                 # manifest pointed by get_video_info's dashmpd).
1132                 # The general idea is to take a union of itags of both DASH manifests (for example
1133                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1134                 self.report_video_info_webpage_download(video_id)
1135                 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1136                     video_info_url = (
1137                         '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1138                         % (proto, video_id, el_type))
1139                     video_info_webpage = self._download_webpage(
1140                         video_info_url,
1141                         video_id, note=False,
1142                         errnote='unable to download video info webpage')
1143                     get_video_info = compat_parse_qs(video_info_webpage)
1144                     if get_video_info.get('use_cipher_signature') != ['True']:
1145                         add_dash_mpd(get_video_info)
1146                     if not video_info:
1147                         video_info = get_video_info
1148                     if 'token' in get_video_info:
1149                         # Different get_video_info requests may report different results, e.g.
1150                         # some may report video unavailability, but some may serve it without
1151                         # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1152                         # the original webpage as well as el=info and el=embedded get_video_info
1153                         # requests report video unavailability due to geo restriction while
1154                         # el=detailpage succeeds and returns valid data). This is probably
1155                         # due to YouTube measures against IP ranges of hosting providers.
1156                         # Working around by preferring the first succeeded video_info containing
1157                         # the token if no such video_info yet was found.
1158                         if 'token' not in video_info:
1159                             video_info = get_video_info
1160                         break
1161         if 'token' not in video_info:
1162             if 'reason' in video_info:
1163                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1164                     regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1165                     if regions_allowed:
1166                         raise ExtractorError('YouTube said: This video is available in %s only' % (
1167                             ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1168                             expected=True)
1169                 raise ExtractorError(
1170                     'YouTube said: %s' % video_info['reason'][0],
1171                     expected=True, video_id=video_id)
1172             else:
1173                 raise ExtractorError(
1174                     '"token" parameter not in video info for unknown reason',
1175                     video_id=video_id)
1176
1177         # title
1178         if 'title' in video_info:
1179             video_title = video_info['title'][0]
1180         else:
1181             self._downloader.report_warning('Unable to extract video title')
1182             video_title = '_'
1183
1184         # description
1185         video_description = get_element_by_id("eow-description", video_webpage)
1186         if video_description:
1187             video_description = re.sub(r'''(?x)
1188                 <a\s+
1189                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1190                     title="([^"]+)"\s+
1191                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1192                     class="yt-uix-redirect-link"\s*>
1193                 [^<]+
1194                 </a>
1195             ''', r'\1', video_description)
1196             video_description = clean_html(video_description)
1197         else:
1198             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1199             if fd_mobj:
1200                 video_description = unescapeHTML(fd_mobj.group(1))
1201             else:
1202                 video_description = ''
1203
1204         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1205             if not self._downloader.params.get('noplaylist'):
1206                 entries = []
1207                 feed_ids = []
1208                 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
1209                 for feed in multifeed_metadata_list.split(','):
1210                     feed_data = compat_parse_qs(feed)
1211                     entries.append({
1212                         '_type': 'url_transparent',
1213                         'ie_key': 'Youtube',
1214                         'url': smuggle_url(
1215                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1216                             {'force_singlefeed': True}),
1217                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1218                     })
1219                     feed_ids.append(feed_data['id'][0])
1220                 self.to_screen(
1221                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1222                     % (', '.join(feed_ids), video_id))
1223                 return self.playlist_result(entries, video_id, video_title, video_description)
1224             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1225
1226         if 'view_count' in video_info:
1227             view_count = int(video_info['view_count'][0])
1228         else:
1229             view_count = None
1230
1231         # Check for "rental" videos
1232         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1233             raise ExtractorError('"rental" videos not supported')
1234
1235         # Start extracting information
1236         self.report_information_extraction(video_id)
1237
1238         # uploader
1239         if 'author' not in video_info:
1240             raise ExtractorError('Unable to extract uploader name')
1241         video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1242
1243         # uploader_id
1244         video_uploader_id = None
1245         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1246         if mobj is not None:
1247             video_uploader_id = mobj.group(1)
1248         else:
1249             self._downloader.report_warning('unable to extract uploader nickname')
1250
1251         # thumbnail image
1252         # We try first to get a high quality image:
1253         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1254                             video_webpage, re.DOTALL)
1255         if m_thumb is not None:
1256             video_thumbnail = m_thumb.group(1)
1257         elif 'thumbnail_url' not in video_info:
1258             self._downloader.report_warning('unable to extract video thumbnail')
1259             video_thumbnail = None
1260         else:   # don't panic if we can't find it
1261             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1262
1263         # upload date
1264         upload_date = self._html_search_meta(
1265             'datePublished', video_webpage, 'upload date', default=None)
1266         if not upload_date:
1267             upload_date = self._search_regex(
1268                 [r'(?s)id="eow-date.*?>(.*?)</span>',
1269                  r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1270                 video_webpage, 'upload date', default=None)
1271             if upload_date:
1272                 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1273         upload_date = unified_strdate(upload_date)
1274
1275         m_cat_container = self._search_regex(
1276             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1277             video_webpage, 'categories', default=None)
1278         if m_cat_container:
1279             category = self._html_search_regex(
1280                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1281                 default=None)
1282             video_categories = None if category is None else [category]
1283         else:
1284             video_categories = None
1285
1286         video_tags = [
1287             unescapeHTML(m.group('content'))
1288             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1289
1290         def _extract_count(count_name):
1291             return str_to_int(self._search_regex(
1292                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1293                 % re.escape(count_name),
1294                 video_webpage, count_name, default=None))
1295
1296         like_count = _extract_count('like')
1297         dislike_count = _extract_count('dislike')
1298
1299         # subtitles
1300         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1301         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1302
1303         if 'length_seconds' not in video_info:
1304             self._downloader.report_warning('unable to extract video duration')
1305             video_duration = None
1306         else:
1307             video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1308
1309         # annotations
1310         video_annotations = None
1311         if self._downloader.params.get('writeannotations', False):
1312             video_annotations = self._extract_annotations(video_id)
1313
1314         def _map_to_format_list(urlmap):
1315             formats = []
1316             for itag, video_real_url in urlmap.items():
1317                 dct = {
1318                     'format_id': itag,
1319                     'url': video_real_url,
1320                     'player_url': player_url,
1321                 }
1322                 if itag in self._formats:
1323                     dct.update(self._formats[itag])
1324                 formats.append(dct)
1325             return formats
1326
1327         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1328             self.report_rtmp_download()
1329             formats = [{
1330                 'format_id': '_rtmp',
1331                 'protocol': 'rtmp',
1332                 'url': video_info['conn'][0],
1333                 'player_url': player_url,
1334             }]
1335         elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1336             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1337             if 'rtmpe%3Dyes' in encoded_url_map:
1338                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1339             formats = []
1340             for url_data_str in encoded_url_map.split(','):
1341                 url_data = compat_parse_qs(url_data_str)
1342                 if 'itag' not in url_data or 'url' not in url_data:
1343                     continue
1344                 format_id = url_data['itag'][0]
1345                 url = url_data['url'][0]
1346
1347                 if 'sig' in url_data:
1348                     url += '&signature=' + url_data['sig'][0]
1349                 elif 's' in url_data:
1350                     encrypted_sig = url_data['s'][0]
1351                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1352
1353                     jsplayer_url_json = self._search_regex(
1354                         ASSETS_RE,
1355                         embed_webpage if age_gate else video_webpage,
1356                         'JS player URL (1)', default=None)
1357                     if not jsplayer_url_json and not age_gate:
1358                         # We need the embed website after all
1359                         if embed_webpage is None:
1360                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1361                             embed_webpage = self._download_webpage(
1362                                 embed_url, video_id, 'Downloading embed webpage')
1363                         jsplayer_url_json = self._search_regex(
1364                             ASSETS_RE, embed_webpage, 'JS player URL')
1365
1366                     player_url = json.loads(jsplayer_url_json)
1367                     if player_url is None:
1368                         player_url_json = self._search_regex(
1369                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1370                             video_webpage, 'age gate player URL')
1371                         player_url = json.loads(player_url_json)
1372
1373                     if self._downloader.params.get('verbose'):
1374                         if player_url is None:
1375                             player_version = 'unknown'
1376                             player_desc = 'unknown'
1377                         else:
1378                             if player_url.endswith('swf'):
1379                                 player_version = self._search_regex(
1380                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1381                                     'flash player', fatal=False)
1382                                 player_desc = 'flash player %s' % player_version
1383                             else:
1384                                 player_version = self._search_regex(
1385                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
1386                                     player_url,
1387                                     'html5 player', fatal=False)
1388                                 player_desc = 'html5 player %s' % player_version
1389
1390                         parts_sizes = self._signature_cache_id(encrypted_sig)
1391                         self.to_screen('{%s} signature length %s, %s' %
1392                                        (format_id, parts_sizes, player_desc))
1393
1394                     signature = self._decrypt_signature(
1395                         encrypted_sig, video_id, player_url, age_gate)
1396                     url += '&signature=' + signature
1397                 if 'ratebypass' not in url:
1398                     url += '&ratebypass=yes'
1399
1400                 # Some itags are not included in DASH manifest thus corresponding formats will
1401                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1402                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1403                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1404                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1405                 dct = {
1406                     'format_id': format_id,
1407                     'url': url,
1408                     'player_url': player_url,
1409                     'filesize': int_or_none(url_data.get('clen', [None])[0]),
1410                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1411                     'width': width,
1412                     'height': height,
1413                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1414                     'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
1415                 }
1416                 type_ = url_data.get('type', [None])[0]
1417                 if type_:
1418                     type_split = type_.split(';')
1419                     kind_ext = type_split[0].split('/')
1420                     if len(kind_ext) == 2:
1421                         kind, ext = kind_ext
1422                         dct['ext'] = ext
1423                         if kind in ('audio', 'video'):
1424                             codecs = None
1425                             for mobj in re.finditer(
1426                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1427                                 if mobj.group('key') == 'codecs':
1428                                     codecs = mobj.group('val')
1429                                     break
1430                             if codecs:
1431                                 codecs = codecs.split(',')
1432                                 if len(codecs) == 2:
1433                                     acodec, vcodec = codecs[0], codecs[1]
1434                                 else:
1435                                     acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1436                                 dct.update({
1437                                     'acodec': acodec,
1438                                     'vcodec': vcodec,
1439                                 })
1440                 if format_id in self._formats:
1441                     dct.update(self._formats[format_id])
1442                 formats.append(dct)
1443         elif video_info.get('hlsvp'):
1444             manifest_url = video_info['hlsvp'][0]
1445             url_map = self._extract_from_m3u8(manifest_url, video_id)
1446             formats = _map_to_format_list(url_map)
1447         else:
1448             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1449
1450         # Look for the DASH manifest
1451         if self._downloader.params.get('youtube_include_dash_manifest', True):
1452             dash_mpd_fatal = True
1453             for dash_manifest_url in dash_mpds:
1454                 dash_formats = {}
1455                 try:
1456                     for df in self._parse_dash_manifest(
1457                             video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
1458                         # Do not overwrite DASH format found in some previous DASH manifest
1459                         if df['format_id'] not in dash_formats:
1460                             dash_formats[df['format_id']] = df
1461                         # Additional DASH manifests may end up in HTTP Error 403 therefore
1462                         # allow them to fail without bug report message if we already have
1463                         # some DASH manifest succeeded. This is temporary workaround to reduce
1464                         # burst of bug reports until we figure out the reason and whether it
1465                         # can be fixed at all.
1466                         dash_mpd_fatal = False
1467                 except (ExtractorError, KeyError) as e:
1468                     self.report_warning(
1469                         'Skipping DASH manifest: %r' % e, video_id)
1470                 if dash_formats:
1471                     # Remove the formats we found through non-DASH, they
1472                     # contain less info and it can be wrong, because we use
1473                     # fixed values (for example the resolution). See
1474                     # https://github.com/rg3/youtube-dl/issues/5774 for an
1475                     # example.
1476                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1477                     formats.extend(dash_formats.values())
1478
1479         # Check for malformed aspect ratio
1480         stretched_m = re.search(
1481             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1482             video_webpage)
1483         if stretched_m:
1484             ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
1485             for f in formats:
1486                 if f.get('vcodec') != 'none':
1487                     f['stretched_ratio'] = ratio
1488
1489         self._sort_formats(formats)
1490
1491         return {
1492             'id': video_id,
1493             'uploader': video_uploader,
1494             'uploader_id': video_uploader_id,
1495             'upload_date': upload_date,
1496             'title': video_title,
1497             'thumbnail': video_thumbnail,
1498             'description': video_description,
1499             'categories': video_categories,
1500             'tags': video_tags,
1501             'subtitles': video_subtitles,
1502             'automatic_captions': automatic_captions,
1503             'duration': video_duration,
1504             'age_limit': 18 if age_gate else 0,
1505             'annotations': video_annotations,
1506             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1507             'view_count': view_count,
1508             'like_count': like_count,
1509             'dislike_count': dislike_count,
1510             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1511             'formats': formats,
1512             'is_live': is_live,
1513             'start_time': start_time,
1514             'end_time': end_time,
1515         }
1516
1517
class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
    """Extract the entries of a YouTube playlist.

    Accepts playlist URLs (including watch URLs carrying a list parameter
    and embedded players) as well as bare playlist ids.  Playlist ids that
    start with 'RD' or 'UL' are handled as mixes by _extract_mix; every
    other id goes through _extract_playlist.
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        # This key used to be misspelled ('playlist_mincout'), which meant
        # the minimum entry count of this test case was never checked.
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        """Log in before extraction (delegates to the base class _login)."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Return a playlist result for a YouTube mix.

        The mixes are generated from a single video; the id of the playlist
        is just 'RD' + video_id, so the watch page of that video (the last
        11 characters of the playlist id) is downloaded with the list
        parameter set and scraped for the video ids it links to.
        """
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        # Try the known title containers in order; the first one found wins.
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        # orderedSet drops repeated ids while keeping first-seen order.
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Return a playlist result for a regular (non-mix) playlist.

        Raises ExtractorError (expected) when the page carries an alert
        saying the playlist does not exist / is private, or that the
        parameters are invalid.  Any other alert is reported as a warning,
        except the "Choose your language" one which is ignored.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)

    def _real_extract(self, url):
        """Extract the playlist referenced by url.

        If the URL also names a single video (a 'v' query parameter) and the
        'noplaylist' option is set, only that video is returned.  Raises
        ExtractorError if url does not match _VALID_URL.
        """
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        # Group 1 is the id taken from a URL, group 2 a bare playlist id.
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith(('RD', 'UL')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
1684
1685
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extract the videos of a YouTube channel given its /channel/<id> URL.

    When the channel id can be read from the channel page and starts with
    'UC', extraction is handed over to the playlist extractor using the
    matching 'UU' playlist id; otherwise the channel pages are scraped.
    """
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist (or a redirect to the uploads playlist) for the channel."""
        channel_id = self._match_id(url)
        videos_url = self._TEMPLATE_URL % channel_id

        # Listing a channel page by page is capped at 35 pages of 30 items,
        # i.e. 1050 videos in total (see #5778).  To get around that, try to
        # learn the channel id and extract the uploads playlist instead;
        # only fall back to page-by-page extraction if that fails.
        overview_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        uploads_owner_id = False
        if overview_page is not False:
            uploads_owner_id = (
                self._html_search_meta(
                    'channelId', overview_page, 'channel id', default=None) or
                self._search_regex(
                    r'data-(?:channel-external-|yt)id="([^"]+)"',
                    overview_page, 'channel id', default=None))
        if uploads_owner_id and uploads_owner_id.startswith('UC'):
            uploads_playlist_id = 'UU' + uploads_owner_id[2:]
            playlist_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        first_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', first_page)

        if autogenerated_marker is None:
            return self.playlist_result(self._entries(first_page, channel_id), channel_id)

        # Auto-generated channels keep all their videos on this single page;
        # their ajax pages are empty and therefore useless.
        entries = []
        for vid, vid_title in self.extract_videos_from_page(first_page):
            entries.append(self.url_result(
                vid, 'Youtube', video_id=vid, video_title=vid_title))
        return self.playlist_result(entries, channel_id)
1754
1755
class YoutubeUserIE(YoutubeChannelIE):
    """Channel extractor variant addressed by user name or 'ytuser:' keyword.

    Only the matching metadata and suitable() are defined here; extraction
    itself is inherited from YoutubeChannelIE.
    """
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return True only if no other *IE class in this module accepts url."""
        # The regex of this extractor is too permissive and would also match
        # URLs that belong to the other youtube extractors, so give every
        # other module-level class whose name ends in 'IE' the first say.
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)
1782
1783
class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Match the "playlists" page of a YouTube user.

    This class only declares its name, description, URL pattern and tests;
    it defines no methods of its own.
    """
    IE_NAME = 'youtube:user:playlists'
    IE_DESC = 'YouTube.com user playlists'
    # The named group 'id' captures the user name segment of the URL.
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
            'playlist_mincount': 4,
        },
        {
            # with "Load more" button
            'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
            'playlist_mincount': 70,
        },
    ]
1805
1806
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Run a YouTube search and return the first n results as a playlist."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into every results request;
    # subclasses override this to alter the search.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until either a page
        yields no video ids or at least n videos have been collected; the
        list is then cut down to n entries.

        query -- the search terms
        n     -- the maximum number of results to return (may be infinite)

        Returns a playlist result whose id is the query.  Raises
        ExtractorError (expected) if a downloaded page contains a
        'search-message' element.
        """

        videos = []
        encoded_query = query.encode('utf-8')

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': encoded_query,
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough results are available.  Using '>' here
            # would request one more page when exactly n videos have been
            # collected, which is wasted work and can even turn a successful
            # search into a "No video results" error.
            if not new_videos or len(videos) >= n:
                break

        # n may be float('inf'), which cannot be used as a slice bound, so
        # only slice when there really is something to cut off.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
1850
1851
class YoutubeSearchDateIE(YoutubeSearchIE):
    """YoutubeSearchIE variant that adds a sort-by-upload-date query argument."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the query string of every results request by the parent.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
1857
1858
class YoutubeSearchURLIE(InfoExtractor):
    """Turn a YouTube search results URL into a playlist of the listed items."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }]

    def _real_extract(self, url):
        """Download the results page and return its items as a playlist dict.

        The decoded search query is used both as the id for the download
        and as the title of the resulting playlist.
        """
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        # Everything of interest lives inside the "item-section" list.
        results_html = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        # Each result is represented by one <h3 class="...yt-lockup-title...">.
        for heading_html in re.findall(
                r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', results_html):
            # A missing title is tolerated (fatal=False), a missing link is not.
            item_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], heading_html, 'item title', fatal=False)
            item_href = self._html_search_regex(
                r'(?s)href="([^"]+)"', heading_html, 'item URL')
            item_url = compat_urlparse.urljoin('https://www.youtube.com/', item_href)
            entries.append({
                '_type': 'url',
                'url': item_url,
                'title': item_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1900
1901
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` URLs.

    The work is delegated to the parent class, which is handed the show's
    ``/playlists`` page URL.
    """
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Rewrite the show URL to its playlists page and let the parent extract it."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
1919
1920
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.

    _FEED_NAME is the last path component of the feed URL
    (https://www.youtube.com/feed/<_FEED_NAME>) and is also used to build
    IE_NAME; _PLAYLIST_TITLE is used as the download identifier and as the
    title of the resulting playlist.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name ('youtube:<_FEED_NAME>')."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed and return them as a playlist.

        The feed page is downloaded first; further portions are fetched by
        following the "load more" link until a portion brings no new video
        ids or no "load more" link is present. The url argument is not
        used: the feed address is built from _FEED_NAME.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos.
            # A real list is required here: on Python 3 filter() returns a lazy
            # iterator that is always truthy, so the emptiness check below would
            # never trigger.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
1968
1969
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the watch later list, handled as the playlist 'WL'."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # Replace the test cases inherited from YoutubePlaylistIE with none.
    _TESTS = []

    def _real_extract(self, url):
        """Extract the 'WL' playlist; the matched url itself is not inspected."""
        watch_later_list_id = 'WL'
        return self._extract_playlist(watch_later_list_id)
1979
1980
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the favourites page to its playlist id and hand it over to
    the 'YoutubePlaylist' extractor."""
    _LOGIN_REQUIRED = True
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Return a url result pointing at the favourites playlist.

        The matched url is not used: the favourites page is always
        downloaded from its fixed address.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The playlist id is the first "list=" value found in the page.
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
1991
1992
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    # Values required by YoutubeFeedsInfoExtractor.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1998
1999
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    # The shorthand accepted by _VALID_URL starts with a colon (":ytsubs"),
    # so the description spells it that way, like the other feed extractors.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Values required by YoutubeFeedsInfoExtractor.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
2005
2006
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: '\.' in a plain string literal is an invalid escape sequence
    # (it triggers a warning on newer Python 3); the pattern itself is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    # Values required by YoutubeFeedsInfoExtractor.
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
2012
2013
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs that carry no video id and report the
    likely cause (an unquoted '&' on the command line) to the user."""
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'http://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(quoting_hint, expected=True)
2061
2062
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose ``v`` value is only 1 to 10 characters long
    and report them as truncated."""
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial id and the URL."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)