youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_parse_qs,
  20     compat_urllib_parse_unquote,
  21     compat_urllib_parse_unquote_plus,
  22     compat_urllib_parse_urlencode,
  23     compat_urllib_parse_urlparse,
  24     compat_urlparse,
  25     compat_str,
  26 )
  27 from ..utils import (
  28     clean_html,
  29     error_to_compat_str,
  30     ExtractorError,
  31     float_or_none,
  32     get_element_by_attribute,
  33     get_element_by_id,
  34     int_or_none,
  35     mimetype2ext,
  36     orderedSet,
  37     parse_duration,
  38     remove_quotes,
  39     remove_start,
  40     sanitized_Request,
  41     smuggle_url,
  42     str_to_int,
  43     unescapeHTML,
  44     unified_strdate,
  45     unsmuggle_url,
  46     uppercase_escape,
  47     urlencode_postdata,
  48     ISO3166Utils,
  49 )
  50
  51
  52 class YoutubeBaseInfoExtractor(InfoExtractor):
  53     """Provide base functions for Youtube extractors"""
  54     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  55     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  56     _NETRC_MACHINE = 'youtube'
  57     # If True it will raise an error if no login info is provided
  58     _LOGIN_REQUIRED = False
  59
  60     def _set_language(self):
  61         self._set_cookie(
  62             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  63             # YouTube sets the expire time to about two months
  64             expire_time=time.time() + 2 * 30 * 24 * 3600)
  65
  66     def _ids_to_results(self, ids):
  67         return [
  68             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  69             for vid_id in ids]
  70
  71     def _login(self):
  72         """
  73         Attempt to log in to YouTube.
  74         True is returned if successful or skipped.
  75         False is returned if login failed.
  76
  77         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  78         """
  79         (username, password) = self._get_login_info()
  80         # No authentication to be performed
  81         if username is None:
  82             if self._LOGIN_REQUIRED:
  83                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  84             return True
  85
  86         login_page = self._download_webpage(
  87             self._LOGIN_URL, None,
  88             note='Downloading login page',
  89             errnote='unable to fetch login page', fatal=False)
  90         if login_page is False:
  91             return
  92
  93         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
  94                                   login_page, 'Login GALX parameter')
  95
  96         # Log in
  97         login_form_strs = {
  98             'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  99             'Email': username,
 100             'GALX': galx,
 101             'Passwd': password,
 102
 103             'PersistentCookie': 'yes',
 104             '_utf8': '霱',
 105             'bgresponse': 'js_disabled',
 106             'checkConnection': '',
 107             'checkedDomains': 'youtube',
 108             'dnConn': '',
 109             'pstMsg': '0',
 110             'rmShown': '1',
 111             'secTok': '',
 112             'signIn': 'Sign in',
 113             'timeStmp': '',
 114             'service': 'youtube',
 115             'uilel': '3',
 116             'hl': 'en_US',
 117         }
 118
 119         login_data = urlencode_postdata(login_form_strs)
 120
 121         req = sanitized_Request(self._LOGIN_URL, login_data)
 122         login_results = self._download_webpage(
 123             req, None,
 124             note='Logging in', errnote='unable to log in', fatal=False)
 125         if login_results is False:
 126             return False
 127
 128         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
 129             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
 130
 131         # Two-Factor
 132         # TODO add SMS and phone call support - these require making a request and then prompting the user
 133
 134         if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
 135             tfa_code = self._get_tfa_info('2-step verification code')
 136
 137             if not tfa_code:
 138                 self._downloader.report_warning(
 139                     'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 140                     '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 141                 return False
 142
 143             tfa_code = remove_start(tfa_code, 'G-')
 144
 145             tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
 146
 147             tfa_form_strs.update({
 148                 'Pin': tfa_code,
 149                 'TrustDevice': 'on',
 150             })
 151
 152             tfa_data = urlencode_postdata(tfa_form_strs)
 153
 154             tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
 155             tfa_results = self._download_webpage(
 156                 tfa_req, None,
 157                 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
 158
 159             if tfa_results is False:
 160                 return False
 161
 162             if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
 163                 self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
 164                 return False
 165             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
 166                 self._downloader.report_warning('unable to log in - did the page structure change?')
 167                 return False
 168             if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
 169                 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
 170                 return False
 171
 172         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 173             self._downloader.report_warning('unable to log in: bad username or password')
 174             return False
 175         return True
 176
 177     def _real_initialize(self):
 178         if self._downloader is None:
 179             return
 180         self._set_language()
 181         if not self._login():
 182             return
 183
 184
 185 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 186     # Extract entries from page with "Load more" button
 187     def _entries(self, page, playlist_id):
 188         more_widget_html = content_html = page
 189         for page_num in itertools.count(1):
 190             for entry in self._process_page(content_html):
 191                 yield entry
 192
 193             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 194             if not mobj:
 195                 break
 196
 197             more = self._download_json(
 198                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 199                 'Downloading page #%s' % page_num,
 200                 transform_source=uppercase_escape)
 201             content_html = more['content_html']
 202             if not content_html.strip():
 203                 # Some webpages show a "Load more" button but they don't
 204                 # have more videos
 205                 break
 206             more_widget_html = more['load_more_widget_html']
 207
 208
 209 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 210     def _process_page(self, content):
 211         for video_id, video_title in self.extract_videos_from_page(content):
 212             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 213
 214     def extract_videos_from_page(self, page):
 215         ids_in_page = []
 216         titles_in_page = []
 217         for mobj in re.finditer(self._VIDEO_RE, page):
 218             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 219             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 220                 continue
 221             video_id = mobj.group('id')
 222             video_title = unescapeHTML(mobj.group('title'))
 223             if video_title:
 224                 video_title = video_title.strip()
 225             try:
 226                 idx = ids_in_page.index(video_id)
 227                 if video_title and not titles_in_page[idx]:
 228                     titles_in_page[idx] = video_title
 229             except ValueError:
 230                 ids_in_page.append(video_id)
 231                 titles_in_page.append(video_title)
 232         return zip(ids_in_page, titles_in_page)
 233
 234
 235 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 236     def _process_page(self, content):
 237         for playlist_id in orderedSet(re.findall(
 238                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 239                 content)):
 240             yield self.url_result(
 241                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 242
 243     def _real_extract(self, url):
 244         playlist_id = self._match_id(url)
 245         webpage = self._download_webpage(url, playlist_id)
 246         title = self._og_search_title(webpage, fatal=False)
 247         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 248
 249
 250 class YoutubeIE(YoutubeBaseInfoExtractor):
 251     IE_DESC = 'YouTube.com'
 252     _VALID_URL = r"""(?x)^
 253                      (
 254                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 255                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 256                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 257                             (?:www\.)?pwnyoutube\.com/|
 258                             (?:www\.)?yourepeat\.com/|
 259                             tube\.majestyc\.net/|
 260                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 261                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 262                          (?:                                                  # the various things that can precede the ID:
 263                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 264                              |(?:                                             # or the v= param in all its forms
 265                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 266                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 267                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 268                                  v=
 269                              )
 270                          ))
 271                          |(?:
 272                             youtu\.be|                                        # just youtu.be/xxxx
 273                             vid\.plus|                                        # or vid.plus/xxxx
 274                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 275                          )/
 276                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 277                          )
 278                      )?                                                       # all until now is optional -> you can pass the naked ID
 279                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 280                      (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
 281                      (?(1).+)?                                                # if we found the ID, everything can follow
 282                      $"""
 283     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 284     _formats = {
 285         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 286         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 287         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 288         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 289         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 290         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 291         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 292         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 293         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 294         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 295         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 296         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 297         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 298         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 299         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 300         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 301         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 302         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 303
 304
 305         # 3D videos
 306         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 307         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 308         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 309         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 310         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 311         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 312         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 313
 314         # Apple HTTP Live Streaming
 315         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 316         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 317         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 318         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 319         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 320         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 321         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 322         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 323
 324         # DASH mp4 video
 325         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 326         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 327         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 328         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 329         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 330         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 331         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 332         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 333         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
 334         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
 335         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 336
 337         # Dash mp4 audio
 338         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
 339         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
 340         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
 341
 342         # Dash webm
 343         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 344         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 345         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 346         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 347         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 348         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 349         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
 350         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 351         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 352         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 353         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 354         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 355         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 356         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 357         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 358         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 359         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 360         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
 361         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
 362         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
 363         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 364         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
 365
 366         # Dash webm audio
 367         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
 368         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
 369
 370         # Dash webm audio with opus inside
 371         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
 372         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
 373         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
 374
 375         # RTMP (unnamed)
 376         '_rtmp': {'protocol': 'rtmp'},
 377     }
 378     _SUBTITLE_FORMATS = ('ttml', 'vtt')
 379
 380     IE_NAME = 'youtube'
 381     _TESTS = [
 382         {
 383             'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 384             'info_dict': {
 385                 'id': 'BaW_jenozKc',
 386                 'ext': 'mp4',
 387                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 388                 'uploader': 'Philipp Hagemeister',
 389                 'uploader_id': 'phihag',
 390                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
 391                 'upload_date': '20121002',
 392                 'license': 'Standard YouTube License',
 393                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 394                 'categories': ['Science & Technology'],
 395                 'tags': ['youtube-dl'],
 396                 'like_count': int,
 397                 'dislike_count': int,
 398                 'start_time': 1,
 399                 'end_time': 9,
 400             }
 401         },
 402         {
 403             'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
 404             'note': 'Test generic use_cipher_signature video (#897)',
 405             'info_dict': {
 406                 'id': 'UxxajLWwzqY',
 407                 'ext': 'mp4',
 408                 'upload_date': '20120506',
 409                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 410                 'alt_title': 'I Love It (feat. Charli XCX)',
 411                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 412                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 413                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 414                          'iconic ep', 'iconic', 'love', 'it'],
 415                 'uploader': 'Icona Pop',
 416                 'uploader_id': 'IconaPop',
 417                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 418                 'license': 'Standard YouTube License',
 419                 'creator': 'Icona Pop',
 420             }
 421         },
 422         {
 423             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 424             'note': 'Test VEVO video with age protection (#956)',
 425             'info_dict': {
 426                 'id': '07FYdnEawAQ',
 427                 'ext': 'mp4',
 428                 'upload_date': '20130703',
 429                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 430                 'alt_title': 'Tunnel Vision',
 431                 'description': 'md5:64249768eec3bc4276236606ea996373',
 432                 'uploader': 'justintimberlakeVEVO',
 433                 'uploader_id': 'justintimberlakeVEVO',
 434                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 435                 'license': 'Standard YouTube License',
 436                 'creator': 'Justin Timberlake',
 437                 'age_limit': 18,
 438             }
 439         },
 440         {
 441             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 442             'note': 'Embed-only video (#1746)',
 443             'info_dict': {
 444                 'id': 'yZIXLfi8CZQ',
 445                 'ext': 'mp4',
 446                 'upload_date': '20120608',
 447                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 448                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 449                 'uploader': 'SET India',
 450                 'uploader_id': 'setindia',
 451                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
 452                 'license': 'Standard YouTube License',
 453                 'age_limit': 18,
 454             }
 455         },
 456         {
 457             'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 458             'note': 'Use the first video ID in the URL',
 459             'info_dict': {
 460                 'id': 'BaW_jenozKc',
 461                 'ext': 'mp4',
 462                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 463                 'uploader': 'Philipp Hagemeister',
 464                 'uploader_id': 'phihag',
 465                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
 466                 'upload_date': '20121002',
 467                 'license': 'Standard YouTube License',
 468                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 469                 'categories': ['Science & Technology'],
 470                 'tags': ['youtube-dl'],
 471                 'like_count': int,
 472                 'dislike_count': int,
 473             },
 474             'params': {
 475                 'skip_download': True,
 476             },
 477         },
 478         {
 479             'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
 480             'note': '256k DASH audio (format 141) via DASH manifest',
 481             'info_dict': {
 482                 'id': 'a9LDPn-MO4I',
 483                 'ext': 'm4a',
 484                 'upload_date': '20121002',
 485                 'uploader_id': '8KVIDEO',
 486                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 487                 'description': '',
 488                 'uploader': '8KVIDEO',
 489                 'license': 'Standard YouTube License',
 490                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 491             },
 492             'params': {
 493                 'youtube_include_dash_manifest': True,
 494                 'format': '141',
 495             },
 496         },
 497         # DASH manifest with encrypted signature
 498         {
 499             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 500             'info_dict': {
 501                 'id': 'IB3lcPjvWLA',
 502                 'ext': 'm4a',
 503                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 504                 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
 505                 'uploader': 'AfrojackVEVO',
 506                 'uploader_id': 'AfrojackVEVO',
 507                 'upload_date': '20131011',
 508                 'license': 'Standard YouTube License',
 509             },
 510             'params': {
 511                 'youtube_include_dash_manifest': True,
 512                 'format': '141',
 513             },
 514         },
 515         # JS player signature function name containing $
 516         {
 517             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 518             'info_dict': {
 519                 'id': 'nfWlot6h_JM',
 520                 'ext': 'm4a',
 521                 'title': 'Taylor Swift - Shake It Off',
 522                 'alt_title': 'Shake It Off',
 523                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
 524                 'uploader': 'TaylorSwiftVEVO',
 525                 'uploader_id': 'TaylorSwiftVEVO',
 526                 'upload_date': '20140818',
 527                 'license': 'Standard YouTube License',
 528                 'creator': 'Taylor Swift',
 529             },
 530             'params': {
 531                 'youtube_include_dash_manifest': True,
 532                 'format': '141',
 533             },
 534         },
 535         # Controversy video
 536         {
 537             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 538             'info_dict': {
 539                 'id': 'T4XJQO3qol8',
 540                 'ext': 'mp4',
 541                 'upload_date': '20100909',
 542                 'uploader': 'The Amazing Atheist',
 543                 'uploader_id': 'TheAmazingAtheist',
 544                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 545                 'license': 'Standard YouTube License',
 546                 'title': 'Burning Everyone\'s Koran',
 547                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 548             }
 549         },
 550         # Normal age-gate video (No vevo, embed allowed)
 551         {
 552             'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
 553             'info_dict': {
 554                 'id': 'HtVdAasjOgU',
 555                 'ext': 'mp4',
 556                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 557                 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 558                 'uploader': 'The Witcher',
 559                 'uploader_id': 'WitcherGame',
 560                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 561                 'upload_date': '20140605',
 562                 'license': 'Standard YouTube License',
 563                 'age_limit': 18,
 564             },
 565         },
 566         # Age-gate video with encrypted signature
 567         {
 568             'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
 569             'info_dict': {
 570                 'id': '6kLq3WMV1nU',
 571                 'ext': 'mp4',
 572                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 573                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 574                 'uploader': 'LloydVEVO',
 575                 'uploader_id': 'LloydVEVO',
 576                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 577                 'upload_date': '20110629',
 578                 'license': 'Standard YouTube License',
 579                 'age_limit': 18,
 580             },
 581         },
 582         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 583         {
 584             'url': '__2ABJjxzNo',
 585             'info_dict': {
 586                 'id': '__2ABJjxzNo',
 587                 'ext': 'mp4',
 588                 'upload_date': '20100430',
 589                 'uploader_id': 'deadmau5',
 590                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 591                 'creator': 'deadmau5',
 592                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 593                 'uploader': 'deadmau5',
 594                 'license': 'Standard YouTube License',
 595                 'title': 'Deadmau5 - Some Chords (HD)',
 596                 'alt_title': 'Some Chords',
 597             },
 598             'expected_warnings': [
 599                 'DASH manifest missing',
 600             ]
 601         },
 602         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 603         {
 604             'url': 'lqQg6PlCWgI',
 605             'info_dict': {
 606                 'id': 'lqQg6PlCWgI',
 607                 'ext': 'mp4',
 608                 'upload_date': '20150827',
 609                 'uploader_id': 'olympic',
 610                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
 611                 'license': 'Standard YouTube License',
 612                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 613                 'uploader': 'Olympics',
 614                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 615             },
 616             'params': {
 617                 'skip_download': 'requires avconv',
 618             }
 619         },
 620         # Non-square pixels
 621         {
 622             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 623             'info_dict': {
 624                 'id': '_b-2C3KPAM0',
 625                 'ext': 'mp4',
 626                 'stretched_ratio': 16 / 9.,
 627                 'upload_date': '20110310',
 628                 'uploader_id': 'AllenMeow',
 629                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 630                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 631                 'uploader': '孫艾倫',
 632                 'license': 'Standard YouTube License',
 633                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 634             },
 635         },
 636         # url_encoded_fmt_stream_map is empty string
 637         {
 638             'url': 'qEJwOuvDf7I',
 639             'info_dict': {
 640                 'id': 'qEJwOuvDf7I',
 641                 'ext': 'webm',
 642                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 643                 'description': '',
 644                 'upload_date': '20150404',
 645                 'uploader_id': 'spbelect',
 646                 'uploader': 'Наблюдатели Петербурга',
 647             },
 648             'params': {
 649                 'skip_download': 'requires avconv',
 650             },
 651             'skip': 'This live event has ended.',
 652         },
 653         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
 654         {
 655             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 656             'info_dict': {
 657                 'id': 'FIl7x6_3R5Y',
 658                 'ext': 'mp4',
 659                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 660                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 661                 'upload_date': '20150625',
 662                 'uploader_id': 'dorappi2000',
 663                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 664                 'uploader': 'dorappi2000',
 665                 'license': 'Standard YouTube License',
 666                 'formats': 'mincount:33',
 667             },
 668         },
 669         # DASH manifest with segment_list
 670         {
 671             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 672             'md5': '8ce563a1d667b599d21064e982ab9e31',
 673             'info_dict': {
 674                 'id': 'CsmdDsKjzN8',
 675                 'ext': 'mp4',
 676                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 677                 'uploader': 'Airtek',
 678                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 679                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 680                 'license': 'Standard YouTube License',
 681                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 682             },
 683             'params': {
 684                 'youtube_include_dash_manifest': True,
 685                 'format': '135',  # bestvideo
 686             }
 687         },
 688         {
 689             # Multifeed videos (multiple cameras), URL is for Main Camera
 690             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 691             'info_dict': {
 692                 'id': 'jqWvoWXjCVs',
 693                 'title': 'teamPGP: Rocket League Noob Stream',
 694                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 695             },
 696             'playlist': [{
 697                 'info_dict': {
 698                     'id': 'jqWvoWXjCVs',
 699                     'ext': 'mp4',
 700                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 701                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 702                     'upload_date': '20150721',
 703                     'uploader': 'Beer Games Beer',
 704                     'uploader_id': 'beergamesbeer',
 705                     'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 706                     'license': 'Standard YouTube License',
 707                 },
 708             }, {
 709                 'info_dict': {
 710                     'id': '6h8e8xoXJzg',
 711                     'ext': 'mp4',
 712                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 713                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 714                     'upload_date': '20150721',
 715                     'uploader': 'Beer Games Beer',
 716                     'uploader_id': 'beergamesbeer',
 717                     'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 718                     'license': 'Standard YouTube License',
 719                 },
 720             }, {
 721                 'info_dict': {
 722                     'id': 'PUOgX5z9xZw',
 723                     'ext': 'mp4',
 724                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 725                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 726                     'upload_date': '20150721',
 727                     'uploader': 'Beer Games Beer',
 728                     'uploader_id': 'beergamesbeer',
 729                     'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 730                     'license': 'Standard YouTube License',
 731                 },
 732             }, {
 733                 'info_dict': {
 734                     'id': 'teuwxikvS5k',
 735                     'ext': 'mp4',
 736                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 737                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 738                     'upload_date': '20150721',
 739                     'uploader': 'Beer Games Beer',
 740                     'uploader_id': 'beergamesbeer',
 741                     'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 742                     'license': 'Standard YouTube License',
 743                 },
 744             }],
 745             'params': {
 746                 'skip_download': True,
 747             },
 748         },
 749         {
 750             # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
 751             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 752             'info_dict': {
 753                 'id': 'gVfLd0zydlo',
 754                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 755             },
 756             'playlist_count': 2,
 757         },
 758         {
 759             'url': 'http://vid.plus/FlRa-iH7PGw',
 760             'only_matching': True,
 761         },
 762         {
 763             'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 764             'only_matching': True,
 765         },
 766         {
 767             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
 768             # Also tests cut-off URL expansion in video description (see
 769             # https://github.com/rg3/youtube-dl/issues/1892,
 770             # https://github.com/rg3/youtube-dl/issues/8164)
 771             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 772             'info_dict': {
 773                 'id': 'lsguqyKfVQg',
 774                 'ext': 'mp4',
 775                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 776                 'alt_title': 'Dark Walk',
 777                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 778                 'upload_date': '20151119',
 779                 'uploader_id': 'IronSoulElf',
 780                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 781                 'uploader': 'IronSoulElf',
 782                 'license': 'Standard YouTube License',
 783                 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
 784             },
 785             'params': {
 786                 'skip_download': True,
 787             },
 788         },
 789         {
 790             # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
 791             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 792             'only_matching': True,
 793         },
 794         {
 795             # Video with yt:stretch=17:0
 796             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 797             'info_dict': {
 798                 'id': 'Q39EVAstoRM',
 799                 'ext': 'mp4',
 800                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 801                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 802                 'upload_date': '20151107',
 803                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 804                 'uploader': 'CH GAMER DROID',
 805             },
 806             'params': {
 807                 'skip_download': True,
 808             },
 809         },
 810         {
 811             # Video licensed under Creative Commons
 812             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 813             'info_dict': {
 814                 'id': 'M4gD1WSo5mA',
 815                 'ext': 'mp4',
 816                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 817                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 818                 'upload_date': '20150127',
 819                 'uploader_id': 'BerkmanCenter',
 820                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 821                 'uploader': 'BerkmanCenter',
 822                 'license': 'Creative Commons Attribution license (reuse allowed)',
 823             },
 824             'params': {
 825                 'skip_download': True,
 826             },
 827         },
 828         {
 829             # Channel-like uploader_url
 830             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 831             'info_dict': {
 832                 'id': 'eQcmzGIKrzg',
 833                 'ext': 'mp4',
 834                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 835                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 836                 'upload_date': '20151119',
 837                 'uploader': 'Bernie 2016',
 838                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 839                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 840                 'license': 'Creative Commons Attribution license (reuse allowed)',
 841             },
 842             'params': {
 843                 'skip_download': True,
 844             },
 845         },
 846         {
 847             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 848             'only_matching': True,
 849         }
 850     ]
 851
    def __init__(self, *args, **kwargs):
        """Initialise the extractor and create an empty per-instance player cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Filled by _decrypt_signature: maps (player_url, signature cache id)
        # to the signature function extracted for that player.
        self._player_cache = {}
 855
 856     def report_video_info_webpage_download(self, video_id):
 857         """Report attempt to download video info webpage."""
 858         self.to_screen('%s: Downloading video info webpage' % video_id)
 859
 860     def report_information_extraction(self, video_id):
 861         """Report attempt to extract video information."""
 862         self.to_screen('%s: Extracting video information' % video_id)
 863
 864     def report_unavailable_format(self, video_id, format):
 865         """Report extracted video URL."""
 866         self.to_screen('%s: Format %s not available' % (video_id, format))
 867
 868     def report_rtmp_download(self):
 869         """Indicate the download will use the RTMP protocol."""
 870         self.to_screen('RTMP download detected')
 871
 872     def _signature_cache_id(self, example_sig):
 873         """ Return a string representation of a signature """
 874         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 875
 876     def _extract_signature_function(self, video_id, player_url, example_sig):
 877         id_m = re.match(
 878             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
 879             player_url)
 880         if not id_m:
 881             raise ExtractorError('Cannot identify player %r' % player_url)
 882         player_type = id_m.group('ext')
 883         player_id = id_m.group('id')
 884
 885         # Read from filesystem cache
 886         func_id = '%s_%s_%s' % (
 887             player_type, player_id, self._signature_cache_id(example_sig))
 888         assert os.path.basename(func_id) == func_id
 889
 890         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
 891         if cache_spec is not None:
 892             return lambda s: ''.join(s[i] for i in cache_spec)
 893
 894         download_note = (
 895             'Downloading player %s' % player_url
 896             if self._downloader.params.get('verbose') else
 897             'Downloading %s player %s' % (player_type, player_id)
 898         )
 899         if player_type == 'js':
 900             code = self._download_webpage(
 901                 player_url, video_id,
 902                 note=download_note,
 903                 errnote='Download of %s failed' % player_url)
 904             res = self._parse_sig_js(code)
 905         elif player_type == 'swf':
 906             urlh = self._request_webpage(
 907                 player_url, video_id,
 908                 note=download_note,
 909                 errnote='Download of %s failed' % player_url)
 910             code = urlh.read()
 911             res = self._parse_sig_swf(code)
 912         else:
 913             assert False, 'Invalid player type %r' % player_type
 914
 915         test_string = ''.join(map(compat_chr, range(len(example_sig))))
 916         cache_res = res(test_string)
 917         cache_spec = [ord(c) for c in cache_res]
 918
 919         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
 920         return res
 921
 922     def _print_sig_code(self, func, example_sig):
 923         def gen_sig_code(idxs):
 924             def _genslice(start, end, step):
 925                 starts = '' if start == 0 else str(start)
 926                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
 927                 steps = '' if step == 1 else (':%d' % step)
 928                 return 's[%s%s%s]' % (starts, ends, steps)
 929
 930             step = None
 931             # Quelch pyflakes warnings - start will be set when step is set
 932             start = '(Never used)'
 933             for i, prev in zip(idxs[1:], idxs[:-1]):
 934                 if step is not None:
 935                     if i - prev == step:
 936                         continue
 937                     yield _genslice(start, prev, step)
 938                     step = None
 939                     continue
 940                 if i - prev in [-1, 1]:
 941                     step = i - prev
 942                     start = prev
 943                     continue
 944                 else:
 945                     yield 's[%d]' % prev
 946             if step is None:
 947                 yield 's[%d]' % i
 948             else:
 949                 yield _genslice(start, i, step)
 950
 951         test_string = ''.join(map(compat_chr, range(len(example_sig))))
 952         cache_res = func(test_string)
 953         cache_spec = [ord(c) for c in cache_res]
 954         expr_code = ' + '.join(gen_sig_code(cache_spec))
 955         signature_id_tuple = '(%s)' % (
 956             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
 957         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
 958                 '    return %s\n') % (signature_id_tuple, expr_code)
 959         self.to_screen('Extracted signature function:\n' + code)
 960
 961     def _parse_sig_js(self, jscode):
 962         funcname = self._search_regex(
 963             r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
 964             'Initial JS player signature function name')
 965
 966         jsi = JSInterpreter(jscode)
 967         initial_function = jsi.extract_function(funcname)
 968         return lambda s: initial_function([s])
 969
 970     def _parse_sig_swf(self, file_contents):
 971         swfi = SWFInterpreter(file_contents)
 972         TARGET_CLASSNAME = 'SignatureDecipher'
 973         searched_class = swfi.extract_class(TARGET_CLASSNAME)
 974         initial_function = swfi.extract_function(searched_class, 'decipher')
 975         return lambda s: initial_function([s])
 976
 977     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
 978         """Turn the encrypted s field into a working signature"""
 979
 980         if player_url is None:
 981             raise ExtractorError('Cannot decrypt signature without player_url')
 982
 983         if player_url.startswith('//'):
 984             player_url = 'https:' + player_url
 985         try:
 986             player_id = (player_url, self._signature_cache_id(s))
 987             if player_id not in self._player_cache:
 988                 func = self._extract_signature_function(
 989                     video_id, player_url, s
 990                 )
 991                 self._player_cache[player_id] = func
 992             func = self._player_cache[player_id]
 993             if self._downloader.params.get('youtube_print_sig_code'):
 994                 self._print_sig_code(func, s)
 995             return func(s)
 996         except Exception as e:
 997             tb = traceback.format_exc()
 998             raise ExtractorError(
 999                 'Signature extraction failed: ' + tb, cause=e)
1000
1001     def _get_subtitles(self, video_id, webpage):
1002         try:
1003             subs_doc = self._download_xml(
1004                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1005                 video_id, note=False)
1006         except ExtractorError as err:
1007             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1008             return {}
1009
1010         sub_lang_list = {}
1011         for track in subs_doc.findall('track'):
1012             lang = track.attrib['lang_code']
1013             if lang in sub_lang_list:
1014                 continue
1015             sub_formats = []
1016             for ext in self._SUBTITLE_FORMATS:
1017                 params = compat_urllib_parse_urlencode({
1018                     'lang': lang,
1019                     'v': video_id,
1020                     'fmt': ext,
1021                     'name': track.attrib['name'].encode('utf-8'),
1022                 })
1023                 sub_formats.append({
1024                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1025                     'ext': ext,
1026                 })
1027             sub_lang_list[lang] = sub_formats
1028         if not sub_lang_list:
1029             self._downloader.report_warning('video doesn\'t have subtitles')
1030             return {}
1031         return sub_lang_list
1032
1033     def _get_ytplayer_config(self, video_id, webpage):
1034         patterns = (
1035             # User data may contain arbitrary character sequences that may affect
1036             # JSON extraction with regex, e.g. when '};' is contained the second
1037             # regex won't capture the whole JSON. Yet working around by trying more
1038             # concrete regex first keeping in mind proper quoted string handling
1039             # to be implemented in future that will replace this workaround (see
1040             # https://github.com/rg3/youtube-dl/issues/7468,
1041             # https://github.com/rg3/youtube-dl/pull/7599)
1042             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1043             r';ytplayer\.config\s*=\s*({.+?});',
1044         )
1045         config = self._search_regex(
1046             patterns, webpage, 'ytplayer.config', default=None)
1047         if config:
1048             return self._parse_json(
1049                 uppercase_escape(config), video_id, fatal=False)
1050
1051     def _get_automatic_captions(self, video_id, webpage):
1052         """We need the webpage for getting the captions url, pass it as an
1053            argument to speed up the process."""
1054         self.to_screen('%s: Looking for automatic captions' % video_id)
1055         player_config = self._get_ytplayer_config(video_id, webpage)
1056         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1057         if not player_config:
1058             self._downloader.report_warning(err_msg)
1059             return {}
1060         try:
1061             args = player_config['args']
1062             caption_url = args.get('ttsurl')
1063             if caption_url:
1064                 timestamp = args['timestamp']
1065                 # We get the available subtitles
1066                 list_params = compat_urllib_parse_urlencode({
1067                     'type': 'list',
1068                     'tlangs': 1,
1069                     'asrs': 1,
1070                 })
1071                 list_url = caption_url + '&' + list_params
1072                 caption_list = self._download_xml(list_url, video_id)
1073                 original_lang_node = caption_list.find('track')
1074                 if original_lang_node is None:
1075                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1076                     return {}
1077                 original_lang = original_lang_node.attrib['lang_code']
1078                 caption_kind = original_lang_node.attrib.get('kind', '')
1079
1080                 sub_lang_list = {}
1081                 for lang_node in caption_list.findall('target'):
1082                     sub_lang = lang_node.attrib['lang_code']
1083                     sub_formats = []
1084                     for ext in self._SUBTITLE_FORMATS:
1085                         params = compat_urllib_parse_urlencode({
1086                             'lang': original_lang,
1087                             'tlang': sub_lang,
1088                             'fmt': ext,
1089                             'ts': timestamp,
1090                             'kind': caption_kind,
1091                         })
1092                         sub_formats.append({
1093                             'url': caption_url + '&' + params,
1094                             'ext': ext,
1095                         })
1096                     sub_lang_list[sub_lang] = sub_formats
1097                 return sub_lang_list
1098
1099             # Some videos don't provide ttsurl but rather caption_tracks and
1100             # caption_translation_languages (e.g. 20LmZk1hakA)
1101             caption_tracks = args['caption_tracks']
1102             caption_translation_languages = args['caption_translation_languages']
1103             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1104             parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
1105             caption_qs = compat_parse_qs(parsed_caption_url.query)
1106
1107             sub_lang_list = {}
1108             for lang in caption_translation_languages.split(','):
1109                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1110                 sub_lang = lang_qs.get('lc', [None])[0]
1111                 if not sub_lang:
1112                     continue
1113                 sub_formats = []
1114                 for ext in self._SUBTITLE_FORMATS:
1115                     caption_qs.update({
1116                         'tlang': [sub_lang],
1117                         'fmt': [ext],
1118                     })
1119                     sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
1120                         query=compat_urllib_parse_urlencode(caption_qs, True)))
1121                     sub_formats.append({
1122                         'url': sub_url,
1123                         'ext': ext,
1124                     })
1125                 sub_lang_list[sub_lang] = sub_formats
1126             return sub_lang_list
1127         # An extractor error can be raise by the download process if there are
1128         # no automatic captions but there are subtitles
1129         except (KeyError, ExtractorError):
1130             self._downloader.report_warning(err_msg)
1131             return {}
1132
1133     def _mark_watched(self, video_id, video_info):
1134         playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1135         if not playback_url:
1136             return
1137         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1138         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1139
1140         # cpn generation algorithm is reverse engineered from base.js.
1141         # In fact it works even with dummy cpn.
1142         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1143         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1144
1145         qs.update({
1146             'ver': ['2'],
1147             'cpn': [cpn],
1148         })
1149         playback_url = compat_urlparse.urlunparse(
1150             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1151
1152         self._download_webpage(
1153             playback_url, video_id, 'Marking watched',
1154             'Unable to mark watched', fatal=False)
1155
1156     @classmethod
1157     def extract_id(cls, url):
1158         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1159         if mobj is None:
1160             raise ExtractorError('Invalid URL: %s' % url)
1161         video_id = mobj.group(2)
1162         return video_id
1163
1164     def _extract_from_m3u8(self, manifest_url, video_id):
1165         url_map = {}
1166
1167         def _get_urls(_manifest):
1168             lines = _manifest.split('\n')
1169             urls = filter(lambda l: l and not l.startswith('#'),
1170                           lines)
1171             return urls
1172         manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
1173         formats_urls = _get_urls(manifest)
1174         for format_url in formats_urls:
1175             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1176             url_map[itag] = format_url
1177         return url_map
1178
1179     def _extract_annotations(self, video_id):
1180         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1181         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1182
1183     def _real_extract(self, url):
1184         url, smuggled_data = unsmuggle_url(url, {})
1185
1186         proto = (
1187             'http' if self._downloader.params.get('prefer_insecure', False)
1188             else 'https')
1189
1190         start_time = None
1191         end_time = None
1192         parsed_url = compat_urllib_parse_urlparse(url)
1193         for component in [parsed_url.fragment, parsed_url.query]:
1194             query = compat_parse_qs(component)
1195             if start_time is None and 't' in query:
1196                 start_time = parse_duration(query['t'][0])
1197             if start_time is None and 'start' in query:
1198                 start_time = parse_duration(query['start'][0])
1199             if end_time is None and 'end' in query:
1200                 end_time = parse_duration(query['end'][0])
1201
1202         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1203         mobj = re.search(self._NEXT_URL_RE, url)
1204         if mobj:
1205             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1206         video_id = self.extract_id(url)
1207
1208         # Get video webpage
1209         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1210         video_webpage = self._download_webpage(url, video_id)
1211
1212         # Attempt to extract SWF player URL
1213         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1214         if mobj is not None:
1215             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1216         else:
1217             player_url = None
1218
1219         dash_mpds = []
1220
1221         def add_dash_mpd(video_info):
1222             dash_mpd = video_info.get('dashmpd')
1223             if dash_mpd and dash_mpd[0] not in dash_mpds:
1224                 dash_mpds.append(dash_mpd[0])
1225
1226         # Get video info
1227         embed_webpage = None
1228         is_live = None
1229         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1230             age_gate = True
1231             # We simulate the access to the video from www.youtube.com/v/{video_id}
1232             # this can be viewed without login into Youtube
1233             url = proto + '://www.youtube.com/embed/%s' % video_id
1234             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1235             data = compat_urllib_parse_urlencode({
1236                 'video_id': video_id,
1237                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1238                 'sts': self._search_regex(
1239                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1240             })
1241             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1242             video_info_webpage = self._download_webpage(
1243                 video_info_url, video_id,
1244                 note='Refetching age-gated info webpage',
1245                 errnote='unable to download video info webpage')
1246             video_info = compat_parse_qs(video_info_webpage)
1247             add_dash_mpd(video_info)
1248         else:
1249             age_gate = False
1250             video_info = None
1251             # Try looking directly into the video webpage
1252             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1253             if ytplayer_config:
1254                 args = ytplayer_config['args']
1255                 if args.get('url_encoded_fmt_stream_map'):
1256                     # Convert to the same format returned by compat_parse_qs
1257                     video_info = dict((k, [v]) for k, v in args.items())
1258                     add_dash_mpd(video_info)
1259                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1260                     is_live = True
1261             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1262                 # We also try looking in get_video_info since it may contain different dashmpd
1263                 # URL that points to a DASH manifest with possibly different itag set (some itags
1264                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1265                 # manifest pointed by get_video_info's dashmpd).
1266                 # The general idea is to take a union of itags of both DASH manifests (for example
1267                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1268                 self.report_video_info_webpage_download(video_id)
1269                 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1270                     video_info_url = (
1271                         '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1272                         % (proto, video_id, el_type))
1273                     video_info_webpage = self._download_webpage(
1274                         video_info_url,
1275                         video_id, note=False,
1276                         errnote='unable to download video info webpage')
1277                     get_video_info = compat_parse_qs(video_info_webpage)
1278                     if get_video_info.get('use_cipher_signature') != ['True']:
1279                         add_dash_mpd(get_video_info)
1280                     if not video_info:
1281                         video_info = get_video_info
1282                     if 'token' in get_video_info:
1283                         # Different get_video_info requests may report different results, e.g.
1284                         # some may report video unavailability, but some may serve it without
1285                         # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1286                         # the original webpage as well as el=info and el=embedded get_video_info
1287                         # requests report video unavailability due to geo restriction while
1288                         # el=detailpage succeeds and returns valid data). This is probably
1289                         # due to YouTube measures against IP ranges of hosting providers.
1290                         # Working around by preferring the first succeeded video_info containing
1291                         # the token if no such video_info yet was found.
1292                         if 'token' not in video_info:
1293                             video_info = get_video_info
1294                         break
1295         if 'token' not in video_info:
1296             if 'reason' in video_info:
1297                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1298                     regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1299                     if regions_allowed:
1300                         raise ExtractorError('YouTube said: This video is available in %s only' % (
1301                             ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1302                             expected=True)
1303                 raise ExtractorError(
1304                     'YouTube said: %s' % video_info['reason'][0],
1305                     expected=True, video_id=video_id)
1306             else:
1307                 raise ExtractorError(
1308                     '"token" parameter not in video info for unknown reason',
1309                     video_id=video_id)
1310
1311         # title
1312         if 'title' in video_info:
1313             video_title = video_info['title'][0]
1314         else:
1315             self._downloader.report_warning('Unable to extract video title')
1316             video_title = '_'
1317
1318         # description
1319         video_description = get_element_by_id("eow-description", video_webpage)
1320         if video_description:
1321             video_description = re.sub(r'''(?x)
1322                 <a\s+
1323                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1324                     (?:title|href)="([^"]+)"\s+
1325                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1326                     class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
1327                 [^<]+\.{3}\s*
1328                 </a>
1329             ''', r'\1', video_description)
1330             video_description = clean_html(video_description)
1331         else:
1332             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1333             if fd_mobj:
1334                 video_description = unescapeHTML(fd_mobj.group(1))
1335             else:
1336                 video_description = ''
1337
1338         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1339             if not self._downloader.params.get('noplaylist'):
1340                 entries = []
1341                 feed_ids = []
1342                 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1343                 for feed in multifeed_metadata_list.split(','):
1344                     # Unquote should take place before split on comma (,) since textual
1345                     # fields may contain comma as well (see
1346                     # https://github.com/rg3/youtube-dl/issues/8536)
1347                     feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1348                     entries.append({
1349                         '_type': 'url_transparent',
1350                         'ie_key': 'Youtube',
1351                         'url': smuggle_url(
1352                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1353                             {'force_singlefeed': True}),
1354                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1355                     })
1356                     feed_ids.append(feed_data['id'][0])
1357                 self.to_screen(
1358                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1359                     % (', '.join(feed_ids), video_id))
1360                 return self.playlist_result(entries, video_id, video_title, video_description)
1361             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1362
1363         if 'view_count' in video_info:
1364             view_count = int(video_info['view_count'][0])
1365         else:
1366             view_count = None
1367
1368         # Check for "rental" videos
1369         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1370             raise ExtractorError('"rental" videos not supported')
1371
1372         # Start extracting information
1373         self.report_information_extraction(video_id)
1374
1375         # uploader
1376         if 'author' not in video_info:
1377             raise ExtractorError('Unable to extract uploader name')
1378         video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1379
1380         # uploader_id
1381         video_uploader_id = None
1382         video_uploader_url = None
1383         mobj = re.search(
1384             r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1385             video_webpage)
1386         if mobj is not None:
1387             video_uploader_id = mobj.group('uploader_id')
1388             video_uploader_url = mobj.group('uploader_url')
1389         else:
1390             self._downloader.report_warning('unable to extract uploader nickname')
1391
1392         # thumbnail image
1393         # We try first to get a high quality image:
1394         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1395                             video_webpage, re.DOTALL)
1396         if m_thumb is not None:
1397             video_thumbnail = m_thumb.group(1)
1398         elif 'thumbnail_url' not in video_info:
1399             self._downloader.report_warning('unable to extract video thumbnail')
1400             video_thumbnail = None
1401         else:   # don't panic if we can't find it
1402             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1403
1404         # upload date
1405         upload_date = self._html_search_meta(
1406             'datePublished', video_webpage, 'upload date', default=None)
1407         if not upload_date:
1408             upload_date = self._search_regex(
1409                 [r'(?s)id="eow-date.*?>(.*?)</span>',
1410                  r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1411                 video_webpage, 'upload date', default=None)
1412             if upload_date:
1413                 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1414         upload_date = unified_strdate(upload_date)
1415
1416         video_license = self._html_search_regex(
1417             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1418             video_webpage, 'license', default=None)
1419
1420         m_music = re.search(
1421             r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
1422             video_webpage)
1423         if m_music:
1424             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1425             video_creator = clean_html(m_music.group('creator'))
1426         else:
1427             video_alt_title = video_creator = None
1428
1429         m_cat_container = self._search_regex(
1430             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1431             video_webpage, 'categories', default=None)
1432         if m_cat_container:
1433             category = self._html_search_regex(
1434                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1435                 default=None)
1436             video_categories = None if category is None else [category]
1437         else:
1438             video_categories = None
1439
1440         video_tags = [
1441             unescapeHTML(m.group('content'))
1442             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1443
1444         def _extract_count(count_name):
1445             return str_to_int(self._search_regex(
1446                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1447                 % re.escape(count_name),
1448                 video_webpage, count_name, default=None))
1449
1450         like_count = _extract_count('like')
1451         dislike_count = _extract_count('dislike')
1452
1453         # subtitles
1454         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1455         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1456
1457         if 'length_seconds' not in video_info:
1458             self._downloader.report_warning('unable to extract video duration')
1459             video_duration = None
1460         else:
1461             video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1462
1463         # annotations
1464         video_annotations = None
1465         if self._downloader.params.get('writeannotations', False):
1466             video_annotations = self._extract_annotations(video_id)
1467
1468         def _map_to_format_list(urlmap):
1469             formats = []
1470             for itag, video_real_url in urlmap.items():
1471                 dct = {
1472                     'format_id': itag,
1473                     'url': video_real_url,
1474                     'player_url': player_url,
1475                 }
1476                 if itag in self._formats:
1477                     dct.update(self._formats[itag])
1478                 formats.append(dct)
1479             return formats
1480
1481         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1482             self.report_rtmp_download()
1483             formats = [{
1484                 'format_id': '_rtmp',
1485                 'protocol': 'rtmp',
1486                 'url': video_info['conn'][0],
1487                 'player_url': player_url,
1488             }]
1489         elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1490             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1491             if 'rtmpe%3Dyes' in encoded_url_map:
1492                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1493             formats_spec = {}
1494             fmt_list = video_info.get('fmt_list', [''])[0]
1495             if fmt_list:
1496                 for fmt in fmt_list.split(','):
1497                     spec = fmt.split('/')
1498                     if len(spec) > 1:
1499                         width_height = spec[1].split('x')
1500                         if len(width_height) == 2:
1501                             formats_spec[spec[0]] = {
1502                                 'resolution': spec[1],
1503                                 'width': int_or_none(width_height[0]),
1504                                 'height': int_or_none(width_height[1]),
1505                             }
1506             formats = []
1507             for url_data_str in encoded_url_map.split(','):
1508                 url_data = compat_parse_qs(url_data_str)
1509                 if 'itag' not in url_data or 'url' not in url_data:
1510                     continue
1511                 format_id = url_data['itag'][0]
1512                 url = url_data['url'][0]
1513
1514                 if 'sig' in url_data:
1515                     url += '&signature=' + url_data['sig'][0]
1516                 elif 's' in url_data:
1517                     encrypted_sig = url_data['s'][0]
1518                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1519
1520                     jsplayer_url_json = self._search_regex(
1521                         ASSETS_RE,
1522                         embed_webpage if age_gate else video_webpage,
1523                         'JS player URL (1)', default=None)
1524                     if not jsplayer_url_json and not age_gate:
1525                         # We need the embed website after all
1526                         if embed_webpage is None:
1527                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1528                             embed_webpage = self._download_webpage(
1529                                 embed_url, video_id, 'Downloading embed webpage')
1530                         jsplayer_url_json = self._search_regex(
1531                             ASSETS_RE, embed_webpage, 'JS player URL')
1532
1533                     player_url = json.loads(jsplayer_url_json)
1534                     if player_url is None:
1535                         player_url_json = self._search_regex(
1536                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1537                             video_webpage, 'age gate player URL')
1538                         player_url = json.loads(player_url_json)
1539
1540                     if self._downloader.params.get('verbose'):
1541                         if player_url is None:
1542                             player_version = 'unknown'
1543                             player_desc = 'unknown'
1544                         else:
1545                             if player_url.endswith('swf'):
1546                                 player_version = self._search_regex(
1547                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1548                                     'flash player', fatal=False)
1549                                 player_desc = 'flash player %s' % player_version
1550                             else:
1551                                 player_version = self._search_regex(
1552                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
1553                                     player_url,
1554                                     'html5 player', fatal=False)
1555                                 player_desc = 'html5 player %s' % player_version
1556
1557                         parts_sizes = self._signature_cache_id(encrypted_sig)
1558                         self.to_screen('{%s} signature length %s, %s' %
1559                                        (format_id, parts_sizes, player_desc))
1560
1561                     signature = self._decrypt_signature(
1562                         encrypted_sig, video_id, player_url, age_gate)
1563                     url += '&signature=' + signature
1564                 if 'ratebypass' not in url:
1565                     url += '&ratebypass=yes'
1566
1567                 dct = {
1568                     'format_id': format_id,
1569                     'url': url,
1570                     'player_url': player_url,
1571                 }
1572                 if format_id in self._formats:
1573                     dct.update(self._formats[format_id])
1574                 if format_id in formats_spec:
1575                     dct.update(formats_spec[format_id])
1576
1577                 # Some itags are not included in DASH manifest thus corresponding formats will
1578                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1579                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1580                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1581                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1582
1583                 more_fields = {
1584                     'filesize': int_or_none(url_data.get('clen', [None])[0]),
1585                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1586                     'width': width,
1587                     'height': height,
1588                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1589                     'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
1590                 }
1591                 for key, value in more_fields.items():
1592                     if value:
1593                         dct[key] = value
1594                 type_ = url_data.get('type', [None])[0]
1595                 if type_:
1596                     type_split = type_.split(';')
1597                     kind_ext = type_split[0].split('/')
1598                     if len(kind_ext) == 2:
1599                         kind, _ = kind_ext
1600                         dct['ext'] = mimetype2ext(type_split[0])
1601                         if kind in ('audio', 'video'):
1602                             codecs = None
1603                             for mobj in re.finditer(
1604                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1605                                 if mobj.group('key') == 'codecs':
1606                                     codecs = mobj.group('val')
1607                                     break
1608                             if codecs:
1609                                 codecs = codecs.split(',')
1610                                 if len(codecs) == 2:
1611                                     acodec, vcodec = codecs[1], codecs[0]
1612                                 else:
1613                                     acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1614                                 dct.update({
1615                                     'acodec': acodec,
1616                                     'vcodec': vcodec,
1617                                 })
1618                 formats.append(dct)
1619         elif video_info.get('hlsvp'):
1620             manifest_url = video_info['hlsvp'][0]
1621             url_map = self._extract_from_m3u8(manifest_url, video_id)
1622             formats = _map_to_format_list(url_map)
1623             # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1624             for a_format in formats:
1625                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1626         else:
1627             unavailable_message = self._html_search_regex(
1628                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1629                 video_webpage, 'unavailable message', default=None)
1630             if unavailable_message:
1631                 raise ExtractorError(unavailable_message, expected=True)
1632             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1633
1634         # Look for the DASH manifest
1635         if self._downloader.params.get('youtube_include_dash_manifest', True):
1636             dash_mpd_fatal = True
1637             for mpd_url in dash_mpds:
1638                 dash_formats = {}
1639                 try:
1640                     def decrypt_sig(mobj):
1641                         s = mobj.group(1)
1642                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1643                         return '/signature/%s' % dec_s
1644
1645                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
1646
1647                     for df in self._extract_mpd_formats(
1648                             mpd_url, video_id, fatal=dash_mpd_fatal,
1649                             formats_dict=self._formats):
1650                         # Do not overwrite DASH format found in some previous DASH manifest
1651                         if df['format_id'] not in dash_formats:
1652                             dash_formats[df['format_id']] = df
1653                         # Additional DASH manifests may end up in HTTP Error 403 therefore
1654                         # allow them to fail without bug report message if we already have
1655                         # some DASH manifest succeeded. This is temporary workaround to reduce
1656                         # burst of bug reports until we figure out the reason and whether it
1657                         # can be fixed at all.
1658                         dash_mpd_fatal = False
1659                 except (ExtractorError, KeyError) as e:
1660                     self.report_warning(
1661                         'Skipping DASH manifest: %r' % e, video_id)
1662                 if dash_formats:
1663                     # Remove the formats we found through non-DASH, they
1664                     # contain less info and it can be wrong, because we use
1665                     # fixed values (for example the resolution). See
1666                     # https://github.com/rg3/youtube-dl/issues/5774 for an
1667                     # example.
1668                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1669                     formats.extend(dash_formats.values())
1670
1671         # Check for malformed aspect ratio
1672         stretched_m = re.search(
1673             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1674             video_webpage)
1675         if stretched_m:
1676             w = float(stretched_m.group('w'))
1677             h = float(stretched_m.group('h'))
1678             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1679             # We will only process correct ratios.
1680             if w > 0 and h > 0:
1681                 ratio = w / h
1682                 for f in formats:
1683                     if f.get('vcodec') != 'none':
1684                         f['stretched_ratio'] = ratio
1685
1686         self._sort_formats(formats)
1687
1688         self.mark_watched(video_id, video_info)
1689
1690         return {
1691             'id': video_id,
1692             'uploader': video_uploader,
1693             'uploader_id': video_uploader_id,
1694             'uploader_url': video_uploader_url,
1695             'upload_date': upload_date,
1696             'license': video_license,
1697             'creator': video_creator,
1698             'title': video_title,
1699             'alt_title': video_alt_title,
1700             'thumbnail': video_thumbnail,
1701             'description': video_description,
1702             'categories': video_categories,
1703             'tags': video_tags,
1704             'subtitles': video_subtitles,
1705             'automatic_captions': automatic_captions,
1706             'duration': video_duration,
1707             'age_limit': 18 if age_gate else 0,
1708             'annotations': video_annotations,
1709             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1710             'view_count': view_count,
1711             'like_count': like_count,
1712             'dislike_count': dislike_count,
1713             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1714             'formats': formats,
1715             'is_live': is_live,
1716             'start_time': start_time,
1717             'end_time': end_time,
1718         }
1719
1720
1721 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
1722     IE_DESC = 'YouTube.com playlists'
1723     _VALID_URL = r"""(?x)(?:
1724                         (?:https?://)?
1725                         (?:\w+\.)?
1726                         youtube\.com/
1727                         (?:
1728                            (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
1729                            \? (?:.*?[&;])*? (?:p|a|list)=
1730                         |  p/
1731                         )
1732                         (
1733                             (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
1734                             # Top tracks, they can also include dots
1735                             |(?:MC)[\w\.]*
1736                         )
1737                         .*
1738                      |
1739                         ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
1740                      )"""
1741     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
1742     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
1743     IE_NAME = 'youtube:playlist'
1744     _TESTS = [{
1745         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1746         'info_dict': {
1747             'title': 'ytdl test PL',
1748             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1749         },
1750         'playlist_count': 3,
1751     }, {
1752         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1753         'info_dict': {
1754             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1755             'title': 'YDL_Empty_List',
1756         },
1757         'playlist_count': 0,
1758     }, {
1759         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
1760         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1761         'info_dict': {
1762             'title': '29C3: Not my department',
1763             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1764         },
1765         'playlist_count': 95,
1766     }, {
1767         'note': 'issue #673',
1768         'url': 'PLBB231211A4F62143',
1769         'info_dict': {
1770             'title': '[OLD]Team Fortress 2 (Class-based LP)',
1771             'id': 'PLBB231211A4F62143',
1772         },
1773         'playlist_mincount': 26,
1774     }, {
1775         'note': 'Large playlist',
1776         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
1777         'info_dict': {
1778             'title': 'Uploads from Cauchemar',
1779             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
1780         },
1781         'playlist_mincount': 799,
1782     }, {
1783         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1784         'info_dict': {
1785             'title': 'YDL_safe_search',
1786             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1787         },
1788         'playlist_count': 2,
1789     }, {
1790         'note': 'embedded',
1791         'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1792         'playlist_count': 4,
1793         'info_dict': {
1794             'title': 'JODA15',
1795             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1796         }
1797     }, {
1798         'note': 'Embedded SWF player',
1799         'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
1800         'playlist_count': 4,
1801         'info_dict': {
1802             'title': 'JODA7',
1803             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
1804         }
1805     }, {
1806         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
1807         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
1808         'info_dict': {
1809             'title': 'Uploads from Interstellar Movie',
1810             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
1811         },
1812         'playlist_mincout': 21,
1813     }]
1814
    def _real_initialize(self):
        """Initialize the extractor by calling self._login()."""
        self._login()
1817
1818     def _extract_mix(self, playlist_id):
1819         # The mixes are generated from a single video
1820         # the id of the playlist is just 'RD' + video_id
1821         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
1822         webpage = self._download_webpage(
1823             url, playlist_id, 'Downloading Youtube mix')
1824         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
1825         title_span = (
1826             search_title('playlist-title') or
1827             search_title('title long-title') or
1828             search_title('title'))
1829         title = clean_html(title_span)
1830         ids = orderedSet(re.findall(
1831             r'''(?xs)data-video-username=".*?".*?
1832                        href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
1833             webpage))
1834         url_results = self._ids_to_results(ids)
1835
1836         return self.playlist_result(url_results, playlist_id, title)
1837
1838     def _extract_playlist(self, playlist_id):
1839         url = self._TEMPLATE_URL % playlist_id
1840         page = self._download_webpage(url, playlist_id)
1841
1842         for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
1843             match = match.strip()
1844             # Check if the playlist exists or is private
1845             if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
1846                 raise ExtractorError(
1847                     'The playlist doesn\'t exist or is private, use --username or '
1848                     '--netrc to access it.',
1849                     expected=True)
1850             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
1851                 raise ExtractorError(
1852                     'Invalid parameters. Maybe URL is incorrect.',
1853                     expected=True)
1854             elif re.match(r'[^<]*Choose your language[^<]*', match):
1855                 continue
1856             else:
1857                 self.report_warning('Youtube gives an alert message: ' + match)
1858
1859         playlist_title = self._html_search_regex(
1860             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
1861             page, 'title')
1862
1863         return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)
1864
1865     def _check_download_just_video(self, url, playlist_id):
1866         # Check if it's a video-specific URL
1867         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
1868         if 'v' in query_dict:
1869             video_id = query_dict['v'][0]
1870             if self._downloader.params.get('noplaylist'):
1871                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1872                 return self.url_result(video_id, 'Youtube', video_id=video_id)
1873             else:
1874                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
1875
1876     def _real_extract(self, url):
1877         # Extract playlist id
1878         mobj = re.match(self._VALID_URL, url)
1879         if mobj is None:
1880             raise ExtractorError('Invalid URL: %s' % url)
1881         playlist_id = mobj.group(1) or mobj.group(2)
1882
1883         video = self._check_download_just_video(url, playlist_id)
1884         if video:
1885             return video
1886
1887         if playlist_id.startswith(('RD', 'UL', 'PU')):
1888             # Mixes require a custom extraction process
1889             return self._extract_mix(playlist_id)
1890
1891         return self._extract_playlist(playlist_id)
1892
1893
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extract the videos of a channel, preferably through its uploads playlist"""
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return the channel's videos as a playlist result or a playlist url result."""
        channel_id = self._match_id(url)

        url = self._TEMPLATE_URL % channel_id

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if channel_page is not False:
            # Prefer the meta tag, fall back on the data attribute
            channel_playlist_id = (
                self._html_search_meta(
                    'channelId', channel_page, 'channel id', default=None) or
                self._search_regex(
                    r'data-(?:channel-external-|yt)id="([^"]+)"',
                    channel_page, 'channel id', default=None))
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to build the playlist id
            uploads_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if not autogenerated:
            return self.playlist_result(self._entries(channel_page, channel_id), channel_id)

        # The videos are contained in a single page
        # the ajax pages can't be used, they are empty
        entries = [
            self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title)
            for video_id, video_title in self.extract_videos_from_page(channel_page)]
        return self.playlist_result(entries, channel_id)
1967
1968
class YoutubeUserIE(YoutubeChannelIE):
    """Extract the videos of a user, reusing the channel extraction logic"""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept url only if no other *IE object in this module accepts it."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: the regex is too permissive and it would match anyway.
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)
1995
1996
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a user/channel '/live' URL to a video or to the base URL"""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'http://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            # Raw string: '\.' is not a valid escape in a plain string literal
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the video behind the '/live' page.

        When the page cannot be downloaded, or it does not describe a video
        with a well-formed 11 character id, a url result for the URL without
        the '/live' suffix is returned instead.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        # Non-fatal download: a failure simply triggers the fallback below
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default=None)
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2040
2041
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the '/playlists' page of a user or channel

    Only matching and test data are declared here; no method is overridden,
    so the behaviour comes from YoutubePlaylistsBaseInfoExtractor.
    """
    IE_DESC = 'YouTube.com user/channel playlists'
    # The user name or channel id is captured as 'id'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2070
2071
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Search extractor that pages through the YouTube results listing"""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query string arguments added to every results request
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until a page yields
        no video or at least n videos have been collected. At most n
        entries are returned as a playlist result whose id is the query.

        Raises ExtractorError when a page contains a search message,
        i.e. there are no video results.
        """

        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough results are collected: with a strict
            # '>' an extra page was downloaded whenever exactly n videos
            # had been gathered, only to be thrown away afterwards
            if not new_videos or len(videos) >= n:
                break

        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2115
2116
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that adds a 'search_sort' query argument"""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Merged into the query string of every results request by the parent class
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2122
2123
class YoutubeSearchURLIE(InfoExtractor):
    """Turn a YouTube results page URL into a playlist of its listed items"""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist, titled after the decoded query, of url entries."""
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        def build_entry(part_code):
            # The title is optional, the link is mandatory
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            return {
                '_type': 'url',
                'url': compat_urlparse.urljoin(
                    'https://www.youtube.com/', part_url_snippet),
                'title': part_title,
            }

        # Every result item is wrapped in a yt-lockup-title heading
        part_codes = re.findall(
            r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
        entries = [build_entry(part_code) for part_code in part_codes]

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
2168
2169
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract a show by delegating to the parent class with its playlists URL"""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Rewrite url to the show's '/playlists' page and let the parent extract it."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2187
2188
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Initialize the extractor by calling self._login()."""
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed and return them as a playlist.

        The feed page is downloaded first, then the 'load more' links are
        followed until a portion brings no new video id or no further
        'load more' link is found.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            # NB: this must be a real list. On Python 3 filter() returns an
            # iterator that is always truthy, so the emptiness check below
            # would never trigger and the loop would not terminate.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
2236
2237
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extract the 'WL' playlist using the regular playlist extraction"""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video if one is requested, else the 'WL' playlist."""
        playlist_id = 'WL'
        video = self._check_download_just_video(url, playlist_id)
        return video if video else self._extract_playlist(playlist_id)
2256
2257
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Find the favourites playlist id and hand it over to YoutubePlaylist"""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a url result for the playlist id found on the favourites page."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The id is taken from the first 'list=' parameter found in the page
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
2268
2269
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed"""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Used by the base class to build the feed URL and the extractor name
    _FEED_NAME = 'recommended'
    # Used by the base class as the title of the resulting playlist
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
2275
2276
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed"""
    # The shortcut accepted by _VALID_URL starts with a colon (':ytsubs'),
    # so the description advertises it that way, like the sibling feeds do
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Used by the base class to build the feed URL and the extractor name
    _FEED_NAME = 'subscriptions'
    # Used by the base class as the title of the resulting playlist
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
2282
2283
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'history' feed"""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: '\.' is not a valid escape in a plain string literal
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    # Used by the base class to build the feed URL and the extractor name
    _FEED_NAME = 'history'
    # Used by the base class as the title of the resulting playlist
    _PLAYLIST_TITLE = 'Youtube History'
2289
2290
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs without a video id and explain the likely cause"""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting at shell quoting."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(quoting_hint, expected=True)
2338
2339
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose video id is shorter than 11 characters"""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)