Improvements

[youtube-dl.git] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 571c73889008349fcbaf157e17d3f6d617ac5a5c..f227e208634464fcff44c25b83aa4249b5afdb68 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -5,9 +5,10 @@ import netrc
  import re
  import socket
  import itertools
  import re
  import socket
  import itertools
+import xml.etree.ElementTree
  
  from .common import InfoExtractor, SearchInfoExtractor
  
  from .common import InfoExtractor, SearchInfoExtractor
-from .subtitles import SubtitlesIE
+from .subtitles import SubtitlesInfoExtractor
  from ..utils import (
      compat_http_client,
      compat_parse_qs,
  from ..utils import (
      compat_http_client,
      compat_parse_qs,
@@ -24,91 +25,143 @@ from ..utils import (
      orderedSet,
  )
  
      orderedSet,
  )
  
-
-class YoutubeSubtitlesIE(SubtitlesIE):
-
-    def _get_available_subtitles(self, video_id):
-        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
+class YoutubeBaseInfoExtractor(InfoExtractor):
+    """Provide base functions for Youtube extractors"""
+    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
+    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
+    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
+    _NETRC_MACHINE = 'youtube'
+    # If True it will raise an error if no login info is provided
+    _LOGIN_REQUIRED = False
+
+    def report_lang(self):
+        """Report attempt to set language."""
+        self.to_screen(u'Setting language')
+
+    def _set_language(self):
+        request = compat_urllib_request.Request(self._LANG_URL)
          try:
          try:
-            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
+            self.report_lang()
+            compat_urllib_request.urlopen(request).read()
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
-            return {}
-        lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
-
-        sub_lang_list = {}
-        for l in lang_list:
-            lang = l[1]
-            params = compat_urllib_parse.urlencode({
-                'lang': lang,
-                'v': video_id,
-                'fmt': self._downloader.params.get('subtitlesformat'),
-            })
-            url = u'http://www.youtube.com/api/timedtext?' + params
-            sub_lang_list[lang] = url
-        if not sub_lang_list:
-            self._downloader.report_warning(u'video doesn\'t have subtitles')
-            return {}
-        return sub_lang_list
-
-    def _request_automatic_caption(self, video_id, webpage):
-        """We need the webpage for getting the captions url, pass it as an
-           argument to speed up the process."""
-        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
-        sub_format = self._downloader.params.get('subtitlesformat')
-        self.to_screen(u'%s: Looking for automatic captions' % video_id)
-        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
-        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
-        if mobj is None:
-            self._downloader.report_warning(err_msg)
-            return {}
-        player_config = json.loads(mobj.group(1))
+            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
+            return False
+        return True
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        # No authentication to be performed
+        if username is None:
+            if self._LOGIN_REQUIRED:
+                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+            return False
+
+        request = compat_urllib_request.Request(self._LOGIN_URL)
          try:
          try:
-            args = player_config[u'args']
-            caption_url = args[u'ttsurl']
-            timestamp = args[u'timestamp']
-            params = compat_urllib_parse.urlencode({
-                'lang': 'en',
-                'tlang': sub_lang,
-                'fmt': sub_format,
-                'ts': timestamp,
-                'kind': 'asr',
-            })
-            subtitles_url = caption_url + '&' + params
-            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
-            return {sub_lang: sub}
-        # An extractor error can be raise by the download process if there are
-        # no automatic captions but there are subtitles
-        except (KeyError, ExtractorError):
-            self._downloader.report_warning(err_msg)
-            return {}
+            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
+            return False
+
+        galx = None
+        dsh = None
+        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
+        if match:
+          galx = match.group(1)
+        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
+        if match:
+          dsh = match.group(1)
+
+        # Log in
+        login_form_strs = {
+                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+                u'Email': username,
+                u'GALX': galx,
+                u'Passwd': password,
+                u'PersistentCookie': u'yes',
+                u'_utf8': u'霱',
+                u'bgresponse': u'js_disabled',
+                u'checkConnection': u'',
+                u'checkedDomains': u'youtube',
+                u'dnConn': u'',
+                u'dsh': dsh,
+                u'pstMsg': u'0',
+                u'rmShown': u'1',
+                u'secTok': u'',
+                u'signIn': u'Sign in',
+                u'timeStmp': u'',
+                u'service': u'youtube',
+                u'uilel': u'3',
+                u'hl': u'en_US',
+        }
+        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+        # chokes on unicode
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
+        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
+        try:
+            self.report_login()
+            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
+            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
+                self._downloader.report_warning(u'unable to log in: bad username or password')
+                return False
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
+            return False
+        return True
+
+    def _confirm_age(self):
+        age_form = {
+                'next_url':     '/',
+                'action_confirm':   'Confirm',
+                }
+        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
+        try:
+            self.report_age_confirmation()
+            compat_urllib_request.urlopen(request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
+        return True
+
+    def _real_initialize(self):
+        if self._downloader is None:
+            return
+        if not self._set_language():
+            return
+        if not self._login():
+            return
+        self._confirm_age()
  
  
  
  
-class YoutubeIE(YoutubeSubtitlesIE):
+class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
      IE_DESC = u'YouTube.com'
      _VALID_URL = r"""^
                       (
                           (?:https?://)?                                       # http(s):// (optional)
      IE_DESC = u'YouTube.com'
      _VALID_URL = r"""^
                       (
                           (?:https?://)?                                       # http(s):// (optional)
-                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
-                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
+                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+                            tube\.majestyc\.net/|
+                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                           (?:                                                  # the various things that can precede the ID:
                               (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                               |(?:                                             # or the v= param in all its forms
                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                           (?:                                                  # the various things that can precede the ID:
                               (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                               |(?:                                             # or the v= param in all its forms
-                                 (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                   (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                   (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                   v=
                               )
                                   (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                   (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                   v=
                               )
-                         )?                                                   # optional -> youtube.com/xxxx is OK
+                         ))
+                         |youtu\.be/                                          # just youtu.be/xxxx
+                         )
                       )?                                                       # all until now is optional -> you can pass the naked ID
                       )?                                                       # all until now is optional -> you can pass the naked ID
-                     ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
+                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                       (?(1).+)?                                                # if we found the ID, everything can follow
                       $"""
      _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
      # Listed in order of quality
                       (?(1).+)?                                                # if we found the ID, everything can follow
                       $"""
      _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
      # Listed in order of quality
-    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
-                          '95', '94', '93', '92', '132', '151',
+    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
+                          # Apple HTTP Live Streaming
+                          '96', '95', '94', '93', '92', '132', '151',
                            # 3D
                            '85', '84', '102', '83', '101', '82', '100',
                            # Dash video
                            # 3D
                            '85', '84', '102', '83', '101', '82', '100',
                            # Dash video
@@ -117,8 +170,10 @@ class YoutubeIE(YoutubeSubtitlesIE):
                            # Dash audio
                            '141', '172', '140', '171', '139',
                            ]
                            # Dash audio
                            '141', '172', '140', '171', '139',
                            ]
-    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
-                                      '95', '94', '93', '92', '132', '151',
+    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
+                                      # Apple HTTP Live Streaming
+                                      '96', '95', '94', '93', '92', '132', '151',
+                                      # 3D
                                        '85', '102', '84', '101', '83', '100', '82',
                                        # Dash video
                                        '138', '248', '137', '247', '136', '246', '245',
                                        '85', '102', '84', '101', '83', '100', '82',
                                        # Dash video
                                        '138', '248', '137', '247', '136', '246', '245',
@@ -126,11 +181,18 @@ class YoutubeIE(YoutubeSubtitlesIE):
                                        # Dash audio
                                        '172', '141', '171', '140', '139',
                                        ]
                                        # Dash audio
                                        '172', '141', '171', '140', '139',
                                        ]
+    _video_formats_map = {
+        'flv': ['35', '34', '6', '5'],
+        '3gp': ['36', '17', '13'],
+        'mp4': ['38', '37', '22', '18'],
+        'webm': ['46', '45', '44', '43'],
+    }
      _video_extensions = {
          '13': '3gp',
      _video_extensions = {
          '13': '3gp',
-        '17': 'mp4',
+        '17': '3gp',
          '18': 'mp4',
          '22': 'mp4',
          '18': 'mp4',
          '22': 'mp4',
+        '36': '3gp',
          '37': 'mp4',
          '38': 'mp4',
          '43': 'webm',
          '37': 'mp4',
          '38': 'mp4',
          '43': 'webm',
@@ -147,7 +209,7 @@ class YoutubeIE(YoutubeSubtitlesIE):
          '101': 'webm',
          '102': 'webm',
  
          '101': 'webm',
          '102': 'webm',
  
-        # videos that use m3u8
+        # Apple HTTP Live Streaming
          '92': 'mp4',
          '93': 'mp4',
          '94': 'mp4',
          '92': 'mp4',
          '93': 'mp4',
          '94': 'mp4',
@@ -188,6 +250,7 @@ class YoutubeIE(YoutubeSubtitlesIE):
          '22': '720x1280',
          '34': '360x640',
          '35': '480x854',
          '22': '720x1280',
          '34': '360x640',
          '35': '480x854',
+        '36': '240x320',
          '37': '1080x1920',
          '38': '3072x4096',
          '43': '360x640',
          '37': '1080x1920',
          '38': '3072x4096',
          '43': '360x640',
@@ -289,7 +352,7 @@ class YoutubeIE(YoutubeSubtitlesIE):
              u"info_dict": {
                  u"upload_date": u"20120506",
                  u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
              u"info_dict": {
                  u"upload_date": u"20120506",
                  u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
+                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
                  u"uploader": u"Icona Pop",
                  u"uploader_id": u"IconaPop"
              }
                  u"uploader": u"Icona Pop",
                  u"uploader_id": u"IconaPop"
              }
@@ -327,7 +390,7 @@ class YoutubeIE(YoutubeSubtitlesIE):
      @classmethod
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
      @classmethod
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
+        if YoutubePlaylistIE.suitable(url): return False
          return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def report_video_webpage_download(self, video_id):
          return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def report_video_webpage_download(self, video_id):
@@ -360,21 +423,23 @@ class YoutubeIE(YoutubeSubtitlesIE):
          elif len(s) == 89:
              return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
          elif len(s) == 88:
          elif len(s) == 89:
              return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
          elif len(s) == 88:
-            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
+            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
          elif len(s) == 87:
              return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
          elif len(s) == 86:
          elif len(s) == 87:
              return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
          elif len(s) == 86:
-            return s[5:20] + s[2] + s[21:]
+            return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
          elif len(s) == 85:
          elif len(s) == 85:
-            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
+            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
          elif len(s) == 84:
          elif len(s) == 84:
-            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
+            return s[81:36:-1] + s[0] + s[35:2:-1]
          elif len(s) == 83:
              return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
          elif len(s) == 82:
          elif len(s) == 83:
              return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
          elif len(s) == 82:
-            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
+            return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
          elif len(s) == 81:
              return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
          elif len(s) == 81:
              return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
+        elif len(s) == 80:
+            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
          elif len(s) == 79:
              return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
  
          elif len(s) == 79:
              return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
  
@@ -390,104 +455,78 @@ class YoutubeIE(YoutubeSubtitlesIE):
              # Fallback to the other algortihms
              return self._decrypt_signature(s)
  
              # Fallback to the other algortihms
              return self._decrypt_signature(s)
  
-
      def _get_available_subtitles(self, video_id):
      def _get_available_subtitles(self, video_id):
-        self.report_video_subtitles_download(video_id)
-        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
          try:
          try:
-            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            return (u'unable to download video subtitles: %s' % compat_str(err), None)
-        sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
-        sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
+            sub_list = self._download_webpage(
+                'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
+                video_id, note=False)
+        except ExtractorError as err:
+            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            return {}
+        lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
+
+        sub_lang_list = {}
+        for l in lang_list:
+            lang = l[1]
+            params = compat_urllib_parse.urlencode({
+                'lang': lang,
+                'v': video_id,
+                'fmt': self._downloader.params.get('subtitlesformat'),
+            })
+            url = u'http://www.youtube.com/api/timedtext?' + params
+            sub_lang_list[lang] = url
          if not sub_lang_list:
          if not sub_lang_list:
-            return (u'video doesn\'t have subtitles', None)
+            self._downloader.report_warning(u'video doesn\'t have subtitles')
+            return {}
          return sub_lang_list
  
          return sub_lang_list
  
-    def _list_available_subtitles(self, video_id):
-        sub_lang_list = self._get_available_subtitles(video_id)
-        self.report_video_subtitles_available(video_id, sub_lang_list)
-
-    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
-        """
-        Return tuple:
-        (error_message, sub_lang, sub)
-        """
-        self.report_video_subtitles_request(video_id, sub_lang, format)
-        params = compat_urllib_parse.urlencode({
-            'lang': sub_lang,
-            'name': sub_name,
-            'v': video_id,
-            'fmt': format,
-        })
-        url = 'http://www.youtube.com/api/timedtext?' + params
-        try:
-            sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
-        if not sub:
-            return (u'Did not fetch video subtitles', None, None)
-        return (None, sub_lang, sub)
-
-    def _request_automatic_caption(self, video_id, webpage):
+    def _get_available_automatic_caption(self, video_id, webpage):
          """We need the webpage for getting the captions url, pass it as an
             argument to speed up the process."""
          """We need the webpage for getting the captions url, pass it as an
             argument to speed up the process."""
-        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
          sub_format = self._downloader.params.get('subtitlesformat')
          self.to_screen(u'%s: Looking for automatic captions' % video_id)
          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
          sub_format = self._downloader.params.get('subtitlesformat')
          self.to_screen(u'%s: Looking for automatic captions' % video_id)
          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
-        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
+        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
          if mobj is None:
          if mobj is None:
-            return [(err_msg, None, None)]
+            self._downloader.report_warning(err_msg)
+            return {}
          player_config = json.loads(mobj.group(1))
          try:
              args = player_config[u'args']
              caption_url = args[u'ttsurl']
              timestamp = args[u'timestamp']
          player_config = json.loads(mobj.group(1))
          try:
              args = player_config[u'args']
              caption_url = args[u'ttsurl']
              timestamp = args[u'timestamp']
-            params = compat_urllib_parse.urlencode({
-                'lang': 'en',
-                'tlang': sub_lang,
-                'fmt': sub_format,
-                'ts': timestamp,
-                'kind': 'asr',
+            # We get the available subtitles
+            list_params = compat_urllib_parse.urlencode({
+                'type': 'list',
+                'tlangs': 1,
+                'asrs': 1,
              })
              })
-            subtitles_url = caption_url + '&' + params
-            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
-            return [(None, sub_lang, sub)]
-        except KeyError:
-            return [(err_msg, None, None)]
-
-    def _extract_subtitle(self, video_id):
-        """
-        Return a list with a tuple:
-        [(error_message, sub_lang, sub)]
-        """
-        sub_lang_list = self._get_available_subtitles(video_id)
-        sub_format = self._downloader.params.get('subtitlesformat')
-        if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
-            return [(sub_lang_list[0], None, None)]
-        if self._downloader.params.get('subtitleslang', False):
-            sub_lang = self._downloader.params.get('subtitleslang')
-        elif 'en' in sub_lang_list:
-            sub_lang = 'en'
-        else:
-            sub_lang = list(sub_lang_list.keys())[0]
-        if not sub_lang in sub_lang_list:
-            return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
-
-        subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
-        return [subtitle]
-
-    def _extract_all_subtitles(self, video_id):
-        sub_lang_list = self._get_available_subtitles(video_id)
-        sub_format = self._downloader.params.get('subtitlesformat')
-        if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
-            return [(sub_lang_list[0], None, None)]
-        subtitles = []
-        for sub_lang in sub_lang_list:
-            subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
-            subtitles.append(subtitle)
-        return subtitles
+            list_url = caption_url + '&' + list_params
+            list_page = self._download_webpage(list_url, video_id)
+            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
+            original_lang_node = caption_list.find('track')
+            if original_lang_node.attrib.get('kind') != 'asr' :
+                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
+                return {}
+            original_lang = original_lang_node.attrib['lang_code']
+
+            sub_lang_list = {}
+            for lang_node in caption_list.findall('target'):
+                sub_lang = lang_node.attrib['lang_code']
+                params = compat_urllib_parse.urlencode({
+                    'lang': original_lang,
+                    'tlang': sub_lang,
+                    'fmt': sub_format,
+                    'ts': timestamp,
+                    'kind': 'asr',
+                })
+                sub_lang_list[sub_lang] = caption_url + '&' + params
+            return sub_lang_list
+        # An extractor error can be raised by the download process if there are
+        # no automatic captions but there are subtitles
+        except (KeyError, ExtractorError):
+            self._downloader.report_warning(err_msg)
+            return {}
  
      def _print_formats(self, formats):
          print('Available formats:')
  
      def _print_formats(self, formats):
          print('Available formats:')
@@ -529,13 +568,25 @@ class YoutubeIE(YoutubeSubtitlesIE):
              video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
          else:
              # Specific formats. We pick the first in a slash-delimeted sequence.
              video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
          else:
              # Specific formats. We pick the first in a slash-delimeted sequence.
-            # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
+            # available in the specified format. For example,
+            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+            # if '1/mp4/3/4' is requested and '1' and '5' (which is an mp4) are available, we pick '1'.
+            # if '1/mp4/3/4' is requested and '4' and '5' (which is an mp4) are available, we pick '5'.
              req_formats = req_format.split('/')
              video_url_list = None
              for rf in req_formats:
                  if rf in url_map:
                      video_url_list = [(rf, url_map[rf])]
                      break
              req_formats = req_format.split('/')
              video_url_list = None
              for rf in req_formats:
                  if rf in url_map:
                      video_url_list = [(rf, url_map[rf])]
                      break
+                if rf in self._video_formats_map:
+                    for srf in self._video_formats_map[rf]:
+                        if srf in url_map:
+                            video_url_list = [(srf, url_map[srf])]
+                            break
+                    else:
+                        continue
+                    break
              if video_url_list is None:
                  raise ExtractorError(u'requested format not available')
          return video_url_list
              if video_url_list is None:
                  raise ExtractorError(u'requested format not available')
          return video_url_list
@@ -675,15 +726,10 @@ class YoutubeIE(YoutubeSubtitlesIE):
                  video_description = u''
  
          # subtitles
                  video_description = u''
  
          # subtitles
-        video_subtitles = None
-
-        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
-            video_subtitles = self._extract_subtitles(video_id)
-        elif self._downloader.params.get('writeautomaticsub', False):
-            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+        video_subtitles = self.extract_subtitles(video_id, video_webpage)
  
          if self._downloader.params.get('listsubtitles', False):
  
          if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id)
+            self._list_available_subtitles(video_id, video_webpage)
              return
  
          if 'length_seconds' not in video_info:
              return
  
          if 'length_seconds' not in video_info:
@@ -737,10 +783,7 @@ class YoutubeIE(YoutubeSubtitlesIE):
                          if self._downloader.params.get('verbose'):
                              s = url_data['s'][0]
                              if age_gate:
                          if self._downloader.params.get('verbose'):
                              s = url_data['s'][0]
                              if age_gate:
-                                player_version = self._search_regex(r'ad3-(.+?)\.swf',
-                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
-                                    'flash player', fatal=False)
-                                player = 'flash player %s' % player_version
+                                player = 'flash player'
                              else:
                                  player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                      'html5 player', fatal=False)
                              else:
                                  player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                      'html5 player', fatal=False)
@@ -852,8 +895,11 @@ class YoutubePlaylistIE(InfoExtractor):
  
              for entry in response['feed']['entry']:
                  index = entry['yt$position']['$t']
  
              for entry in response['feed']['entry']:
                  index = entry['yt$position']['$t']
-                if 'media$group' in entry and 'media$player' in entry['media$group']:
-                    videos.append((index, entry['media$group']['media$player']['url']))
+                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
+                    videos.append((
+                        index,
+                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
+                    ))
  
          videos = [v[1] for v in sorted(videos)]
  
  
          videos = [v[1] for v in sorted(videos)]
  
@@ -919,13 +965,20 @@ class YoutubeChannelIE(InfoExtractor):
  
  class YoutubeUserIE(InfoExtractor):
      IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
  
  class YoutubeUserIE(InfoExtractor):
      IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
      _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
      _GDATA_PAGE_SIZE = 50
      _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
      _GDATA_PAGE_SIZE = 50
-    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
-    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
+    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
      IE_NAME = u'youtube:user'
  
      IE_NAME = u'youtube:user'
  
+    @classmethod
+    def suitable(cls, url):
+        # Don't return True if the url can be extracted by another youtube
+        # extractor; this regex is too permissive and would match it too.
+        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
+        if any(ie.suitable(url) for ie in other_ies): return False
+        else: return super(YoutubeUserIE, cls).suitable(url)
+
      def _real_extract(self, url):
          # Extract username
          mobj = re.match(self._VALID_URL, url)
      def _real_extract(self, url):
          # Extract username
          mobj = re.match(self._VALID_URL, url)
@@ -948,13 +1001,15 @@ class YoutubeUserIE(InfoExtractor):
              page = self._download_webpage(gdata_url, username,
                                            u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
  
              page = self._download_webpage(gdata_url, username,
                                            u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
  
+            try:
+                response = json.loads(page)
+            except ValueError as err:
+                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+
              # Extract video identifiers
              ids_in_page = []
              # Extract video identifiers
              ids_in_page = []
-
-            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
-                if mobj.group(1) not in ids_in_page:
-                    ids_in_page.append(mobj.group(1))
-
+            for entry in response['feed']['entry']:
+                ids_in_page.append(entry['id']['$t'].split('/')[-1])
              video_ids.extend(ids_in_page)
  
              # A little optimization - if current page is not
              video_ids.extend(ids_in_page)
  
              # A little optimization - if current page is not
@@ -1093,7 +1148,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
  class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
      IE_NAME = u'youtube:favorites'
      IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
  class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
      IE_NAME = u'youtube:favorites'
      IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
+    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
      _LOGIN_REQUIRED = True
  
      def _real_extract(self, url):
      _LOGIN_REQUIRED = True
  
      def _real_extract(self, url):