X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=e2296d153c7dbd473daf7154cbff7550f3841e76;hb=7763b04e5fac8b282dcbfcf5329941b485ef541f;hp=b34c1a7b9a494ca719f2a7ec3f1a128837efbf14;hpb=7ce7e3947600bfb4b8b84e00c02aabc91b5ed0ae;p=youtube-dl.git

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b34c1a7b9..e2296d153 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -4,6 +4,7 @@ import json
 import netrc
 import re
 import socket
+import itertools
 
 from .common import InfoExtractor, SearchInfoExtractor
 from ..utils import (
@@ -19,12 +20,12 @@ from ..utils import (
     ExtractorError,
     unescapeHTML,
     unified_strdate,
+    orderedSet,
 )
 
 
 class YoutubeIE(InfoExtractor):
-    """Information extractor for youtube.com."""
-
+    IE_DESC = u'YouTube.com'
     _VALID_URL = r"""^
                      (
                          (?:https?://)?                                       # http(s):// (optional)
@@ -34,7 +35,7 @@ class YoutubeIE(InfoExtractor):
                          (?:                                                  # the various things that can precede the ID:
                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                              |(?:                                             # or the v= param in all its forms
-                                 (?:watch(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+                                 (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                  v=
@@ -81,11 +82,49 @@ class YoutubeIE(InfoExtractor):
         '46': '1080x1920',
     }
     IE_NAME = u'youtube'
+    _TESTS = [
+        {
+            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
+            u"file":  u"BaW_jenozKc.mp4",
+            u"info_dict": {
+                u"title": u"youtube-dl test video \"'/\\Ã¤â­ð",
+                u"uploader": u"Philipp Hagemeister",
+                u"uploader_id": u"phihag",
+                u"upload_date": u"20121002",
+                u"description": u"test chars:  \"'/\\Ã¤â­ð\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
+            }
+        },
+        {
+            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
+            u"file":  u"1ltcDfZMA3U.flv",
+            u"note": u"Test VEVO video (#897)",
+            u"info_dict": {
+                u"upload_date": u"20070518",
+                u"title": u"Maps - It Will Find You",
+                u"description": u"Music video by Maps performing It Will Find You.",
+                u"uploader": u"MuteUSA",
+                u"uploader_id": u"MuteUSA"
+            }
+        },
+        {
+            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
+            u"file":  u"UxxajLWwzqY.mp4",
+            u"note": u"Test generic use_cipher_signature video (#897)",
+            u"info_dict": {
+                u"upload_date": u"20120506",
+                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
+                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
+                u"uploader": u"IconaPop",
+                u"uploader_id": u"IconaPop"
+            }
+        }
+    ]
+
 
     @classmethod
     def suitable(cls, url):
         """Receives a URL and returns True if suitable for this IE."""
-        if YoutubePlaylistIE.suitable(url): return False
+        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 
     def report_lang(self):
@@ -129,16 +168,26 @@ class YoutubeIE(InfoExtractor):
         """Indicate the download will use the RTMP protocol."""
         self.to_screen(u'RTMP download detected')
 
-    @staticmethod
-    def _decrypt_signature(s):
-        """Decrypt the key the two subkeys must have a length of 43"""
-        (a,b) = s.split('.')
-        if len(a) != 43 or len(b) != 43:
-            raise ExtractorError(u'Unable to decrypt signature, subkeys lengths not valid')
-        b = ''.join([b[:8],a[0],b[9:18],b[-4],b[19:39], b[18]])[0:40]
-        a = a[-40:]
-        s_dec = '.'.join((a,b))[::-1]
-        return s_dec
+    def _decrypt_signature(self, s):
+        """Turn the encrypted s field into a working signature"""
+
+        if len(s) == 88:
+            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
+        elif len(s) == 87:
+            return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
+        elif len(s) == 86:
+            return s[2:63] + s[82] + s[64:82] + s[63]
+        elif len(s) == 85:
+            return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
+        elif len(s) == 84:
+            return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
+        elif len(s) == 83:
+            return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36]
+        elif len(s) == 82:
+            return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
+
+        else:
+            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
 
     def _get_available_subtitles(self, video_id):
         self.report_video_subtitles_download(video_id)
@@ -354,6 +403,9 @@ class YoutubeIE(InfoExtractor):
         return video_id
 
     def _real_extract(self, url):
+        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
+            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
+
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
         if mobj:
@@ -391,7 +443,7 @@ class YoutubeIE(InfoExtractor):
                 break
         if 'token' not in video_info:
             if 'reason' in video_info:
-                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0])
+                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
             else:
                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
 
@@ -421,7 +473,12 @@ class YoutubeIE(InfoExtractor):
         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
 
         # thumbnail image
-        if 'thumbnail_url' not in video_info:
+        # We try first to get a high quality image:
+        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
+                            video_webpage, re.DOTALL)
+        if m_thumb is not None:
+            video_thumbnail = m_thumb.group(1)
+        elif 'thumbnail_url' not in video_info:
             self._downloader.report_warning(u'unable to extract video thumbnail')
             video_thumbnail = ''
         else:   # don't panic if we can't find it
@@ -453,14 +510,13 @@ class YoutubeIE(InfoExtractor):
             if video_subtitles:
                 (sub_error, sub_lang, sub) = video_subtitles[0]
                 if sub_error:
-                    # We try with the automatic captions
-                    video_subtitles = self._request_automatic_caption(video_id, video_webpage)
-                    (sub_error_auto, sub_lang, sub) = video_subtitles[0]
-                    if sub is not None:
-                        pass
-                    else:
-                        # We report the original error
-                        self._downloader.report_warning(sub_error)
+                    self._downloader.report_warning(sub_error)
+        
+        if self._downloader.params.get('writeautomaticsub', False):
+            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+            (sub_error, sub_lang, sub) = video_subtitles[0]
+            if sub_error:
+                self._downloader.report_warning(sub_error)
 
         if self._downloader.params.get('allsubtitles', False):
             video_subtitles = self._extract_all_subtitles(video_id)
@@ -484,6 +540,8 @@ class YoutubeIE(InfoExtractor):
 
         try:
             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
+            if not mobj:
+                raise ValueError('Could not find vevo ID')
             info = json.loads(mobj.group(1))
             args = info['args']
             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
@@ -507,6 +565,12 @@ class YoutubeIE(InfoExtractor):
                     if 'sig' in url_data:
                         url += '&signature=' + url_data['sig'][0]
                     elif 's' in url_data:
+                        if self._downloader.params.get('verbose'):
+                            s = url_data['s'][0]
+                            player = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
+                                'html5 player', fatal=False)
+                            self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' %
+                                (len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player))
                         signature = self._decrypt_signature(url_data['s'][0])
                         url += '&signature=' + signature
                     if 'ratebypass' not in url:
@@ -528,7 +592,7 @@ class YoutubeIE(InfoExtractor):
             if req_format is None or req_format == 'best':
                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
             elif req_format == 'worst':
-                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
+                video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
             elif req_format in ('-1', 'all'):
                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
             else:
@@ -571,8 +635,7 @@ class YoutubeIE(InfoExtractor):
         return results
 
 class YoutubePlaylistIE(InfoExtractor):
-    """Information Extractor for YouTube playlists."""
-
+    IE_DESC = u'YouTube.com playlists'
     _VALID_URL = r"""(?:
                         (?:https?://)?
                         (?:\w+\.)?
@@ -639,8 +702,7 @@ class YoutubePlaylistIE(InfoExtractor):
 
 
 class YoutubeChannelIE(InfoExtractor):
-    """Information Extractor for YouTube channels."""
-
+    IE_DESC = u'YouTube.com channels'
     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
     _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
@@ -698,8 +760,7 @@ class YoutubeChannelIE(InfoExtractor):
 
 
 class YoutubeUserIE(InfoExtractor):
-    """Information Extractor for YouTube users."""
-
+    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
@@ -755,7 +816,7 @@ class YoutubeUserIE(InfoExtractor):
         return [self.playlist_result(url_results, playlist_title = username)]
 
 class YoutubeSearchIE(SearchInfoExtractor):
-    """Information Extractor for YouTube search queries."""
+    IE_DESC = u'YouTube.com searches'
     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
     _MAX_RESULTS = 1000
     IE_NAME = u'youtube:search'
@@ -795,3 +856,49 @@ class YoutubeSearchIE(SearchInfoExtractor):
             video_ids = video_ids[:n]
         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
         return self.playlist_result(videos, query)
+
+
+class YoutubeShowIE(InfoExtractor):
+    IE_DESC = u'YouTube.com (multi-season) shows'
+    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
+    IE_NAME = u'youtube:show'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_name = mobj.group(1)
+        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
+        # There's one playlist for each season of the show
+        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
+        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
+        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
+
+
+class YoutubeSubscriptionsIE(YoutubeIE):
+    """It's a subclass of YoutubeIE because we need to login"""
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+    IE_NAME = u'youtube:subscriptions'
+    _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
+    _PAGING_STEP = 30
+
+    # Overwrite YoutubeIE properties we don't want
+    _TESTS = []
+    @classmethod
+    def suitable(cls, url):
+        return re.match(cls._VALID_URL, url) is not None
+
+    def _real_extract(self, url):
+        feed_entries = []
+        # The step argument is available only in 2.7 or higher
+        for i in itertools.count(0):
+            paging = i*self._PAGING_STEP
+            info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
+                                          u'Downloading page %s' % i)
+            info = json.loads(info)
+            feed_html = info['feed_html']
+            m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html)
+            ids = orderedSet(m.group(1) for m in m_ids)
+            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
+            if info['paging'] is None:
+                break
+        return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')