X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=08a04737c3497eab4c43786a4f3e7e0a45417131;hb=1b8477729a07f3b87f4c20175cf337335100341c;hp=3417c1275a972d6cfaf98b74e72bf60bbc6f33f4;hpb=5a3f0d9aeef3d50f28c47061e1128da115f5a06f;p=youtube-dl.git

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3417c1275..08a04737c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1055,21 +1055,26 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         self._login()
 
     def _ids_to_results(self, ids):
-        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
-                       for vid_id in ids]
+        return [
+            self.url_result(vid_id, 'Youtube', video_id=vid_id)
+            for vid_id in ids]
 
     def _extract_mix(self, playlist_id):
         # The mixes are generated from a a single video
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
-        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
+        webpage = self._download_webpage(
+            url, playlist_id, u'Downloading Youtube mix')
         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
-        title_span = (search_title('playlist-title') or
-            search_title('title long-title') or search_title('title'))
+        title_span = (
+            search_title('playlist-title') or
+            search_title('title long-title') or
+            search_title('title'))
         title = clean_html(title_span)
-        video_re = r'''(?x)data-video-username=".*?".*?
-                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
-        ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+        ids = orderedSet(re.findall(
+            r'''(?xs)data-video-username=".*?".*?
+                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+            webpage))
         url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)
@@ -1162,6 +1167,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
             msg = u'Downloading Youtube mix'
             if i > 0:
                 msg += ', retry #%d' % i
+
             webpage = self._download_webpage(url, title, msg)
             ids = orderedSet(re.findall(video_re, webpage))
             if ids:
@@ -1434,12 +1440,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
             paging = mobj.group('paging')
         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
 
-class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
-    _FEED_NAME = 'subscriptions'
-    _PLAYLIST_TITLE = u'Youtube Subscriptions'
-
 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
     IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
@@ -1472,6 +1472,43 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
         return self.url_result(playlist_id, 'YoutubePlaylist')
 
 
+class YoutubeSubscriptionsIE(YoutubePlaylistIE):
+    IE_NAME = u'youtube:subscriptions'
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+
+    def _real_extract(self, url):
+        title = u'Youtube Subscriptions'
+        page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)
+
+        # The extraction process is the same as for playlists, but the regex
+        # for the video ids doesn't contain an index
+        ids = []
+        more_widget_html = content_html = page
+
+        for page_num in itertools.count(1):
+            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
+            new_ids = orderedSet(matches)
+            ids.extend(new_ids)
+
+            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+            if not mobj:
+                break
+
+            more = self._download_json(
+                'https://youtube.com/%s' % mobj.group('more'), title,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
+            content_html = more['content_html']
+            more_widget_html = more['load_more_widget_html']
+
+        return {
+            '_type': 'playlist',
+            'title': title,
+            'entries': self._ids_to_results(ids),
+        }
+
+
 class YoutubeTruncatedURLIE(InfoExtractor):
     IE_NAME = 'youtube:truncated_url'
     IE_DESC = False  # Do not list