X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=e2296d153c7dbd473daf7154cbff7550f3841e76;hb=7763b04e5fac8b282dcbfcf5329941b485ef541f;hp=b526e0c5375737e1d653116185d02415a6af6284;hpb=0f818663291752b2084d81aff76d945a43c66a06;p=youtube-dl.git diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b526e0c53..e2296d153 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -4,6 +4,7 @@ import json import netrc import re import socket +import itertools from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( @@ -19,6 +20,7 @@ from ..utils import ( ExtractorError, unescapeHTML, unified_strdate, + orderedSet, ) @@ -122,7 +124,7 @@ class YoutubeIE(InfoExtractor): @classmethod def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" - if YoutubePlaylistIE.suitable(url): return False + if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False return re.match(cls._VALID_URL, url, re.VERBOSE) is not None def report_lang(self): @@ -441,7 +443,7 @@ class YoutubeIE(InfoExtractor): break if 'token' not in video_info: if 'reason' in video_info: - raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0]) + raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True) else: raise ExtractorError(u'"token" parameter not in video info for unknown reason') @@ -471,7 +473,12 @@ class YoutubeIE(InfoExtractor): video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) # thumbnail image - if 'thumbnail_url' not in video_info: + # We try first to get a high quality image: + m_thumb = re.search(r'', + video_webpage, re.DOTALL) + if m_thumb is not None: + video_thumbnail = m_thumb.group(1) + elif 'thumbnail_url' not in video_info: self._downloader.report_warning(u'unable to extract video thumbnail') video_thumbnail = '' else: # don't panic if we can't find it @@ -864,3 +871,34 @@ class YoutubeShowIE(InfoExtractor): m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] + + +class YoutubeSubscriptionsIE(YoutubeIE): + """It's a subclass of YoutubeIE because we need to login""" + IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)' + _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' + IE_NAME = u'youtube:subscriptions' + _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s' + _PAGING_STEP = 30 + + # Overwrite YoutubeIE properties we don't want + _TESTS = [] + @classmethod + def suitable(cls, url): + return re.match(cls._VALID_URL, url) is not None + + def _real_extract(self, url): + feed_entries = [] + # The step argument is available only in 2.7 or higher + for i in itertools.count(0): + paging = i*self._PAGING_STEP + info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed', + u'Downloading page %s' % i) + info = json.loads(info) + feed_html = info['feed_html'] + m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html) + ids = orderedSet(m.group(1) for m in m_ids) + feed_entries.extend(self.url_result(id, 'Youtube') for id in ids) + if info['paging'] is None: + break + return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')