From 41333b97b9471316cf0f395db59196e6571fc776 Mon Sep 17 00:00:00 2001
From: ping
Date: Tue, 12 May 2015 22:35:16 +0800
Subject: [PATCH] [qqmusic] Add support for charts / top lists

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/qqmusic.py  | 55 ++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index de19dfd7a..8ec0c1032 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -414,6 +414,7 @@ from .qqmusic import (
     QQMusicIE,
     QQMusicSingerIE,
     QQMusicAlbumIE,
+    QQMusicToplistIE,
 )
 from .quickvid import QuickVidIE
 from .r7 import R7IE
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
index 174c8e0ae..d4a85d8c3 100644
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dl/extractor/qqmusic.py
@@ -9,6 +9,7 @@ from .common import InfoExtractor
 from ..utils import (
     strip_jsonp,
     unescapeHTML,
+    js_to_json,
 )
 from ..compat import compat_urllib_request
 
@@ -168,3 +169,57 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
             album_page, 'album details', default=None)
 
         return self.playlist_result(entries, mid, album_name, album_detail)
+
+
+class QQMusicToplistIE(QQPlaylistBaseIE):
+    _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
+
+    _TESTS = [{
+        'url': 'http://y.qq.com/#type=toplist&p=global_12',
+        'info_dict': {
+            'id': 'global_12',
+            'title': 'itunes榜',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'http://y.qq.com/#type=toplist&p=top_6',
+        'info_dict': {
+            'id': 'top_6',
+            'title': 'QQ音乐巅峰榜·欧美',
+        },
+        'playlist_count': 100,
+    }]
+
+    @staticmethod
+    def strip_qq_jsonp(code):
+        return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code))
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+
+        list_type = list_id.split("_")[0]
+        num_id = list_id.split("_")[1]
+
+        list_page = self._download_webpage("http://y.qq.com/y/static/toplist/index/%s.html" % list_id, list_id, 'Download toplist page')
+        entries = []
+        if list_type == 'top':
+            list = self._download_json(
+                "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id,
+                list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp)
+
+            for song in list['l']:
+                s = song['s']
+                song_mid = s.split("|")[20]
+                entries.append(self.url_result(
+                    'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
+                    song_mid))
+
+        elif list_type == 'global':
+            entries = self.get_entries_from_page(list_page)
+
+        list_name = self._html_search_regex(
+            r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name',
+            default=None)
+        list_desc = None
+
+        return self.playlist_result(entries, list_id, list_name, list_desc)
\ No newline at end of file
-- 
2.40.0