youtube_dl/extractor/zingmp3.py

   1 # coding=utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7
   8
   9 class ZingMp3BaseInfoExtractor(InfoExtractor):
  10
  11     @staticmethod
  12     def _extract_item(item):
  13         title = item.find('./title').text.strip()
  14         source = item.find('./source').text
  15         extension = item.attrib['type']
  16         thumbnail = item.find('./backimage').text
  17
  18         return {
  19             'title': title,
  20             'url': source,
  21             'ext': extension,
  22             'thumbnail': thumbnail,
  23         }
  24
  25     def _extract_player_xml(self, player_xml_url, id, playlist_title=None):
  26         player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML')
  27         items = player_xml.findall('./item')
  28
  29         if len(items) == 1:
  30             # one single song
  31             data = self._extract_item(items[0])
  32             data['id'] = id
  33
  34             return data
  35         else:
  36             # playlist of songs
  37             entries = []
  38
  39             for i, item in enumerate(items, 1):
  40                 entry = self._extract_item(item)
  41                 entry['id'] = '%s-%d' % (id, i)
  42                 entries.append(entry)
  43
  44             return {
  45                 '_type': 'playlist',
  46                 'id': id,
  47                 'title': playlist_title,
  48                 'entries': entries,
  49             }
  50
  51
  52 class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
  53     _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html'
  54     _TESTS = [{
  55         'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
  56         'md5': 'ead7ae13693b3205cbc89536a077daed',
  57         'info_dict': {
  58             'id': 'ZWZB9WAB',
  59             'title': 'Xa Mãi Xa',
  60             'ext': 'mp3',
  61             'thumbnail': 're:^https?://.*\.jpg$',
  62         },
  63     }]
  64     IE_NAME = 'zingmp3:song'
  65     IE_DESC = 'mp3.zing.vn songs'
  66
  67     def _real_extract(self, url):
  68         matched = re.match(self._VALID_URL, url)
  69         slug = matched.group('slug')
  70         song_id = matched.group('song_id')
  71
  72         webpage = self._download_webpage(
  73             'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id)
  74
  75         player_xml_url = self._search_regex(
  76             r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
  77
  78         return self._extract_player_xml(player_xml_url, song_id)
  79
  80
  81 class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
  82     _VALID_URL = r'https?://mp3\.zing\.vn/album/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
  83     _TESTS = [{
  84         'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
  85         'info_dict': {
  86             '_type': 'playlist',
  87             'id': 'ZWZBWDAF',
  88             'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
  89         },
  90         'playlist_count': 10,
  91     }]
  92     IE_NAME = 'zingmp3:album'
  93     IE_DESC = 'mp3.zing.vn albums'
  94
  95     def _real_extract(self, url):
  96         matched = re.match(self._VALID_URL, url)
  97         slug = matched.group('slug')
  98         album_id = matched.group('album_id')
  99
 100         webpage = self._download_webpage(
 101             'http://mp3.zing.vn/album/%s/%s.html' % (slug, album_id), album_id)
 102         player_xml_url = self._search_regex(
 103             r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
 104
 105         return self._extract_player_xml(
 106             player_xml_url, album_id,
 107             playlist_title=self._og_search_title(webpage))