youtube_dl/extractor/yinyuetai.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import ExtractorError
   6
   7
   8 class YinYueTaiIE(InfoExtractor):
   9     IE_NAME = 'yinyuetai:video'
  10     IE_DESC = '音悦Tai'
  11     _VALID_URL = r'https?://v\.yinyuetai\.com/video(?:/h5)?/(?P<id>[0-9]+)'
  12     _TESTS = [{
  13         'url': 'http://v.yinyuetai.com/video/2322376',
  14         'md5': '6e3abe28d38e3a54b591f9f040595ce0',
  15         'info_dict': {
  16             'id': '2322376',
  17             'ext': 'mp4',
  18             'title': '少女时代_PARTY_Music Video Teaser',
  19             'creator': '少女时代',
  20             'duration': 25,
  21             'thumbnail': 're:^https?://.*\.jpg$',
  22         },
  23     }, {
  24         'url': 'http://v.yinyuetai.com/video/h5/2322376',
  25         'only_matching': True,
  26     }]
  27
  28     def _real_extract(self, url):
  29         video_id = self._match_id(url)
  30
  31         info = self._download_json(
  32             'http://ext.yinyuetai.com/main/get-h-mv-info?json=true&videoId=%s' % video_id, video_id,
  33             'Downloading mv info')['videoInfo']['coreVideoInfo']
  34
  35         if info['error']:
  36             raise ExtractorError(info['errorMsg'], expected=True)
  37
  38         formats = [{
  39             'url': format_info['videoUrl'],
  40             'format_id': format_info['qualityLevel'],
  41             'format': format_info.get('qualityLevelName'),
  42             'filesize': format_info.get('fileSize'),
  43             # though URLs ends with .flv, the downloaded files are in fact mp4
  44             'ext': 'mp4',
  45             'tbr': format_info.get('bitrate'),
  46         } for format_info in info['videoUrlModels']]
  47         self._sort_formats(formats)
  48
  49         return {
  50             'id': video_id,
  51             'title': info['videoName'],
  52             'thumbnail': info.get('bigHeadImage'),
  53             'creator': info.get('artistNames'),
  54             'duration': info.get('duration'),
  55             'formats': formats,
  56         }