youtube_dl/extractor/anitube.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6
   7
   8 class AnitubeIE(InfoExtractor):
   9     IE_NAME = 'anitube.se'
  10     _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
  11
  12     _TEST = {
  13         'url': 'http://www.anitube.se/video/36621',
  14         'md5': '59d0eeae28ea0bc8c05e7af429998d43',
  15         'info_dict': {
  16             'id': '36621',
  17             'ext': 'mp4',
  18             'title': 'Recorder to Randoseru 01',
  19             'duration': 180.19,
  20         },
  21         'skip': 'Blocked in the US',
  22     }
  23
  24     def _real_extract(self, url):
  25         mobj = re.match(self._VALID_URL, url)
  26         video_id = mobj.group('id')
  27
  28         webpage = self._download_webpage(url, video_id)
  29         key = self._html_search_regex(
  30             r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key')
  31
  32         config_xml = self._download_xml(
  33             'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
  34
  35         video_title = config_xml.find('title').text
  36         thumbnail = config_xml.find('image').text
  37         duration = float(config_xml.find('duration').text)
  38
  39         formats = []
  40         video_url = config_xml.find('file')
  41         if video_url is not None:
  42             formats.append({
  43                 'format_id': 'sd',
  44                 'url': video_url.text,
  45             })
  46         video_url = config_xml.find('filehd')
  47         if video_url is not None:
  48             formats.append({
  49                 'format_id': 'hd',
  50                 'url': video_url.text,
  51             })
  52
  53         return {
  54             'id': video_id,
  55             'title': video_title,
  56             'thumbnail': thumbnail,
  57             'duration': duration,
  58             'formats': formats
  59         }