youtube_dl/extractor/videolecturesnet.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     find_xpath_attr,
   8     int_or_none,
   9     parse_duration,
  10     unified_strdate,
  11 )
  12
  13
  14 class VideoLecturesNetIE(InfoExtractor):
  15     _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
  16     IE_NAME = 'videolectures.net'
  17
  18     _TEST = {
  19         'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
  20         'info_dict': {
  21             'id': 'promogram_igor_mekjavic_eng',
  22             'ext': 'mp4',
  23             'title': 'Automatics, robotics and biocybernetics',
  24             'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
  25             'upload_date': '20130627',
  26             'duration': 565,
  27             'thumbnail': 're:http://.*\.jpg',
  28         },
  29     }
  30
  31     def _real_extract(self, url):
  32         mobj = re.match(self._VALID_URL, url)
  33         video_id = mobj.group('id')
  34
  35         smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
  36         smil = self._download_xml(smil_url, video_id)
  37
  38         title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
  39         description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
  40         description = (
  41             None if description_el is None
  42             else description_el.attrib['content'])
  43         upload_date = unified_strdate(
  44             find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
  45
  46         switch = smil.find('.//switch')
  47         duration = parse_duration(switch.attrib.get('dur'))
  48         thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
  49         thumbnail = (
  50             None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
  51
  52         formats = [{
  53             'url': v.attrib['src'],
  54             'width': int_or_none(v.attrib.get('width')),
  55             'height': int_or_none(v.attrib.get('height')),
  56             'filesize': int_or_none(v.attrib.get('size')),
  57             'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
  58             'ext': v.attrib.get('ext'),
  59         } for v in switch.findall('./video')
  60             if v.attrib.get('proto') == 'http']
  61
  62         return {
  63             'id': video_id,
  64             'title': title,
  65             'description': description,
  66             'upload_date': upload_date,
  67             'duration': duration,
  68             'thumbnail': thumbnail,
  69             'formats': formats,
  70         }