youtube_dl/extractor/khanacademy.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     unified_strdate,
   8 )
   9
  10
  11 class KhanAcademyIE(InfoExtractor):
  12     _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
  13     IE_NAME = 'KhanAcademy'
  14
  15     _TEST = {
  16         'url': 'http://www.khanacademy.org/video/one-time-pad',
  17         'file': 'one-time-pad.mp4',
  18         'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
  19         'info_dict': {
  20             'title': 'The one-time pad',
  21             'description': 'The perfect cipher',
  22             'duration': 176,
  23             'uploader': 'Brit Cruise',
  24             'upload_date': '20120411',
  25         }
  26     }
  27
  28     def _real_extract(self, url):
  29         m = re.match(self._VALID_URL, url)
  30         video_id = m.group('id')
  31
  32         if m.group('key') == 'video':
  33             data = self._download_json(
  34                 'http://api.khanacademy.org/api/v1/videos/' + video_id,
  35                 video_id, 'Downloading video info')
  36
  37             upload_date = unified_strdate(data['date_added'])
  38             uploader = ', '.join(data['author_names'])
  39             return {
  40                 '_type': 'url_transparent',
  41                 'url': data['url'],
  42                 'id': video_id,
  43                 'title': data['title'],
  44                 'thumbnail': data['image_url'],
  45                 'duration': data['duration'],
  46                 'description': data['description'],
  47                 'uploader': uploader,
  48                 'upload_date': upload_date,
  49             }
  50         else:
  51             # topic
  52             data = self._download_json(
  53                 'http://api.khanacademy.org/api/v1/topic/' + video_id,
  54                 video_id, 'Downloading topic info')
  55
  56             entries = [
  57                 {
  58                     '_type': 'url',
  59                     'url': c['url'],
  60                     'id': c['id'],
  61                     'title': c['title'],
  62                 }
  63                 for c in data['children'] if c['kind'] in ('Video', 'Topic')]
  64
  65             return {
  66                 '_type': 'playlist',
  67                 'id': video_id,
  68                 'title': data['title'],
  69                 'description': data['description'],
  70                 'entries': entries,
  71             }