youtube_dl/extractor/escapist.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     compat_urllib_parse,
   8
   9     ExtractorError,
  10 )
  11
  12
  13 class EscapistIE(InfoExtractor):
  14     _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
  15     _TEST = {
  16         'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
  17         'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
  18         'info_dict': {
  19             'id': '6618',
  20             'ext': 'mp4',
  21             'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
  22             'uploader': 'the-escapist-presents',
  23             'title': "Breaking Down Baldur's Gate",
  24         }
  25     }
  26
  27     def _real_extract(self, url):
  28         mobj = re.match(self._VALID_URL, url)
  29         showName = mobj.group('showname')
  30         video_id = mobj.group('id')
  31
  32         self.report_extraction(video_id)
  33         webpage = self._download_webpage(url, video_id)
  34
  35         videoDesc = self._html_search_regex(
  36             r'<meta name="description" content="([^"]*)"',
  37             webpage, 'description', fatal=False)
  38
  39         playerUrl = self._og_search_video_url(webpage, name='player URL')
  40
  41         title = self._html_search_regex(
  42             r'<meta name="title" content="([^"]*)"',
  43             webpage, 'title').split(' : ')[-1]
  44
  45         configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
  46         configUrl = compat_urllib_parse.unquote(configUrl)
  47
  48         formats = []
  49
  50         def _add_format(name, cfgurl, quality):
  51             config = self._download_json(
  52                 cfgurl, video_id,
  53                 'Downloading ' + name + ' configuration',
  54                 'Unable to download ' + name + ' configuration',
  55                 transform_source=lambda s: s.replace("'", '"'))
  56
  57             playlist = config['playlist']
  58             formats.append({
  59                 'url': playlist[1]['url'],
  60                 'format_id': name,
  61                 'quality': quality,
  62             })
  63
  64         _add_format('normal', configUrl, quality=0)
  65         hq_url = (configUrl +
  66                   ('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
  67         try:
  68             _add_format('hq', hq_url, quality=1)
  69         except ExtractorError:
  70             pass  # That's fine, we'll just use normal quality
  71
  72         self._sort_formats(formats)
  73
  74         return {
  75             'id': video_id,
  76             'formats': formats,
  77             'uploader': showName,
  78             'title': title,
  79             'thumbnail': self._og_search_thumbnail(webpage),
  80             'description': videoDesc,
  81             'player_url': playerUrl,
  82         }