youtube_dl/extractor/goshgay.py

   1 # -*- coding: utf-8 -*-
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     compat_urlparse,
   9     str_to_int,
  10     ExtractorError,
  11 )
  12 import json
  13
  14
  15 class GoshgayIE(InfoExtractor):
  16     _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)'
  17     _TEST = {
  18         'url': 'http://www.goshgay.com/video4116282',
  19         'md5': '268b9f3c3229105c57859e166dd72b03',
  20         'info_dict': {
  21             'id': '4116282',
  22             'ext': 'flv',
  23             'title': 'md5:089833a4790b5e103285a07337f245bf',
  24             'thumbnail': 're:http://.*\.jpg',
  25             'age_limit': 18,
  26         }
  27     }
  28
  29     def _real_extract(self, url):
  30         mobj = re.match(self._VALID_URL, url)
  31         video_id = mobj.group('id')
  32
  33         webpage = self._download_webpage(url, video_id)
  34         title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title')
  35
  36         player_config = self._search_regex(
  37             r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings')
  38         player_vars = json.loads(player_config.replace("'", '"'))
  39         width = str_to_int(player_vars.get('width'))
  40         height = str_to_int(player_vars.get('height'))
  41         config_uri = player_vars.get('config')
  42
  43         if config_uri is None:
  44             raise ExtractorError('Missing config URI')
  45         node = self._download_xml(config_uri, video_id, 'Downloading player config XML',
  46                                   errnote='Unable to download XML')
  47         if node is None:
  48             raise ExtractorError('Missing config XML')
  49         if node.tag != 'config':
  50             raise ExtractorError('Missing config attribute')
  51         fns = node.findall('file')
  52         imgs = node.findall('image')
  53         if len(fns) != 1:
  54             raise ExtractorError('Missing media URI')
  55         video_url = fns[0].text
  56         if len(imgs) < 1:
  57             thumbnail = None
  58         else:
  59             thumbnail = imgs[0].text
  60
  61         url_comp = compat_urlparse.urlparse(url)
  62         ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
  63
  64         return {
  65             'id': video_id,
  66             'url': video_url,
  67             'title': title,
  68             'width': width,
  69             'height': height,
  70             'thumbnail': thumbnail,
  71             'http_referer': ref,
  72             'age_limit': 18,
  73         }