youtube_dl/extractor/trilulilu.py

   1 import json
   2 import re
   3
   4 from .common import InfoExtractor
   5
   6
   7 class TriluliluIE(InfoExtractor):
   8     _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
   9     _TEST = {
  10         u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
  11         u'file': u"big-buck-bunny-1.mp4",
  12         u'info_dict': {
  13             u"title": u"Big Buck Bunny",
  14             u"description": u":) pentru copilul din noi",
  15         },
  16         # Server ignores Range headers (--test)
  17         u"params": {
  18             u"skip_download": True
  19         }
  20     }
  21
  22     def _real_extract(self, url):
  23         mobj = re.match(self._VALID_URL, url)
  24         video_id = mobj.group('video_id')
  25
  26         webpage = self._download_webpage(url, video_id)
  27
  28         title = self._og_search_title(webpage)
  29         thumbnail = self._og_search_thumbnail(webpage)
  30         description = self._og_search_description(webpage)
  31
  32         log_str = self._search_regex(
  33             r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
  34         log = json.loads(log_str)
  35
  36         format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
  37                       u'video-formats2' % log)
  38         format_doc = self._download_xml(
  39             format_url, video_id,
  40             note=u'Downloading formats',
  41             errnote=u'Error while downloading formats')
  42
  43         video_url_template = (
  44             u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
  45             u'&source=site&hash=%(hash)s&username=%(userid)s&'
  46             u'key=ministhebest&format=%%s&sig=&exp=' %
  47             log)
  48         formats = [
  49             {
  50                 'format': fnode.text,
  51                 'url': video_url_template % fnode.text,
  52                 'ext': fnode.text.partition('-')[0]
  53             }
  54
  55             for fnode in format_doc.findall('./formats/format')
  56         ]
  57
  58         return {
  59             '_type': 'video',
  60             'id': video_id,
  61             'formats': formats,
  62             'title': title,
  63             'description': description,
  64             'thumbnail': thumbnail,
  65         }
  66