youtube_dl/extractor/servingsys.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
)
   9
  10
  11 class ServingSysIE(InfoExtractor):
  12     _VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)'
  13
  14     _TEST = {
  15         'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
  16         'playlist': [{
  17             'file': '29955898.flv',
  18             'md5': 'baed851342df6846eb8677a60a011a0f',
  19             'info_dict': {
  20                 'title': 'AdAPPter_Hyundai_demo (1)',
  21                 'duration': 74,
  22                 'tbr': 1378,
  23                 'width': 640,
  24                 'height': 400,
  25             },
  26         }, {
  27             'file': '29907998.flv',
  28             'md5': '979b4da2655c4bc2d81aeb915a8c5014',
  29             'info_dict': {
  30                 'title': 'AdAPPter_Hyundai_demo (2)',
  31                 'duration': 34,
  32                 'width': 854,
  33                 'height': 480,
  34                 'tbr': 516,
  35             },
  36         }],
  37         'params': {
  38             'playlistend': 2,
  39         },
  40         'skip': 'Blocked in the US [sic]',
  41     }
  42
  43     def _real_extract(self, url):
  44         mobj = re.match(self._VALID_URL, url)
  45         pl_id = mobj.group('id')
  46
  47         vast_doc = self._download_xml(url, pl_id)
  48         title = vast_doc.find('.//AdTitle').text
  49         media = vast_doc.find('.//MediaFile').text
  50         info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
  51
  52         doc = self._download_xml(info_url, pl_id, 'Downloading video info')
  53         entries = [{
  54             '_type': 'video',
  55             'id': a.attrib['id'],
  56             'title': '%s (%s)' % (title, a.attrib['assetID']),
  57             'url': a.attrib['URL'],
  58             'duration': int_or_none(a.attrib.get('length')),
  59             'tbr': int_or_none(a.attrib.get('bitrate')),
  60             'height': int_or_none(a.attrib.get('height')),
  61             'width': int_or_none(a.attrib.get('width')),
  62         } for a in doc.findall('.//AdditionalAssets/asset')]
  63
  64         return {
  65             '_type': 'playlist',
  66             'id': pl_id,
  67             'title': title,
  68             'entries': entries,
  69         }