2 from __future__ import unicode_literals
6 from .common import InfoExtractor
15 class HeiseIE(InfoExtractor):
17 r'^https?://(?:www\.)?heise\.de/video/artikel/' +
18 r'.+?(?P<id>[0-9]+)\.html$'
22 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-' +
23 'Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
25 'md5': 'ffed432483e922e88545ad9f2f15d30e',
30 "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / " +
31 "Peilsender Smartphone"
33 'format_id': 'mp4_720',
34 'timestamp': 1411812600,
35 'upload_date': '20140927',
40 r'".+?\?sequenz=(?P<sequenz>.+?)&container=(?P<container>.+?)' +
41 r'(?:&hd=(?P<hd>.+?))?(?:&signature=(?P<signature>.+?))?&callback=\?"'
43 _PREFIX = 'http://www.heise.de/videout/info?'
def _warn(self, fmt, *args):
    """Report a warning tagged with ``self._id``.

    ``fmt`` is a ``str.format`` template and ``args`` fill its positional
    fields.  The rendered message is handed to ``self.report_warning``
    together with ``self._id``, so ``self._id`` must already be set when
    this is called.
    """
    message = fmt.format(*args)
    self.report_warning(message, self._id)
48 def _parse_config_url(self, html):
49 m = re.search(self._CONFIG, html)
51 raise ExtractorError('No config found')
53 qs = compat_urllib_parse.urlencode(dict((k, v) for k, v
54 in m.groupdict().items()
56 return self._PREFIX + qs
58 def _real_extract(self, url):
59 mobj = re.match(self._VALID_URL, url)
60 self._id = mobj.group('id')
62 html = self._download_webpage(url, self._id)
63 config = self._download_json(self._parse_config_url(html), self._id)
69 title = get_meta_content('fulltitle', html)
72 elif config.get('title'):
73 info['title'] = config['title']
75 self._warn('title: not found')
76 info['title'] = 'heise'
78 if (not config.get('formats') or
79 not hasattr(config['formats'], 'items')):
80 raise ExtractorError('No formats found')
83 for t, rs in config['formats'].items():
84 if not rs or not hasattr(rs, 'items'):
85 self._warn('formats: {0}: no resolutions', t)
88 for res, obj in rs.items():
89 format_id = '{0}_{1}'.format(t, res)
91 if not obj or not obj.get('url'):
92 self._warn('formats: {0}: no url', format_id)
97 'format_id': format_id
100 fmt['height'] = int(res)
101 except ValueError as e:
102 self._warn('formats: {0}: height: {1}', t, e)
106 self._sort_formats(formats)
107 info['formats'] = formats
109 if config.get('poster'):
110 info['thumbnail'] = config['poster']
112 date = get_meta_content('date', html)
115 info['timestamp'] = parse_iso8601(date)
116 except ValueError as e:
117 self._warn('timestamp: {0}', e)