]> gitweb @ CieloNegro.org - youtube-dl.git/blob - youtube_dl/extractor/spiegeltv.py
[cspan] change into a function
[youtube-dl.git] / youtube_dl / extractor / spiegeltv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_urllib_parse_urlparse
6 from ..utils import (
7     determine_ext,
8     float_or_none,
9 )
10
11
class SpiegeltvIE(InfoExtractor):
    """Extractor for film pages on spiegel.tv.

    The film slug taken from the page URL is resolved through a JSON REST
    API hosted on S3 (version -> slug -> media record); the stream formats
    are then derived from the streaming servers listed in the project
    configuration.
    """

    _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/(?:#/)?filme/(?P<id>[\-a-z0-9]+)'
    _TESTS = [{
        'url': 'http://www.spiegel.tv/filme/flug-mh370/',
        'info_dict': {
            'id': 'flug-mh370',
            'ext': 'm4v',
            'title': 'Flug MH370',
            'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines',
            # Raw string: same value as before, but without relying on the
            # invalid ``\.`` escape being passed through unchanged.
            'thumbnail': r're:http://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the film addressed by *url*.

        The result carries ``id``, ``title``, ``description``, ``duration``
        (seconds, converted from the API's milliseconds), ``thumbnails`` and
        ``formats``.

        Raises ``KeyError`` when one of the mandatory API fields
        (``version_name``, ``object_id``, ``uuid``, ``is_wide``,
        ``streamingserver``) is missing. Optional metadata (``subtitle``,
        ``images``, ``duration_in_ms``) may be absent without aborting the
        extraction.
        """
        # Hash-routed URLs (``/#/filme/...``) are rewritten to the plain
        # path form before the page is fetched; ``replace`` is a no-op for
        # URLs that are already in that form.
        url = url.replace('/#/', '/')
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')

        apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
        version_json = self._download_json(
            '%s/version.json' % apihost, video_id,
            note='Downloading version information')
        version_name = version_json['version_name']

        slug_json = self._download_json(
            '%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id),
            video_id,
            note='Downloading object information')
        oid = slug_json['object_id']

        media_json = self._download_json(
            '%s/%s/restapi/media/%s.json' % (apihost, version_name, oid),
            video_id, note='Downloading media information')
        uuid = media_json['uuid']
        is_wide = media_json['is_wide']

        server_json = self._download_json(
            'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json',
            video_id, note='Downloading server information')

        # The play path depends only on the media record, so it is built
        # once instead of once per streaming server.
        aspect_ratio = '16x9' if is_wide else '4x3'
        play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, aspect_ratio)

        formats = []
        for streamingserver in server_json['streamingserver']:
            endpoint = streamingserver.get('endpoint')
            if not endpoint:
                continue
            if endpoint.startswith('rtmp'):
                formats.append({
                    'url': endpoint,
                    'format_id': 'rtmp',
                    # The RTMP application name is the endpoint path
                    # without its leading slash.
                    'app': compat_urllib_parse_urlparse(endpoint).path[1:],
                    'play_path': play_path,
                    'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf',
                    'ext': 'flv',
                    'rtmp_live': True,
                })
            elif determine_ext(endpoint) == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
                    endpoint.replace('[video]', play_path),
                    video_id, 'm4v',
                    preference=1,  # Prefer hls since it allows to workaround georestriction
                    m3u8_id='hls', fatal=False)
                # With fatal=False a failed download must not abort the
                # extraction; a truthiness test covers every "nothing
                # usable" result (False, None or an empty list).
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            else:
                formats.append({
                    'url': endpoint,
                })

        # Thumbnails are optional metadata: tolerate a missing image list
        # and skip entries without a URL instead of failing the extraction.
        thumbnails = []
        for image in media_json.get('images') or []:
            image_url = image.get('url')
            if not image_url:
                continue
            thumbnails.append({
                'url': image_url,
                'width': image.get('width'),
                'height': image.get('height'),
            })

        description = media_json.get('subtitle')
        duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }