]> gitweb @ CieloNegro.org - youtube-dl.git/blob - youtube_dl/extractor/beampro.py
[cbs] extract smpte and vtt subtitles
[youtube-dl.git] / youtube_dl / extractor / beampro.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     ExtractorError,
7     clean_html,
8     compat_str,
9     float_or_none,
10     int_or_none,
11     parse_iso8601,
12     try_get,
13     urljoin,
14 )
15
16
class BeamProBaseIE(InfoExtractor):
    """Common base for the beam.pro / mixer.com extractors.

    Holds the API root URL, the table mapping a channel's ``audience``
    value to an age limit, and a helper that derives uploader metadata
    from a channel object.
    """

    _API_BASE = 'https://mixer.com/api/v1'
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}

    def _extract_channel_info(self, chan):
        """Build the uploader-related info fields from a channel dict.

        Returns a dict with ``uploader``, ``uploader_id`` and ``age_limit``.
        Any of them may be None when the corresponding data is absent
        from ``chan``.
        """
        # Numeric user id: top-level 'userId' first, nested user object second.
        owner_id = chan.get('userId')
        if not owner_id:
            owner_id = try_get(chan, lambda x: x['user']['id'])

        # Display name: channel token first, nested username second.
        owner_name = chan.get('token')
        if not owner_name:
            owner_name = try_get(
                chan, lambda x: x['user']['username'], compat_str)

        # A falsy id is reported as None instead of being stringified.
        owner_id_str = None
        if owner_id:
            owner_id_str = compat_str(owner_id)

        audience = chan.get('audience')

        return {
            'uploader': owner_name,
            'uploader_id': owner_id_str,
            # Unknown audience values yield None.
            'age_limit': self._RATINGS.get(audience),
        }
29
30
class BeamProLiveIE(BeamProBaseIE):
    """Extractor for live channel pages on beam.pro / mixer.com.

    URLs accepted by BeamProVodIE are declined by ``suitable`` so that
    they are not claimed by this extractor.
    """

    IE_NAME = 'Mixer:live'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://mixer.com/niterhayven',
        'info_dict': {
            'id': '261562',
            'ext': 'mp4',
            'title': 'Introducing The Witcher 3 //  The Grind Starts Now!',
            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
            'thumbnail': r're:https://.*\.jpg$',
            'timestamp': 1483477281,
            'upload_date': '20170103',
            'uploader': 'niterhayven',
            'uploader_id': '373396',
            'age_limit': 18,
            'is_live': True,
            'view_count': int,
        },
        'skip': 'niterhayven is offline',
        'params': {
            'skip_download': True,
        },
    }

    # Two placeholders remain after this formatting step:
    # the channel id and the manifest kind (file extension).
    _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE

    @classmethod
    def suitable(cls, url):
        """Return False for URLs BeamProVodIE accepts, else defer to the base check."""
        if BeamProVodIE.suitable(url):
            return False
        return super(BeamProLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return the info dict for the live stream of the channel named in ``url``.

        Raises ExtractorError (expected) when the API explicitly reports
        the channel as not online.
        """
        channel_name = self._match_id(url)

        channel_api_url = '%s/channels/%s' % (self._API_BASE, channel_name)
        chan = self._download_json(channel_api_url, channel_name)

        # Only an explicit False counts as offline; a missing flag does not.
        if chan.get('online') is False:
            raise ExtractorError(
                '{0} is offline'.format(channel_name), expected=True)

        channel_id = chan['id']

        # HLS manifest first, then SMIL; both lookups are non-fatal.
        formats = self._extract_m3u8_formats(
            self._MANIFEST_URL_TEMPLATE % (channel_id, 'm3u8'),
            channel_name, ext='mp4', m3u8_id='hls', fatal=False)
        smil_formats = self._extract_smil_formats(
            self._MANIFEST_URL_TEMPLATE % (channel_id, 'smil'),
            channel_name, fatal=False)
        formats.extend(smil_formats)
        self._sort_formats(formats)

        video_id = compat_str(chan.get('id') or channel_name)
        title = self._live_title(chan.get('name') or channel_name)
        description = clean_html(chan.get('description'))
        thumbnail = try_get(
            chan, lambda x: x['thumbnail']['url'], compat_str)
        timestamp = parse_iso8601(chan.get('updatedAt'))
        view_count = int_or_none(chan.get('viewersTotal'))

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'is_live': True,
            'view_count': view_count,
            'formats': formats,
        }
        # Adds uploader, uploader_id and age_limit.
        info.update(self._extract_channel_info(chan))

        return info
98
99
class BeamProVodIE(BeamProBaseIE):
    """Extractor for recorded streams (VODs) on beam.pro / mixer.com.

    Matches channel URLs that carry a numeric ``vod`` query parameter.
    """

    IE_NAME = 'Mixer:vod'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
    _TEST = {
        'url': 'https://mixer.com/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }

    @staticmethod
    def _extract_format(vod, vod_type):
        """Build the format entry for one VOD object.

        ``vod`` is a dict taken from the recording's ``vods`` list and
        ``vod_type`` must be ``'hls'`` or ``'raw'``.

        Returns a list holding a single format dict, or an empty list
        when ``vod`` has no ``baseUrl``.

        Raises AssertionError for any other ``vod_type`` (only reached
        when ``baseUrl`` is present).
        """
        if not vod.get('baseUrl'):
            return []

        if vod_type == 'hls':
            filename, protocol = 'manifest.m3u8', 'm3u8_native'
        elif vod_type == 'raw':
            filename, protocol = 'source.mp4', 'https'
        else:
            # Raised explicitly rather than via ``assert False``: assert
            # statements are removed under ``python -O``, which would let
            # execution continue with ``filename``/``protocol`` unbound.
            raise AssertionError(
                'unsupported VOD format: %r' % (vod_type,))

        # Stream properties live in an optional 'data' dict.
        data = vod.get('data') if isinstance(vod.get('data'), dict) else {}

        format_id = [vod_type]
        # The height suffix is only added when 'Height' is a string.
        if isinstance(data.get('Height'), compat_str):
            format_id.append('%sp' % data['Height'])

        return [{
            'url': urljoin(vod['baseUrl'], filename),
            'format_id': '-'.join(format_id),
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': int_or_none(data.get('Height')),
            'fps': int_or_none(data.get('Fps')),
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }]

    def _real_extract(self, url):
        """Return the info dict for the recording whose id is in ``url``.

        Raises ExtractorError (expected) when the recording's state is
        anything other than ``'AVAILABLE'``.
        """
        vod_id = self._match_id(url)

        vod_info = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)

        state = vod_info.get('state')
        if state != 'AVAILABLE':
            raise ExtractorError(
                'VOD %s is not available (state: %s)' % (vod_id, state),
                expected=True)

        formats = []
        thumbnail_url = None

        # Tolerate a missing or malformed 'vods' list instead of failing
        # with a bare KeyError/TypeError; an empty format list is then
        # handed to _sort_formats below.
        # NOTE(review): _sort_formats is defined outside this file and
        # presumably reports "no formats" itself - confirm.
        vods = vod_info.get('vods')
        if not isinstance(vods, list):
            vods = []

        for vod in vods:
            if not isinstance(vod, dict):
                continue
            vod_type = vod.get('format')
            if vod_type in ('hls', 'raw'):
                formats.extend(self._extract_format(vod, vod_type))
            elif vod_type == 'thumbnail':
                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')

        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': vod_info.get('name') or vod_id,
            'duration': float_or_none(vod_info.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(vod_info.get('createdAt')),
            'view_count': int_or_none(vod_info.get('viewsTotal')),
            'formats': formats,
        }
        # Adds uploader, uploader_id and age_limit; the channel object
        # may be absent, in which case those fields come back as None.
        info.update(self._extract_channel_info(vod_info.get('channel') or {}))

        return info