]> gitweb @ CieloNegro.org - youtube-dl.git/blob - youtube_dl/extractor/hitbox.py
Merge pull request #12861 from Tithen-Firion/cbsinteractive-fix
[youtube-dl.git] / youtube_dl / extractor / hitbox.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     clean_html,
9     parse_iso8601,
10     float_or_none,
11     int_or_none,
12     compat_str,
13     determine_ext,
14 )
15
16
class HitboxIE(InfoExtractor):
    """Extractor for recorded videos on hitbox.tv / smashcast.tv.

    Stream URLs are read from the smashcast player-config API and the
    descriptive metadata from the smashcast media API.
    """
    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
        'only_matching': True,
    }]

    def _extract_metadata(self, url, video_id):
        """Download the media JSON and turn it into an info dict.

        The request goes to ``<url>/<video_id>``. The returned dict holds
        every metadata field except ``formats``, which the caller adds.
        Thumbnails and categories are optional: they are left out when the
        API response does not provide them instead of failing the
        extraction.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')

        # Recorded videos and live streams sit under different top-level
        # keys and store their date under different field names.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        video_meta = metadata.get(media_type, [])[0]
        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description') or
            video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # Avoid reporting [None] when the category name is absent.
        category = video_meta.get('category_name')
        categories = [category] if category else None

        # Thumbnail paths are relative to thumb_base; only emit entries for
        # paths that are actually present so that a missing thumbnail does
        # not raise on string concatenation.
        thumbs = []
        for thumb_key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(thumb_key)
            if not thumb_path:
                continue
            thumbs.append({
                'url': thumb_base + thumb_path,
                'width': width,
                'height': height,
            })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Return the info dict (metadata plus formats) for a video URL."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        formats = []
        for video in player_config['clip']['bitrates']:
            label = video.get('label')
            # The 'Auto' entry is not turned into a format of its own.
            if label == 'Auto':
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            bitrate = int_or_none(video.get('bitrate'))
            if determine_ext(video_url) == 'm3u8':
                # Only absolute http(s) playlist URLs are usable here.
                if not video_url.startswith('http'):
                    continue
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'tbr': bitrate,
                    'format_note': label,
                    'protocol': 'm3u8_native',
                })
            else:
                formats.append({
                    'url': video_url,
                    'tbr': bitrate,
                    'format_note': label,
                })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/video', video_id)
        metadata['formats'] = formats

        return metadata
131
132
class HitboxLiveIE(HitboxIE):
    """Extractor for live channels on hitbox.tv / smashcast.tv.

    Stream URLs are read from the smashcast live player-config API; the
    descriptive metadata is obtained through the inherited
    ``_extract_metadata``.
    """
    IE_NAME = 'hitbox:live'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/dimak',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # URLs that HitboxIE accepts are recorded videos; never claim them
        # here, even though the live pattern is much broader.
        return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return the info dict (metadata plus formats) for a live channel."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        servers = []
        # 'cdns' and 'bitrates' are treated as optional: a missing or null
        # value simply contributes no formats.
        for cdn in player_config.get('cdns') or []:
            # Subscribe URLs are not playable
            if cdn.get('rtmpSubscribe') is True:
                continue
            base_url = cdn.get('netConnectionUrl')
            host = None
            if base_url:
                # Each server is processed only once.
                if base_url in servers:
                    continue
                servers.append(base_url)
                # The last two dot-separated labels of the server host are
                # used as the format note; None when the URL does not have
                # the expected shape.
                host_mobj = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
                if host_mobj:
                    host = host_mobj.group(1)
            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                if label == 'Auto':
                    continue
                stream_url = stream.get('url')
                if not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    # Only absolute http(s) playlist URLs are usable here.
                    if not stream_url.startswith('http'):
                        continue
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        'rtmp_live': True,
                    })
                else:
                    # Non-HLS stream URLs are relative to the CDN's
                    # connection URL and cannot be built without it.
                    if not base_url:
                        continue
                    formats.append({
                        'url': '%s/%s' % (base_url, stream_url),
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/live', video_id)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))

        return metadata