]> gitweb @ CieloNegro.org - youtube-dl.git/blob - youtube_dl/extractor/twitch.py
[twitch] PEP8
[youtube-dl.git] / youtube_dl / extractor / twitch.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import itertools
5 import re
6
7 from .common import InfoExtractor
8 from ..compat import (
9     compat_str,
10     compat_urllib_parse,
11     compat_urllib_request,
12 )
13 from ..utils import (
14     ExtractorError,
15     parse_iso8601,
16 )
17
18
class TwitchBaseIE(InfoExtractor):
    """Common base for the Twitch extractors.

    Holds the URL/API constants shared by the subclasses, wraps JSON
    downloads so that error payloads are turned into ExtractorError, and
    performs an optional account login during initialization.
    """

    _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'

    _API_BASE = 'https://api.twitch.tv'
    _USHER_BASE = 'http://usher.twitch.tv'
    _LOGIN_URL = 'https://secure.twitch.tv/user/login'

    def _handle_error(self, response):
        """Raise ExtractorError when *response* is a dict with a truthy 'error'.

        Anything that is not a dict is ignored, so this is safe to call on
        whatever the JSON download returned.
        """
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
                expected=True)

    def _download_json(self, url, video_id, note='Downloading JSON metadata', *args, **kwargs):
        """Download JSON via the parent class and check it for an API error.

        The first three parameters are unchanged; any further positional or
        keyword arguments are forwarded verbatim to the parent
        implementation, so this override no longer hides the options the
        base method accepts.

        Returns the parent's result; raises ExtractorError (via
        _handle_error) when that result is a dict containing an error.
        """
        response = super(TwitchBaseIE, self)._download_json(
            url, video_id, note, *args, **kwargs)
        self._handle_error(response)
        return response

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with the credentials from _get_login_info(), if any.

        Returns immediately when no username is available. Raises
        ExtractorError when the page returned for the login POST contains
        an element with id "login_error_message".
        """
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # The login form embeds a hidden token that is sent back with the POST.
        authenticity_token = self._search_regex(
            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
            login_page, 'authenticity token')

        login_form = {
            'utf8': '✓'.encode('utf-8'),
            'authenticity_token': authenticity_token,
            'redirect_on_login': '',
            'embed_form': 'false',
            'mp_source_action': '',
            'follow': '',
            'user[login]': username,
            'user[password]': password,
        }

        request = compat_urllib_request.Request(
            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
        request.add_header('Referer', self._LOGIN_URL)
        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        # A failed login is detected from the error element in the response page.
        m = re.search(
            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
        if m:
            raise ExtractorError(
                'Unable to login: %s' % m.group('msg').strip(), expected=True)
77
78
class TwitchItemBaseIE(TwitchBaseIE):
    """Base for extractors of single Twitch items.

    Subclasses provide ``_ITEM_TYPE`` (used in progress notes) and
    ``_ITEM_SHORTCUT`` (the prefix put in front of the item id in API URLs).
    """

    def _download_info(self, item, item_id):
        """Fetch the kraken metadata for ``item + item_id`` and map it to an info dict."""
        info_url = '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id)
        raw_info = self._download_json(
            info_url, item_id, 'Downloading %s info JSON' % self._ITEM_TYPE)
        return self._extract_info(raw_info)

    def _extract_media(self, item_id):
        """Return a playlist with one entry per fragment of the item.

        The playlist JSON maps each quality name to a list of chunks; the
        i-th chunks of all qualities together form the formats of part i.
        """
        info = self._download_info(self._ITEM_SHORTCUT, item_id)
        playlist_url = '%s/api/videos/%s%s' % (
            self._API_BASE, self._ITEM_SHORTCUT, item_id)
        response = self._download_json(
            playlist_url, item_id,
            'Downloading %s playlist JSON' % self._ITEM_TYPE)

        chunks = response['chunks']
        qualities = list(chunks.keys())

        entries = []
        # zip(*values) transposes "quality -> chunks" into "part -> chunk per quality".
        for part_num, fragment in enumerate(zip(*chunks.values()), start=1):
            formats = []
            for format_id, fragment_fmt in zip(qualities, fragment):
                fmt = {
                    'url': fragment_fmt['url'],
                    'format_id': format_id,
                    'quality': 1 if format_id == 'live' else 0,
                }
                # Quality names such as "720p" carry the height.
                height_match = re.search(r'^(?P<height>\d+)[Pp]', format_id)
                if height_match:
                    fmt['height'] = int(height_match.group('height'))
                formats.append(fmt)
            self._sort_formats(formats)

            entry = dict(info)
            entry.update({
                'id': '%s_%d' % (info['id'], part_num),
                'title': '%s part %d' % (info['title'], part_num),
                'formats': formats,
            })
            entries.append(entry)
        return self.playlist_result(entries, info['id'], info['title'])

    def _extract_info(self, info):
        """Translate a kraken video object into the extractor's info dict fields."""
        result = {}
        result['id'] = info['_id']
        result['title'] = info['title']
        result['description'] = info['description']
        result['duration'] = info['length']
        result['thumbnail'] = info['preview']
        result['uploader'] = info['channel']['display_name']
        result['uploader_id'] = info['channel']['name']
        result['timestamp'] = parse_iso8601(info['recorded_at'])
        result['view_count'] = info['views']
        return result

    def _real_extract(self, url):
        item_id = self._match_id(url)
        return self._extract_media(item_id)
129
130
class TwitchVideoIE(TwitchItemBaseIE):
    """Extractor for ``twitch.tv/<channel>/b/<id>`` URLs.

    Uses the 'a' shortcut in front of the matched id when talking to the API.
    """

    IE_NAME = 'twitch:video'

    # Item description consumed by the TwitchItemBaseIE helpers.
    _ITEM_TYPE = 'video'
    _ITEM_SHORTCUT = 'a'

    _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE

    _TEST = {
        'url': 'http://www.twitch.tv/riotgames/b/577357806',
        'playlist_mincount': 12,
        'info_dict': {
            'id': 'a577357806',
            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
        },
    }
145
146
class TwitchChapterIE(TwitchItemBaseIE):
    """Extractor for ``twitch.tv/<channel>/c/<id>`` URLs.

    Uses the 'c' shortcut in front of the matched id when talking to the API.
    """

    IE_NAME = 'twitch:chapter'

    # Item description consumed by the TwitchItemBaseIE helpers.
    _ITEM_TYPE = 'chapter'
    _ITEM_SHORTCUT = 'c'

    _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE

    _TESTS = [
        {
            'url': 'http://www.twitch.tv/acracingleague/c/5285812',
            'playlist_mincount': 3,
            'info_dict': {
                'id': 'c5285812',
                'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
            },
        },
        {
            # URL-matching check only.
            'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
            'only_matching': True,
        },
    ]
164
165
class TwitchVodIE(TwitchItemBaseIE):
    """Extractor for ``twitch.tv/<channel>/v/<id>`` URLs.

    Unlike the other item extractors, the media is obtained from an m3u8
    playlist on the usher host, authorized by a per-item access token.
    """

    IE_NAME = 'twitch:vod'
    _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
    _ITEM_TYPE = 'vod'
    _ITEM_SHORTCUT = 'v'

    _TEST = {
        'url': 'http://www.twitch.tv/ksptv/v/3622000',
        'info_dict': {
            'id': 'v3622000',
            'ext': 'mp4',
            'title': '''KSPTV: Squadcast: "Everyone's on vacation so here's Dahud" Edition!''',
            # Raw string: same value as before, without relying on the
            # unrecognized "\." escape being passed through.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 6951,
            'timestamp': 1419028564,
            'upload_date': '20141219',
            'uploader': 'KSPTV',
            'uploader_id': 'ksptv',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the VOD at *url*, with m3u8 formats attached."""
        item_id = self._match_id(url)
        info = self._download_info(self._ITEM_SHORTCUT, item_id)
        access_token = self._download_json(
            '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
            'Downloading %s access token' % self._ITEM_TYPE)

        # The token and signature are arbitrary strings from the API, so they
        # must be percent-encoded before being placed in the query string
        # (TwitchStreamIE already builds its query with urlencode).
        # Values are UTF-8 encoded first because Python 2's urlencode calls
        # str() on them, which fails for non-ASCII unicode.
        # A list of pairs keeps the parameter order stable.
        query = compat_urllib_parse.urlencode([
            ('nauth', access_token['token'].encode('utf-8')),
            ('nauthsig', access_token['sig'].encode('utf-8')),
        ])

        formats = self._extract_m3u8_formats(
            '%s/vod/%s?%s' % (self._USHER_BASE, item_id, query),
            item_id, 'mp4')
        info['formats'] = formats
        return info
204
205
class TwitchPlaylistBaseIE(TwitchBaseIE):
    """Base for extractors that list all videos of a channel.

    Subclasses provide ``_PLAYLIST_TYPE`` (used in progress notes) and may
    override ``_PLAYLIST_URL``.
    """

    # Template taking (channel_id, offset, limit).
    _PLAYLIST_URL = '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
    # Number of videos requested per page.
    _PAGE_LIMIT = 100

    def _extract_playlist(self, channel_id):
        """Page through the channel's video list and return it as a playlist.

        Pages are requested until one comes back with an empty 'videos' list.
        The playlist title is the channel's display name, falling back to
        its name.
        """
        channel_info = self._download_json(
            '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
            channel_id, 'Downloading channel info JSON')
        channel_name = channel_info.get('display_name') or channel_info.get('name')

        page_size = self._PAGE_LIMIT
        entries = []
        offset = 0
        page_num = 0
        while True:
            page_num += 1
            page_url = self._PLAYLIST_URL % (channel_id, offset, page_size)
            note = 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, page_num)
            videos = self._download_json(page_url, channel_id, note)['videos']
            if not videos:
                break
            entries.extend([self.url_result(video['url']) for video in videos])
            offset += page_size
        return self.playlist_result(entries, channel_id, channel_name)

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        return self._extract_playlist(channel_id)
231
232
class TwitchProfileIE(TwitchPlaylistBaseIE):
    """Extractor for ``twitch.tv/<channel>/profile`` URLs (playlist of the channel's videos)."""

    IE_NAME = 'twitch:profile'

    # Label used in the playlist download notes.
    _PLAYLIST_TYPE = 'profile'

    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE

    _TEST = {
        'url': 'http://www.twitch.tv/vanillatv/profile',
        'playlist_mincount': 412,
        'info_dict': {
            'id': 'vanillatv',
            'title': 'VanillaTV',
        },
    }
246
247
class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
    """Extractor for ``twitch.tv/<channel>/profile/past_broadcasts`` URLs.

    Same listing as the base class, with ``broadcasts=true`` added to the
    playlist request.
    """

    IE_NAME = 'twitch:past_broadcasts'

    # Label used in the playlist download notes.
    _PLAYLIST_TYPE = 'past broadcasts'

    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE

    # Plain concatenation: the base template still contains %s/%d placeholders.
    _PLAYLIST_URL = ''.join((TwitchPlaylistBaseIE._PLAYLIST_URL, '&broadcasts=true'))

    _TEST = {
        'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
        'playlist_mincount': 54,
        'info_dict': {
            'id': 'spamfish',
            'title': 'Spamfish',
        },
    }
262
263
class TwitchStreamIE(TwitchBaseIE):
    """Extractor for live streams at ``twitch.tv/<channel>``.

    When the API reports no running stream for the channel, extraction is
    handed over to the channel's profile URL instead.
    """

    IE_NAME = 'twitch:stream'
    _VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE

    _TEST = {
        'url': 'http://www.twitch.tv/shroomztv',
        'info_dict': {
            'id': '12772022048',
            'display_id': 'shroomztv',
            'ext': 'mp4',
            'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
            'is_live': True,
            'timestamp': 1421928037,
            'upload_date': '20150122',
            'uploader': 'ShroomzTV',
            'uploader_id': 'shroomztv',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict of the channel's live stream.

        Returns a url_result pointing at the channel profile when the
        stream is offline.
        """
        channel_id = self._match_id(url)

        stream = self._download_json(
            '%s/kraken/streams/%s' % (self._API_BASE, channel_id), channel_id,
            'Downloading stream JSON').get('stream')

        # Fallback on profile extraction if stream is offline
        if not stream:
            return self.url_result(
                'http://www.twitch.tv/%s/profile' % channel_id,
                'TwitchProfile', channel_id)

        access_token = self._download_json(
            '%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id,
            'Downloading channel access token')

        query = {
            'allow_source': 'true',
            'p': '9386337',
            'player': 'twitchweb',
            'segment_preference': '4',
            'sig': access_token['sig'],
            'token': access_token['token'],
        }

        # The encoded query must stay a text string: on Python 3,
        # interpolating bytes with %s would embed "b'...'" in the URL.
        formats = self._extract_m3u8_formats(
            '%s/api/channel/hls/%s.m3u8?%s'
            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
            channel_id, 'mp4')

        view_count = stream.get('viewers')
        timestamp = parse_iso8601(stream.get('created_at'))

        channel = stream['channel']
        title = self._live_title(channel.get('display_name') or channel.get('name'))
        description = channel.get('status')

        # Only preview entries whose key ends in "<width>x<height>.jpg" are
        # kept, since the dimensions are parsed from the key.
        thumbnails = []
        for thumbnail_key, thumbnail_url in stream['preview'].items():
            m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
            if not m:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

        return {
            'id': compat_str(stream['_id']),
            'display_id': channel_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'uploader': channel.get('display_name'),
            'uploader_id': channel.get('name'),
            'timestamp': timestamp,
            'view_count': view_count,
            'formats': formats,
            'is_live': True,
        }