youtube_dl/extractor/zattoo.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 from uuid import uuid4
   6
   7 from .common import InfoExtractor
   8 from ..compat import (
   9     compat_HTTPError,
  10     compat_str,
  11 )
  12 from ..utils import (
  13     ExtractorError,
  14     int_or_none,
  15     try_get,
  16     url_or_none,
  17     urlencode_postdata,
  18 )
  19
  20
  21 class ZattooPlatformBaseIE(InfoExtractor):
  22     _power_guide_hash = None
  23
  24     def _host_url(self):
  25         return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
  26
  27     def _login(self):
  28         username, password = self._get_login_info()
  29         if not username or not password:
  30             self.raise_login_required(
  31                 'A valid %s account is needed to access this media.'
  32                 % self._NETRC_MACHINE)
  33
  34         try:
  35             data = self._download_json(
  36                 '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in',
  37                 data=urlencode_postdata({
  38                     'login': username,
  39                     'password': password,
  40                     'remember': 'true',
  41                 }), headers={
  42                     'Referer': '%s/login' % self._host_url(),
  43                     'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
  44                 })
  45         except ExtractorError as e:
  46             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
  47                 raise ExtractorError(
  48                     'Unable to login: incorrect username and/or password',
  49                     expected=True)
  50             raise
  51
  52         self._power_guide_hash = data['session']['power_guide_hash']
  53
  54     def _real_initialize(self):
  55         webpage = self._download_webpage(
  56             self._host_url(), None, 'Downloading app token')
  57         app_token = self._html_search_regex(
  58             r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
  59             webpage, 'app token', group='token')
  60         app_version = self._html_search_regex(
  61             r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')
  62
  63         # Will setup appropriate cookies
  64         self._request_webpage(
  65             '%s/zapi/v2/session/hello' % self._host_url(), None,
  66             'Opening session', data=urlencode_postdata({
  67                 'client_app_token': app_token,
  68                 'uuid': compat_str(uuid4()),
  69                 'lang': 'en',
  70                 'app_version': app_version,
  71                 'format': 'json',
  72             }))
  73
  74         self._login()
  75
  76     def _extract_cid(self, video_id, channel_name):
  77         channel_groups = self._download_json(
  78             '%s/zapi/v2/cached/channels/%s' % (self._host_url(),
  79                                                self._power_guide_hash),
  80             video_id, 'Downloading channel list',
  81             query={'details': False})['channel_groups']
  82         channel_list = []
  83         for chgrp in channel_groups:
  84             channel_list.extend(chgrp['channels'])
  85         try:
  86             return next(
  87                 chan['cid'] for chan in channel_list
  88                 if chan.get('cid') and (
  89                     chan.get('display_alias') == channel_name or
  90                     chan.get('cid') == channel_name))
  91         except StopIteration:
  92             raise ExtractorError('Could not extract channel id')
  93
  94     def _extract_cid_and_video_info(self, video_id):
  95         data = self._download_json(
  96             '%s/zapi/v2/cached/program/power_details/%s' % (
  97                 self._host_url(), self._power_guide_hash),
  98             video_id,
  99             'Downloading video information',
 100             query={
 101                 'program_ids': video_id,
 102                 'complete': True,
 103             })
 104
 105         p = data['programs'][0]
 106         cid = p['cid']
 107
 108         info_dict = {
 109             'id': video_id,
 110             'title': p.get('t') or p['et'],
 111             'description': p.get('d'),
 112             'thumbnail': p.get('i_url'),
 113             'creator': p.get('channel_name'),
 114             'episode': p.get('et'),
 115             'episode_number': int_or_none(p.get('e_no')),
 116             'season_number': int_or_none(p.get('s_no')),
 117             'release_year': int_or_none(p.get('year')),
 118             'categories': try_get(p, lambda x: x['c'], list),
 119             'tags': try_get(p, lambda x: x['g'], list)
 120         }
 121
 122         return cid, info_dict
 123
 124     def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
 125         postdata_common = {
 126             'https_watch_urls': True,
 127         }
 128
 129         if is_live:
 130             postdata_common.update({'timeshift': 10800})
 131             url = '%s/zapi/watch/live/%s' % (self._host_url(), cid)
 132         elif record_id:
 133             url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id)
 134         else:
 135             url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id)
 136
 137         formats = []
 138         for stream_type in ('dash', 'hls', 'hls5', 'hds'):
 139             postdata = postdata_common.copy()
 140             postdata['stream_type'] = stream_type
 141
 142             data = self._download_json(
 143                 url, video_id, 'Downloading %s formats' % stream_type.upper(),
 144                 data=urlencode_postdata(postdata), fatal=False)
 145             if not data:
 146                 continue
 147
 148             watch_urls = try_get(
 149                 data, lambda x: x['stream']['watch_urls'], list)
 150             if not watch_urls:
 151                 continue
 152
 153             for watch in watch_urls:
 154                 if not isinstance(watch, dict):
 155                     continue
 156                 watch_url = url_or_none(watch.get('url'))
 157                 if not watch_url:
 158                     continue
 159                 format_id_list = [stream_type]
 160                 maxrate = watch.get('maxrate')
 161                 if maxrate:
 162                     format_id_list.append(compat_str(maxrate))
 163                 audio_channel = watch.get('audio_channel')
 164                 if audio_channel:
 165                     format_id_list.append(compat_str(audio_channel))
 166                 preference = 1 if audio_channel == 'A' else None
 167                 format_id = '-'.join(format_id_list)
 168                 if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
 169                     this_formats = self._extract_mpd_formats(
 170                         watch_url, video_id, mpd_id=format_id, fatal=False)
 171                 elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
 172                     this_formats = self._extract_m3u8_formats(
 173                         watch_url, video_id, 'mp4',
 174                         entry_protocol='m3u8_native', m3u8_id=format_id,
 175                         fatal=False)
 176                 elif stream_type == 'hds':
 177                     this_formats = self._extract_f4m_formats(
 178                         watch_url, video_id, f4m_id=format_id, fatal=False)
 179                 elif stream_type == 'smooth_playready':
 180                     this_formats = self._extract_ism_formats(
 181                         watch_url, video_id, ism_id=format_id, fatal=False)
 182                 else:
 183                     assert False
 184                 for this_format in this_formats:
 185                     this_format['preference'] = preference
 186                 formats.extend(this_formats)
 187         self._sort_formats(formats)
 188         return formats
 189
 190     def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
 191         if is_live:
 192             cid = self._extract_cid(video_id, channel_name)
 193             info_dict = {
 194                 'id': channel_name,
 195                 'title': self._live_title(channel_name),
 196                 'is_live': True,
 197             }
 198         else:
 199             cid, info_dict = self._extract_cid_and_video_info(video_id)
 200         formats = self._extract_formats(
 201             cid, video_id, record_id=record_id, is_live=is_live)
 202         info_dict['formats'] = formats
 203         return info_dict
 204
 205
 206 class QuicklineBaseIE(ZattooPlatformBaseIE):
 207     _NETRC_MACHINE = 'quickline'
 208     _HOST = 'mobiltv.quickline.com'
 209
 210
 211 class QuicklineIE(QuicklineBaseIE):
 212     _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)' % re.escape(QuicklineBaseIE._HOST)
 213
 214     _TEST = {
 215         'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
 216         'only_matching': True,
 217     }
 218
 219     def _real_extract(self, url):
 220         channel_name, video_id = re.match(self._VALID_URL, url).groups()
 221         return self._extract_video(channel_name, video_id)
 222
 223
 224 class QuicklineLiveIE(QuicklineBaseIE):
 225     _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<id>[^/]+)' % re.escape(QuicklineBaseIE._HOST)
 226
 227     _TEST = {
 228         'url': 'https://mobiltv.quickline.com/watch/srf1',
 229         'only_matching': True,
 230     }
 231
 232     @classmethod
 233     def suitable(cls, url):
 234         return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url)
 235
 236     def _real_extract(self, url):
 237         channel_name = video_id = self._match_id(url)
 238         return self._extract_video(channel_name, video_id, is_live=True)
 239
 240
 241 class ZattooBaseIE(ZattooPlatformBaseIE):
 242     _NETRC_MACHINE = 'zattoo'
 243     _HOST = 'zattoo.com'
 244
 245
 246 def _make_valid_url(tmpl, host):
 247     return tmpl % re.escape(host)
 248
 249
 250 class ZattooIE(ZattooBaseIE):
 251     _VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
 252     _VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST)
 253
 254     # Since regular videos are only available for 7 days and recorded videos
 255     # are only available for a specific user, we cannot have detailed tests.
 256     _TESTS = [{
 257         'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
 258         'only_matching': True,
 259     }, {
 260         'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
 261         'only_matching': True,
 262     }]
 263
 264     def _real_extract(self, url):
 265         channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups()
 266         return self._extract_video(channel_name, video_id, record_id)
 267
 268
 269 class ZattooLiveIE(ZattooBaseIE):
 270     _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'
 271
 272     _TEST = {
 273         'url': 'https://zattoo.com/watch/srf1',
 274         'only_matching': True,
 275     }
 276
 277     @classmethod
 278     def suitable(cls, url):
 279         return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url)
 280
 281     def _real_extract(self, url):
 282         channel_name = video_id = self._match_id(url)
 283         return self._extract_video(channel_name, video_id, is_live=True)
 284
 285
 286 class NetPlusIE(ZattooIE):
 287     _NETRC_MACHINE = 'netplus'
 288     _HOST = 'netplus.tv'
 289     _API_HOST = 'www.%s' % _HOST
 290     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 291
 292     _TESTS = [{
 293         'url': 'https://www.netplus.tv/watch/abc/123-abc',
 294         'only_matching': True,
 295     }]
 296
 297
 298 class MNetTVIE(ZattooIE):
 299     _NETRC_MACHINE = 'mnettv'
 300     _HOST = 'tvplus.m-net.de'
 301     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 302
 303     _TESTS = [{
 304         'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
 305         'only_matching': True,
 306     }]
 307
 308
 309 class WalyTVIE(ZattooIE):
 310     _NETRC_MACHINE = 'walytv'
 311     _HOST = 'player.waly.tv'
 312     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 313
 314     _TESTS = [{
 315         'url': 'https://player.waly.tv/watch/abc/123-abc',
 316         'only_matching': True,
 317     }]
 318
 319
 320 class BBVTVIE(ZattooIE):
 321     _NETRC_MACHINE = 'bbvtv'
 322     _HOST = 'bbv-tv.net'
 323     _API_HOST = 'www.%s' % _HOST
 324     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 325
 326     _TESTS = [{
 327         'url': 'https://www.bbv-tv.net/watch/abc/123-abc',
 328         'only_matching': True,
 329     }]
 330
 331
 332 class VTXTVIE(ZattooIE):
 333     _NETRC_MACHINE = 'vtxtv'
 334     _HOST = 'vtxtv.ch'
 335     _API_HOST = 'www.%s' % _HOST
 336     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 337
 338     _TESTS = [{
 339         'url': 'https://www.vtxtv.ch/watch/abc/123-abc',
 340         'only_matching': True,
 341     }]
 342
 343
 344 class MyVisionTVIE(ZattooIE):
 345     _NETRC_MACHINE = 'myvisiontv'
 346     _HOST = 'myvisiontv.ch'
 347     _API_HOST = 'www.%s' % _HOST
 348     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 349
 350     _TESTS = [{
 351         'url': 'https://www.myvisiontv.ch/watch/abc/123-abc',
 352         'only_matching': True,
 353     }]
 354
 355
 356 class GlattvisionTVIE(ZattooIE):
 357     _NETRC_MACHINE = 'glattvisiontv'
 358     _HOST = 'iptv.glattvision.ch'
 359     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 360
 361     _TESTS = [{
 362         'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
 363         'only_matching': True,
 364     }]
 365
 366
 367 class SAKTVIE(ZattooIE):
 368     _NETRC_MACHINE = 'saktv'
 369     _HOST = 'saktv.ch'
 370     _API_HOST = 'www.%s' % _HOST
 371     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 372
 373     _TESTS = [{
 374         'url': 'https://www.saktv.ch/watch/abc/123-abc',
 375         'only_matching': True,
 376     }]
 377
 378
 379 class EWETVIE(ZattooIE):
 380     _NETRC_MACHINE = 'ewetv'
 381     _HOST = 'tvonline.ewe.de'
 382     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 383
 384     _TESTS = [{
 385         'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
 386         'only_matching': True,
 387     }]
 388
 389
 390 class QuantumTVIE(ZattooIE):
 391     _NETRC_MACHINE = 'quantumtv'
 392     _HOST = 'quantum-tv.com'
 393     _API_HOST = 'www.%s' % _HOST
 394     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 395
 396     _TESTS = [{
 397         'url': 'https://www.quantum-tv.com/watch/abc/123-abc',
 398         'only_matching': True,
 399     }]
 400
 401
 402 class OsnatelTVIE(ZattooIE):
 403     _NETRC_MACHINE = 'osnateltv'
 404     _HOST = 'tvonline.osnatel.de'
 405     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 406
 407     _TESTS = [{
 408         'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
 409         'only_matching': True,
 410     }]
 411
 412
 413 class EinsUndEinsTVIE(ZattooIE):
 414     _NETRC_MACHINE = '1und1tv'
 415     _HOST = '1und1.tv'
 416     _API_HOST = 'www.%s' % _HOST
 417     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 418
 419     _TESTS = [{
 420         'url': 'https://www.1und1.tv/watch/abc/123-abc',
 421         'only_matching': True,
 422     }]
 423
 424
 425 class SaltTVIE(ZattooIE):
 426     _NETRC_MACHINE = 'salttv'
 427     _HOST = 'tv.salt.ch'
 428     _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
 429
 430     _TESTS = [{
 431         'url': 'https://tv.salt.ch/watch/abc/123-abc',
 432         'only_matching': True,
 433     }]