youtube_dl/extractor/funk.py

   1 # coding: utf-8
from __future__ import unicode_literals

import itertools
import re

from .common import InfoExtractor
from .nexx import NexxIE
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
    try_get,
)
  14
  15
  16 class FunkBaseIE(InfoExtractor):
  17     _HEADERS = {
  18         'Accept': '*/*',
  19         'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
  20         'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
  21     }
  22     _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
  23
  24     @staticmethod
  25     def _make_headers(referer):
  26         headers = FunkBaseIE._HEADERS.copy()
  27         headers['Referer'] = referer
  28         return headers
  29
  30     def _make_url_result(self, video):
  31         return {
  32             '_type': 'url_transparent',
  33             'url': 'nexx:741:%s' % video['sourceId'],
  34             'ie_key': NexxIE.ie_key(),
  35             'id': video['sourceId'],
  36             'title': video.get('title'),
  37             'description': video.get('description'),
  38             'duration': int_or_none(video.get('duration')),
  39             'season_number': int_or_none(video.get('seasonNr')),
  40             'episode_number': int_or_none(video.get('episodeNr')),
  41         }
  42
  43
  44 class FunkMixIE(FunkBaseIE):
  45     _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
  46     _TESTS = [{
  47         'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
  48         'md5': '8edf617c2f2b7c9847dfda313f199009',
  49         'info_dict': {
  50             'id': '123748',
  51             'ext': 'mp4',
  52             'title': '"Die realste Kifferdoku aller Zeiten"',
  53             'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
  54             'timestamp': 1490274721,
  55             'upload_date': '20170323',
  56         },
  57     }]
  58
  59     def _real_extract(self, url):
  60         mobj = re.match(self._VALID_URL, url)
  61         mix_id = mobj.group('id')
  62         alias = mobj.group('alias')
  63
  64         lists = self._download_json(
  65             'https://www.funk.net/api/v3.1/curation/curatedLists/',
  66             mix_id, headers=self._make_headers(url), query={
  67                 'size': 100,
  68             })['_embedded']['curatedListList']
  69
  70         metas = next(
  71             l for l in lists
  72             if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
  73         video = next(
  74             meta['videoDataDelegate']
  75             for meta in metas
  76             if try_get(
  77                 meta, lambda x: x['videoDataDelegate']['alias'],
  78                 compat_str) == alias)
  79
  80         return self._make_url_result(video)
  81
  82
  83 class FunkChannelIE(FunkBaseIE):
  84     _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
  85     _TESTS = [{
  86         'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
  87         'info_dict': {
  88             'id': '1155821',
  89             'ext': 'mp4',
  90             'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
  91             'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
  92             'timestamp': 1514507395,
  93             'upload_date': '20171229',
  94         },
  95         'params': {
  96             'skip_download': True,
  97         },
  98     }, {
  99         # only available via byIdList API
 100         'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
 101         'info_dict': {
 102             'id': '205067',
 103             'ext': 'mp4',
 104             'title': 'Martin Sonneborn erklärt die EU',
 105             'description': 'md5:050f74626e4ed87edf4626d2024210c0',
 106             'timestamp': 1494424042,
 107             'upload_date': '20170510',
 108         },
 109         'params': {
 110             'skip_download': True,
 111         },
 112     }, {
 113         'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
 114         'only_matching': True,
 115     }]
 116
 117     def _real_extract(self, url):
 118         mobj = re.match(self._VALID_URL, url)
 119         channel_id = mobj.group('id')
 120         alias = mobj.group('alias')
 121
 122         headers = self._make_headers(url)
 123
 124         video = None
 125
 126         # Id-based channels are currently broken on their side: webplayer
 127         # tries to process them via byChannelAlias endpoint and fails
 128         # predictably.
 129         for page_num in itertools.count():
 130             by_channel_alias = self._download_json(
 131                 'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
 132                 % channel_id,
 133                 'Downloading byChannelAlias JSON page %d' % (page_num + 1),
 134                 headers=headers, query={
 135                     'filterFsk': 'false',
 136                     'sort': 'creationDate,desc',
 137                     'size': 100,
 138                     'page': page_num,
 139                 }, fatal=False)
 140             if not by_channel_alias:
 141                 break
 142             video_list = try_get(
 143                 by_channel_alias, lambda x: x['_embedded']['videoList'], list)
 144             if not video_list:
 145                 break
 146             try:
 147                 video = next(r for r in video_list if r.get('alias') == alias)
 148                 break
 149             except StopIteration:
 150                 pass
 151             if not try_get(
 152                     by_channel_alias, lambda x: x['_links']['next']):
 153                 break
 154
 155         if not video:
 156             by_id_list = self._download_json(
 157                 'https://www.funk.net/api/v3.0/content/videos/byIdList',
 158                 channel_id, 'Downloading byIdList JSON', headers=headers,
 159                 query={
 160                     'ids': alias,
 161                 }, fatal=False)
 162             if by_id_list:
 163                 video = try_get(by_id_list, lambda x: x['result'][0], dict)
 164
 165         if not video:
 166             results = self._download_json(
 167                 'https://www.funk.net/api/v3.0/content/videos/filter',
 168                 channel_id, 'Downloading filter JSON', headers=headers, query={
 169                     'channelId': channel_id,
 170                     'size': 100,
 171                 })['result']
 172             video = next(r for r in results if r.get('alias') == alias)
 173
 174         return self._make_url_result(video)