]> gitweb @ CieloNegro.org - youtube-dl.git/blob - youtube_dl/extractor/funk.py
[nhk] add support for audio URLs
[youtube-dl.git] / youtube_dl / extractor / funk.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
import itertools
import re

from .common import InfoExtractor
from .nexx import NexxIE
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
    try_get,
)
14
15
class FunkBaseIE(InfoExtractor):
    """Shared helpers for the funk.net extractors.

    Holds the static API token and the default request headers, and builds
    the ``url_transparent`` result that delegates to the Nexx extractor.
    """

    # Static token sent in the ``authorization`` request header.
    _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
    _HEADERS = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
        'authorization': _AUTH,
    }

    @staticmethod
    def _make_headers(referer):
        """Return a fresh copy of the default headers with ``Referer`` set.

        The class-level ``_HEADERS`` dict itself is never mutated.
        """
        request_headers = dict(FunkBaseIE._HEADERS)
        request_headers.update({'Referer': referer})
        return request_headers

    def _make_url_result(self, video):
        """Build a ``url_transparent`` result for a funk video dict.

        ``video`` must contain ``sourceId``; the remaining metadata fields
        are optional and come out as ``None`` when absent.
        """
        source_id = video['sourceId']
        result = {
            '_type': 'url_transparent',
            'url': 'nexx:741:%s' % source_id,
            'ie_key': NexxIE.ie_key(),
            'id': source_id,
        }
        # Plain text fields are passed through unchanged.
        for field in ('title', 'description'):
            result[field] = video.get(field)
        # Numeric fields go through int_or_none.
        numeric_fields = (
            ('duration', 'duration'),
            ('season_number', 'seasonNr'),
            ('episode_number', 'episodeNr'),
        )
        for target, source in numeric_fields:
            result[target] = int_or_none(video.get(source))
        return result
42
43
class FunkMixIE(FunkBaseIE):
    """Extractor for a single video inside a funk.net curated "mix" list."""

    _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
        'md5': '8edf617c2f2b7c9847dfda313f199009',
        'info_dict': {
            'id': '123748',
            'ext': 'mp4',
            'title': '"Die realste Kifferdoku aller Zeiten"',
            'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
            'timestamp': 1490274721,
            'upload_date': '20170323',
        },
    }]

    def _real_extract(self, url):
        """Resolve a mix URL to the matching video and delegate to Nexx.

        Downloads the curated lists, picks the one whose ``entityId`` or
        ``alias`` equals the mix id from the URL, then picks the video whose
        alias equals the alias from the URL.

        Raises ExtractorError when either the mix or the video cannot be
        found, instead of leaking a bare StopIteration/KeyError.
        """
        mobj = re.match(self._VALID_URL, url)
        mix_id = mobj.group('id')
        alias = mobj.group('alias')

        response = self._download_json(
            'https://www.funk.net/api/v3.1/curation/curatedLists/',
            mix_id, headers=self._make_headers(url), query={
                'size': 100,
            })
        lists = try_get(
            response, lambda x: x['_embedded']['curatedListList'], list) or []

        # The URL may carry either the entity id or the alias of the list.
        curated_list = next(
            (item for item in lists
             if mix_id in (item.get('entityId'), item.get('alias'))),
            None)
        if not curated_list:
            raise ExtractorError('Unable to find mix %s' % mix_id)

        metas = curated_list.get('videoMetas') or []
        video = next(
            (meta['videoDataDelegate']
             for meta in metas
             if try_get(
                 meta, lambda x: x['videoDataDelegate']['alias'],
                 compat_str) == alias),
            None)
        if not video:
            raise ExtractorError(
                'Unable to find video %s in mix %s' % (alias, mix_id))

        return self._make_url_result(video)
81
82
class FunkChannelIE(FunkBaseIE):
    """Extractor for a single video addressed through a funk.net channel."""

    _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
        'info_dict': {
            'id': '1155821',
            'ext': 'mp4',
            'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
            'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
            'timestamp': 1514507395,
            'upload_date': '20171229',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # only available via byIdList API
        'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
        'info_dict': {
            'id': '205067',
            'ext': 'mp4',
            'title': 'Martin Sonneborn erklärt die EU',
            'description': 'md5:050f74626e4ed87edf4626d2024210c0',
            'timestamp': 1494424042,
            'upload_date': '20170510',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
        'only_matching': True,
    }]

    def _find_by_channel_alias(self, channel_id, alias, headers):
        """Page through the byChannelAlias API looking for ``alias``.

        Returns the matching video dict, or None when a page fails to
        download, a page carries no video list, or the pages run out.
        """
        # Id-based channels are currently broken on their side: webplayer
        # tries to process them via byChannelAlias endpoint and fails
        # predictably.
        for page_num in itertools.count():
            # channel_id is the video_id argument; the progress message
            # goes into ``note``.
            page = self._download_json(
                'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
                % channel_id, channel_id,
                'Downloading byChannelAlias JSON page %d' % (page_num + 1),
                headers=headers, query={
                    'filterFsk': 'false',
                    'sort': 'creationDate,desc',
                    'size': 100,
                    'page': page_num,
                }, fatal=False)
            if not page:
                return None
            video_list = try_get(
                page, lambda x: x['_embedded']['videoList'], list)
            if not video_list:
                return None
            video = next(
                (r for r in video_list if r.get('alias') == alias), None)
            if video:
                return video
            # Stop once the API no longer advertises a following page.
            if not try_get(page, lambda x: x['_links']['next']):
                return None

    def _real_extract(self, url):
        """Resolve a channel URL to its video and delegate to Nexx.

        Three lookups are tried in order: the paged byChannelAlias listing,
        the byIdList endpoint, and finally the filter endpoint.

        Raises ExtractorError when none of them yields a video with the
        alias taken from the URL.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        alias = mobj.group('alias')

        headers = self._make_headers(url)

        video = self._find_by_channel_alias(channel_id, alias, headers)

        if not video:
            by_id_list = self._download_json(
                'https://www.funk.net/api/v3.0/content/videos/byIdList',
                channel_id, 'Downloading byIdList JSON', headers=headers,
                query={
                    'ids': alias,
                }, fatal=False)
            if by_id_list:
                video = try_get(by_id_list, lambda x: x['result'][0], dict)

        if not video:
            # Last resort; a download failure here stays fatal.
            filtered = self._download_json(
                'https://www.funk.net/api/v3.0/content/videos/filter',
                channel_id, 'Downloading filter JSON', headers=headers, query={
                    'channelId': channel_id,
                    'size': 100,
                })
            results = try_get(filtered, lambda x: x['result'], list) or []
            video = next(
                (r for r in results if r.get('alias') == alias), None)

        if not video:
            raise ExtractorError(
                'Unable to find video %s in channel %s' % (alias, channel_id))

        return self._make_url_result(video)