class NRKBaseIE(InfoExtractor):
_faked_ip = None
- def _download_webpage(self, *args, **kwargs):
+ def _download_webpage_handle(self, *args, **kwargs):
# NRK checks X-Forwarded-For HTTP header in order to figure out the
# origin of the client behind proxy. This allows to bypass geo
# restriction by faking this header's value to some Norway IP.
# We will do so once we encounter any geo restriction error.
if self._faked_ip:
- kwargs.setdefault('headers', {})['X-Forwarded-For'] = self._faked_ip
- return super(NRKBaseIE, self)._download_webpage(*args, **kwargs)
+ # NB: str is intentional
+ kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip
+ return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs)
def _fake_ip(self):
# Use fake IP from 37.191.128.0/17 in order to workaround geo
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
+ http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {}
+
entries = []
+ conviva = data.get('convivaStatistics') or {}
+ live = (data.get('mediaElementType') == 'Live' or
+ data.get('isLive') is True or conviva.get('isLive'))
+
+ def make_title(t):
+ return self._live_title(t) if live else t
+
media_assets = data.get('mediaAssets')
if media_assets and isinstance(media_assets, list):
def video_id_and_title(idx):
if not formats:
continue
self._sort_formats(formats)
+
+ # Some f4m streams may not work with hdcore in fragments' URLs
+ for f in formats:
+ extra_param = f.get('extra_param_to_segment_url')
+ if extra_param and 'hdcore' in extra_param:
+ del f['extra_param_to_segment_url']
+
entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration'))
subtitles = {}
})
entries.append({
'id': asset.get('carrierId') or entry_id,
- 'title': entry_title,
+ 'title': make_title(entry_title),
'duration': duration,
'subtitles': subtitles,
'formats': formats,
+ 'http_headers': http_headers,
})
if not entries:
duration = parse_duration(data.get('duration'))
entries = [{
'id': video_id,
- 'title': title,
+ 'title': make_title(title),
'duration': duration,
'formats': formats,
}]
if not entries:
- message_type = data.get('messageType')
- if message_type == 'ProgramIsGeoBlocked' and not self._faked_ip:
+ message_type = data.get('messageType', '')
+ # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
+ if 'IsGeoBlocked' in message_type and not self._faked_ip:
self.report_warning(
'Video is geo restricted, trying to fake IP')
self._fake_ip()
message_type, message_type)),
expected=True)
- conviva = data.get('convivaStatistics') or {}
series = conviva.get('seriesName') or data.get('seriesTitle')
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio'
- _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
+ _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:tv|radio)\.nrk(?:super)?\.no/
+ (?:serie/[^/]+|program)/
+ (?![Ee]pisodes)%s
+ (?:/\d{2}-\d{2}-\d{4})?
+ (?:\#del=(?P<part_id>\d+))?
+ ''' % _EPISODE_RE
_API_HOST = 'psapi-we.nrk.no'
_TESTS = [{
}]
-class NRKPlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+class NRKTVDirekteIE(NRKTVIE):
+ IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
+ _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/direkte/nrk1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
+ 'only_matching': True,
+ }]
+
+
+class NRKPlaylistBaseIE(InfoExtractor):
+ def _extract_description(self, webpage):
+ pass
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
+ for video_id in re.findall(self._ITEM_RE, webpage)
+ ]
+
+ playlist_title = self. _extract_title(webpage)
+ playlist_description = self._extract_description(webpage)
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+
+class NRKPlaylistIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+ _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
_TESTS = [{
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
'info_dict': {
'playlist_count': 5,
}]
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
+ def _extract_title(self, webpage):
+ return self._og_search_title(webpage, fatal=False)
- webpage = self._download_webpage(url, playlist_id)
+ def _extract_description(self, webpage):
+ return self._og_search_description(webpage)
- entries = [
- self.url_result('nrk:%s' % video_id, 'NRK')
- for video_id in re.findall(
- r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
- webpage)
- ]
- playlist_title = self._og_search_title(webpage)
- playlist_description = self._og_search_description(webpage)
+class NRKTVEpisodesIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
+ _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
+ 'info_dict': {
+ 'id': '69031',
+ 'title': 'Nytt på nytt, sesong: 201210',
+ },
+ 'playlist_count': 4,
+ }]
- return self.playlist_result(
- entries, playlist_id, playlist_title, playlist_description)
+ def _extract_title(self, webpage):
+ return self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
class NRKSkoleIE(InfoExtractor):