import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
class NRKPlaylistIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:[^/]+/)*(?P<id>[^/]+)'
+ _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
'info_dict': {
'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
'title': 'Gjenopplev den historiske solformørkelsen',
'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
},
- 'playlist_mincount': 2,
- }
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+ 'info_dict': {
+ 'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+ 'title': 'Rivertonprisen til Karin Fossum',
+ 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+ },
+ 'playlist_count': 5,
+ }]
def _real_extract(self, url):
playlist_id = self._match_id(url)
entries = [
self.url_result('nrk:%s' % video_id, 'NRK')
for video_id in re.findall(
- r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="(\d+)"', webpage)
+ r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
+ webpage)
]
playlist_title = self._og_search_title(webpage)
url = "%s%s" % (baseurl, subtitlesurl)
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
captions = self._download_xml(
- url, video_id, 'Downloading subtitles',
- transform_source=lambda s: s.replace(r'<br />', '\r\n'))
+ url, video_id, 'Downloading subtitles')
lang = captions.get('lang', 'no')
- ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
- srt = ''
- for pos, p in enumerate(ps):
- begin = parse_duration(p.get('begin'))
- duration = parse_duration(p.get('dur'))
- starttime = self._subtitles_timecode(begin)
- endtime = self._subtitles_timecode(begin + duration)
- srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
return {lang: [
{'ext': 'ttml', 'url': url},
- {'ext': 'srt', 'data': srt},
]}
def _extract_f4m(self, manifest_url, video_id):