2 from __future__ import unicode_literals
4 from .common import InfoExtractor
11 class RuvIE(InfoExtractor):
12 _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
15 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
16 'md5': '66347652f4e13e71936817102acc1724',
19 'display_id': 'fh-valur/20170516',
21 'title': 'FH - Valur',
22 'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
23 'timestamp': 1494963600,
24 'upload_date': '20170516',
28 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
29 'md5': '395ea250c8a13e5fdb39d4670ef85378',
32 'display_id': 'morgunutvarpid/20170619',
34 'title': 'Morgunútvarpið',
35 'description': 'md5:a4cf1202c0a1645ca096b06525915418',
36 'timestamp': 1497855000,
37 'upload_date': '20170619',
40 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
41 'only_matching': True,
43 'url': 'http://www.ruv.is/node/1151854',
44 'only_matching': True,
46 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
47 'only_matching': True,
49 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
50 'only_matching': True,
53 def _real_extract(self, url):
54 display_id = self._match_id(url)
56 webpage = self._download_webpage(url, display_id)
58 title = self._og_search_title(webpage)
60 FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'
62 media_url = self._html_search_regex(
63 FIELD_RE % 'src', webpage, 'video URL', group='url')
65 video_id = self._search_regex(
66 r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
67 webpage, 'video id', default=display_id)
69 ext = determine_ext(media_url)
72 formats = self._extract_m3u8_formats(
73 media_url, video_id, 'mp4', entry_protocol='m3u8_native',
86 description = self._og_search_description(webpage, default=None)
87 thumbnail = self._og_search_thumbnail(
88 webpage, default=None) or self._search_regex(
89 FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False)
90 timestamp = unified_timestamp(self._html_search_meta(
91 'article:published_time', webpage, 'timestamp', fatal=False))
95 'display_id': display_id,
97 'description': description,
98 'thumbnail': thumbnail,
99 'timestamp': timestamp,