From: Sergey M․
Date: Sat, 16 May 2015 21:32:53 +0000 (+0600)
Subject: [tv2:article] Add extractor (Closes #5724)
X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=588b82bbf8c90981c54f180eca40e6c743f8f89f;p=youtube-dl.git

[tv2:article] Add extractor (Closes #5724)
---

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index fb4f63ca3..6f8c261d5 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -572,7 +572,10 @@ from .tumblr import TumblrIE
 from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
-from .tv2 import TV2IE
+from .tv2 import (
+    TV2IE,
+    TV2ArticleIE,
+)
 from .tv4 import TV4IE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py
index 2dcc0e971..fa338b936 100644
--- a/youtube_dl/extractor/tv2.py
+++ b/youtube_dl/extractor/tv2.py
@@ -1,12 +1,15 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     int_or_none,
     float_or_none,
     parse_iso8601,
+    remove_end,
 )
 
 
@@ -91,3 +94,33 @@ class TV2IE(InfoExtractor):
             'categories': categories,
             'formats': formats,
         }
+
+
+class TV2ArticleIE(InfoExtractor):
+    _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
+        'info_dict': {
+            'id': '6930542',
+            'title': 'Russen hetses etter pingvintyveri – innrømmer Ã¥ ha Ã¥pnet luken pÃ¥ buret',
+            'description': 'md5:339573779d3eea3542ffe12006190954',
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'http://www.tv2.no/a/6930542',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2')
+            for video_id in re.findall(r'data-assetid="(\d+)"', webpage)]
+
+        title = remove_end(self._og_search_title(webpage), ' - TV2.no')
+        description = remove_end(self._og_search_description(webpage), ' - TV2.no')
+
+        return self.playlist_result(entries, playlist_id, title, description)