-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
formats.extend(self._extract_m3u8_formats(
source_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
+ elif mimetype == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id='dash', fatal=False))
else:
tbr = fix_bitrate(source['bitrate'])
if protocol in ('rtmp', 'rtmpe'):
class ProSiebenSat1IE(ProSiebenSat1BaseIE):
IE_NAME = 'prosiebensat1'
IE_DESC = 'ProSiebenSat.1 Digital'
# Verbose-mode pattern: an optional "www." prefix, then either one of the
# listed brand hosts under .de/.at/.ch, or ran.de / fem.com. Everything after
# the first slash following the host is captured as ``id``.
_VALID_URL = r'''(?x)
                https?://
                    (?:www\.)?
                    (?:
                        (?:
                            prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
                        )\.(?:de|at|ch)|
                        ran\.de|fem\.com
                    )
                    /(?P<id>.+)
                '''
_TESTS = [
{
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
'info_dict': {
'id': '2104602',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Episode 18 - Staffel 2',
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
'upload_date': '20131231',
'duration': 5845.04,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
{
'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
'info_dict': {
'id': '2572814',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Andreas Kümmert: Rocket Man',
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
'upload_date': '20131017',
'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
'info_dict': {
'id': '2156342',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Kurztrips zum Valentinstag',
'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
'duration': 307.24,
'description': 'md5:63b8963e71f481782aeea877658dec84',
},
'playlist_count': 2,
+ 'skip': 'This video is unavailable',
},
{
'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
'info_dict': {
'id': '4187506',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Best of Circus HalliGalli',
'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
'upload_date': '20151229',
'skip_download': True,
},
},
+ {
+ # title in <h2 class="subtitle">
+ 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
+ 'info_dict': {
+ 'id': '4895826',
+ 'ext': 'mp4',
+ 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
+ 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
+ 'upload_date': '20170302',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'geo restricted to Germany',
+ },
{
# geo restricted to Germany
'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
'only_matching': True,
},
+ {
+ # geo restricted to Germany
+ 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
+ 'only_matching': True,
+ },
]
_TOKEN = 'prosieben'
r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
+ r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
]
_DESCRIPTION_REGEXES = [
r'<p itemprop="description">\s*(.+?)</p>',
def _extract_clip(self, url, webpage):
clip_id = self._html_search_regex(
self._CLIPID_REGEXES, webpage, 'clip id')
- title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
+ title = self._html_search_regex(
+ self._TITLE_REGEXES, webpage, 'title',
+ default=None) or self._og_search_title(webpage)
info = self._extract_video_info(url, clip_id)
description = self._html_search_regex(
- self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
+ self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
+ if description is None:
+ description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._html_search_regex(
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
def _extract_playlist(self, url, webpage):
    """Build a playlist result from the clips embedded in a playlist page.

    The playlist id is looked up in the page with ``_PLAYLIST_ID_REGEXES``.
    The clip list is the JSON array assigned to the ``contentResources``
    JavaScript variable in the page. Each item that carries an ``id`` (or,
    failing that, a ``upc``) is resolved through ``_extract_video_info``
    and the result is overlaid with metadata taken from the JSON item;
    items with neither key are skipped.

    :param url: URL of the playlist page, forwarded to
        ``_extract_video_info`` for every clip.
    :param webpage: HTML source of the playlist page.
    :returns: whatever ``self.playlist_result(entries, playlist_id)``
        returns.
    """
    playlist_id = self._html_search_regex(
        self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
    playlist = self._parse_json(
        self._search_regex(
            r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
            webpage, 'playlist'),
        playlist_id)
    entries = []
    for item in playlist:
        clip_id = item.get('id') or item.get('upc')
        if not clip_id:
            continue
        info = self._extract_video_info(url, clip_id)
        # ``item.get('teaser', {})`` only falls back to ``{}`` when the key
        # is missing; an explicit JSON null would come back as None and
        # break the chained ``.get``. Normalise to a dict once instead.
        teaser = item.get('teaser')
        if not isinstance(teaser, dict):
            teaser = {}
        info.update({
            'id': clip_id,
            'title': item.get('title') or teaser.get('headline'),
            'description': teaser.get('description'),
            'thumbnail': item.get('poster'),
            'duration': float_or_none(item.get('duration')),
            'series': item.get('tvShowTitle'),
            'uploader': item.get('broadcastPublisher'),
        })
        entries.append(info)
    return self.playlist_result(entries, playlist_id)
def _real_extract(self, url):
video_id = self._match_id(url)
return self._extract_clip(url, webpage)
elif page_type == 'playlist':
return self._extract_playlist(url, webpage)
+ else:
+ raise ExtractorError(
+ 'Unsupported page type %s' % page_type, expected=True)