'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
'info_dict': {
'id': '3662a707-0af9-3149-963f-47bea720b460',
- 'title': 'BBC Blogs - Adam Curtis - BUGGER',
+ 'title': 'BUGGER',
},
'playlist_count': 18,
}, {
'url': 'http://www.bbc.com/sport/0/football/34475836',
'info_dict': {
'id': '34475836',
- 'title': 'What Liverpool can expect from Klopp',
+ 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
},
'playlist_count': 3,
+ }, {
+ # school report article with single video
+ 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
+ 'info_dict': {
+ 'id': '35744779',
+ 'title': 'School which breaks down barriers in Jerusalem',
+ },
+ 'playlist_count': 1,
}, {
# single video with playlist URL from weather section
'url': 'http://www.bbc.com/weather/features/33601775',
json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
timestamp = json_ld_info.get('timestamp')
+
playlist_title = json_ld_info.get('title')
- playlist_description = json_ld_info.get('description')
+ if not playlist_title:
+ playlist_title = self._og_search_title(
+ webpage, default=None) or self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
+ if playlist_title:
+ playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
+
+ playlist_description = json_ld_info.get(
+ 'description') or self._og_search_description(webpage, default=None)
if not timestamp:
timestamp = parse_iso8601(self._search_regex(
entries.append(self._extract_from_playlist_sxml(
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
- playlist_title = self._og_search_title(webpage, default=None)
- playlist_title = playlist_title or self._html_search_regex(
- r'<title>(.*?)</title>', webpage, 'playlist title')
-
- playlist_title = self._search_regex(r'(.+)\s*-\s*BBC', playlist_title, 'title', default=playlist_title)
-
- playlist_description = self._og_search_description(webpage, default=None)
-
if entries:
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
class BBCCoUkArticleIE(InfoExtractor):
- _VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
IE_NAME = 'bbc.co.uk:article'
IE_DESC = 'BBC articles'