X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fbbc.py;h=425f08f2b6bba12f054f13aa0de6eefddcebcfa7;hb=8c7d6e8e2279beccf638cd0fae9d91876e0486b2;hp=f4d8b4a2f2c1b12bb5ed2481d24839e76b7841cb;hpb=8e4aa7bf18af4403bf98742270483f3b9cfbdeb6;p=youtube-dl.git diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index f4d8b4a2f..425f08f2b 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -328,6 +328,7 @@ class BBCCoUkIE(InfoExtractor): 'format_id': '%s_%s' % (service, format['format_id']), 'abr': abr, 'acodec': acodec, + 'vcodec': 'none', }) formats.extend(conn_formats) return formats @@ -560,7 +561,7 @@ class BBCIE(BBCCoUkIE): 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460', 'info_dict': { 'id': '3662a707-0af9-3149-963f-47bea720b460', - 'title': 'BBC Blogs - Adam Curtis - BUGGER', + 'title': 'BUGGER', }, 'playlist_count': 18, }, { @@ -669,9 +670,17 @@ class BBCIE(BBCCoUkIE): 'url': 'http://www.bbc.com/sport/0/football/34475836', 'info_dict': { 'id': '34475836', - 'title': 'What Liverpool can expect from Klopp', + 'title': 'Jurgen Klopp: Furious football from a witty and winning coach', }, 'playlist_count': 3, + }, { + # school report article with single video + 'url': 'http://www.bbc.co.uk/schoolreport/35744779', + 'info_dict': { + 'id': '35744779', + 'title': 'School which breaks down barriers in Jerusalem', + }, + 'playlist_count': 1, }, { # single video with playlist URL from weather section 'url': 'http://www.bbc.com/weather/features/33601775', @@ -680,6 +689,10 @@ class BBCIE(BBCCoUkIE): # custom redirection to www.bbc.com 'url': 'http://www.bbc.co.uk/news/science-environment-33661876', 'only_matching': True, + }, { + # single video article embedded with data-media-vpid + 'url': 'http://www.bbc.co.uk/sport/rowing/35908187', + 'only_matching': True, }] @classmethod @@ -734,8 +747,17 @@ class BBCIE(BBCCoUkIE): json_ld_info = self._search_json_ld(webpage, playlist_id, default=None) timestamp = json_ld_info.get('timestamp') + playlist_title = json_ld_info.get('title') - playlist_description = json_ld_info.get('description') + if not playlist_title: + playlist_title = self._og_search_title( + webpage, default=None) or self._html_search_regex( + r'(.+?)', webpage, 'playlist title', default=None) + if playlist_title: + playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip() + + playlist_description = json_ld_info.get( + 'description') or self._og_search_description(webpage, default=None) if not timestamp: timestamp = parse_iso8601(self._search_regex( @@ -795,20 +817,12 @@ class BBCIE(BBCCoUkIE): entries.append(self._extract_from_playlist_sxml( playlist.get('progressiveDownloadUrl'), playlist_id, timestamp)) - playlist_title = self._og_search_title(webpage, default=None) - playlist_title = playlist_title or self._html_search_regex( - r'(.*?)', webpage, 'playlist title') - - playlist_title = self._search_regex(r'(.+)\s*-\s*BBC', playlist_title, 'title', default=playlist_title) - - playlist_description = self._og_search_description(webpage, default=None) - if entries: return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) programme_id = self._search_regex( - [r'data-video-player-vpid="(%s)"' % self._ID_REGEX, + [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX, r']+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX, r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX], webpage, 'vpid', default=None) @@ -933,7 +947,7 @@ class BBCIE(BBCCoUkIE): class BBCCoUkArticleIE(InfoExtractor): - _VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P[a-zA-Z0-9]+)' IE_NAME = 'bbc.co.uk:article' IE_DESC = 'BBC articles'