X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=248b30ffb329d3870e0249e4d775c3f969849ce8;hb=714d709a31a8fbb8a0aee94df59730673c4c035b;hp=b0e29c2a8a5d8c7f6c4c0109ca09afa204d3b30b;hpb=2c62dc26c82bfd07a00e6775f6558d1dde7e088a;p=youtube-dl.git diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b0e29c2a8..248b30ffb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -27,6 +27,7 @@ from ..utils import ( get_element_by_id, get_element_by_attribute, ExtractorError, + RegexNotFoundError, unescapeHTML, unified_strdate, orderedSet, @@ -131,6 +132,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): ( (?:https?://|//)? # http(s):// or protocol-independent URL (optional) (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| + (?:www\.)?deturl\.com/www\.youtube\.com/| + (?:www\.)?pwnyoutube\.com| tube\.majestyc\.net/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? 
# handle anchor (#/) redirect urls @@ -194,6 +197,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40}, '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40}, + '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, # Dash mp4 audio '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, @@ -212,6 +216,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Dash webm audio '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50}, '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50}, + + # RTMP (unnamed) + '_rtmp': {'protocol': 'rtmp'}, } IE_NAME = u'youtube' @@ -997,7 +1004,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'lang': lang, 'v': video_id, 'fmt': self._downloader.params.get('subtitlesformat', 'srt'), - 'name': l[0].encode('utf-8'), + 'name': unescapeHTML(l[0]).encode('utf-8'), }) url = u'http://www.youtube.com/api/timedtext?' 
+ params sub_lang_list[lang] = url @@ -1272,7 +1279,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() - video_url_list = [(None, video_info['conn'][0])] + video_url_list = [('_rtmp', video_info['conn'][0])] elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0] if 'rtmpe%3Dyes' in encoded_url_map: @@ -1442,7 +1449,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): if re.search(self._MORE_PAGES_INDICATOR, page) is None: break - playlist_title = self._og_search_title(page) + try: + playlist_title = self._og_search_title(page) + except RegexNotFoundError: + self.report_warning( + u'Playlist page is missing OpenGraph title, falling back ...', + playlist_id) + playlist_title = self._html_search_regex( + r'

<h1 class="pl-header-title">(.*?)</h1>

', page, u'title') url_results = self._ids_to_results(ids) return self.playlist_result(url_results, playlist_id, playlist_title) @@ -1758,6 +1772,6 @@ class YoutubeTruncatedURLIE(InfoExtractor): u'Did you forget to quote the URL? Remember that & is a meta ' u'character in most shells, so you want to put the URL in quotes, ' u'like youtube-dl ' - u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\'' - u' (or simply youtube-dl BaW_jenozKc ).', + u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' + u' or simply youtube-dl BaW_jenozKc .', expected=True)