X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=8b4ef3f09a6b0d00d223153e333d6459ff932e6a;hb=e525d9a3dfb03152e133b8c0ccc8a104289cf5cf;hp=03f3f18c83012cdced0e305fe1cc02d69a85bb7c;hpb=bc694039e47cc871c98abacdf1c0a2e5a257a8a4;p=youtube-dl.git diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 03f3f18c8..8b4ef3f09 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -14,6 +14,7 @@ import xml.etree.ElementTree from ..compat import ( compat_cookiejar, + compat_HTTPError, compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, @@ -26,6 +27,7 @@ from ..utils import ( compiled_regex_type, ExtractorError, float_or_none, + HEADRequest, int_or_none, RegexNotFoundError, sanitize_filename, @@ -87,7 +89,8 @@ class InfoExtractor(object): * player_url SWF Player URL (used for rtmpdump). * protocol The protocol that will be used for the actual download, lower-case. - "http", "https", "rtsp", "rtmp", "m3u8" or so. + "http", "https", "rtsp", "rtmp", "rtmpe", + "m3u8", or "m3u8_native". * preference Order number of this format. If this field is present and not None, the formats get sorted by this field, regardless of all other values. @@ -108,15 +111,17 @@ class InfoExtractor(object): (quality takes higher priority) -1 for default (order by other properties), -2 or smaller for less than default. - * http_referer HTTP Referer header value to set. * http_method HTTP method to use for the download. * http_headers A dictionary of additional HTTP headers to add to the request. * http_post_data Additional data to send with a POST request. * stretched_ratio If given and not 1, indicates that the - video's pixels are not square. - width : height ratio as float. + video's pixels are not square. + width : height ratio as float. + * no_resume The server does not support resuming the + (HTTP or RTMP) download. Boolean. + url: Final video URL. ext: Video filename extension. format: The video format, defaults to ext (used for --get-format) @@ -130,7 +135,9 @@ class InfoExtractor(object): something like "4234987", title "Dancing naked mole rats", and display_id "dancing-naked-mole-rats" thumbnails: A list of dictionaries, with the following entries: + * "id" (optional, string) - Thumbnail format ID * "url" + * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) * "resolution" (optional, string "{width}x{height"}, @@ -712,6 +719,27 @@ class InfoExtractor(object): ) formats.sort(key=_formats_key) + def _check_formats(self, formats, video_id): + if formats: + formats[:] = filter( + lambda f: self._is_valid_url( + f['url'], video_id, + item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'), + formats) + + def _is_valid_url(self, url, video_id, item='video'): + try: + self._request_webpage( + HEADRequest(url), video_id, + 'Checking %s URL' % item) + return True + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + self.report_warning( + '%s URL is invalid, skipping' % item, video_id) + return False + raise + def http_scheme(self): """ Either "http:" or "https:", depending on the user's preferences """ return (