X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=4782326826670ba7d7f24abcf46970cf2f821a98;hb=e72c7e41238681338c92427f6731c74947645308;hp=b4cd59e4318a52019e060250499b1d50d1e01a8b;hpb=dd622d7c4ec15aec6ea5d66ed88a236147373e95;p=youtube-dl.git diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b4cd59e43..478232682 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -14,6 +14,7 @@ import xml.etree.ElementTree from ..compat import ( compat_cookiejar, + compat_HTTPError, compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, @@ -26,6 +27,7 @@ from ..utils import ( compiled_regex_type, ExtractorError, float_or_none, + HEADRequest, int_or_none, RegexNotFoundError, sanitize_filename, @@ -108,12 +110,17 @@ class InfoExtractor(object): (quality takes higher priority) -1 for default (order by other properties), -2 or smaller for less than default. - * http_referer HTTP Referer header value to set. * http_method HTTP method to use for the download. * http_headers A dictionary of additional HTTP headers to add to the request. * http_post_data Additional data to send with a POST request. + * stretched_ratio If given and not 1, indicates that the + video's pixels are not square. + width : height ratio as float. + * no_resume The server does not support resuming the + (HTTP or RTMP) download. Boolean. + url: Final video URL. ext: Video filename extension. format: The video format, defaults to ext (used for --get-format) @@ -127,7 +134,9 @@ class InfoExtractor(object): something like "4234987", title "Dancing naked mole rats", and display_id "dancing-naked-mole-rats" thumbnails: A list of dictionaries, with the following entries: + * "id" (optional, string) - Thumbnail format ID * "url" + * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) * "resolution" (optional, string "{width}x{height"}, @@ -376,9 +385,19 @@ class InfoExtractor(object): return content - def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5): """ Returns the data of the page as a string """ - res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) + success = False + try_count = 0 + while success is False: + try: + res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) + success = True + except compat_http_client.IncompleteRead as e: + try_count += 1 + if try_count >= tries: + raise e + self._sleep(timeout, video_id) if res is False: return res else: @@ -699,6 +718,27 @@ class InfoExtractor(object): ) formats.sort(key=_formats_key) + def _check_formats(self, formats, video_id): + if formats: + formats[:] = filter( + lambda f: self._is_valid_url( + f['url'], video_id, + item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'), + formats) + + def _is_valid_url(self, url, video_id, item='video'): + try: + self._request_webpage( + HEADRequest(url), video_id, + 'Checking %s URL' % item) + return True + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + self.report_warning( + '%s URL is invalid, skipping' % item, video_id) + return False + raise + def http_scheme(self): """ Either "http:" or "https:", depending on the user's preferences """ return ( @@ -729,8 +769,14 @@ class InfoExtractor(object): 'Unable to download f4m manifest') formats = [] + manifest_version = '1.0' media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media') + if not media_nodes: + manifest_version = '2.0' + media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') for i, media_el in enumerate(media_nodes): + if manifest_version == '2.0': + manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href') tbr = int_or_none(media_el.attrib.get('bitrate')) format_id = 'f4m-%d' % (i if tbr is None else tbr) formats.append({