[mtv] Extract subtitles (Closes #4811)

[youtube-dl.git] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 03f3f18c83012cdced0e305fe1cc02d69a85bb7c..8b4ef3f09a6b0d00d223153e333d6459ff932e6a 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -14,6 +14,7 @@ import xml.etree.ElementTree
  
  from ..compat import (
      compat_cookiejar,
+    compat_HTTPError,
      compat_http_client,
      compat_urllib_error,
      compat_urllib_parse_urlparse,
@@ -26,6 +27,7 @@ from ..utils import (
      compiled_regex_type,
      ExtractorError,
      float_or_none,
+    HEADRequest,
      int_or_none,
      RegexNotFoundError,
      sanitize_filename,
@@ -87,7 +89,8 @@ class InfoExtractor(object):
                      * player_url SWF Player URL (used for rtmpdump).
                      * protocol   The protocol that will be used for the actual
                                   download, lower-case.
-                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
+                                 "http", "https", "rtsp", "rtmp", "rtmpe",
+                                 "m3u8", or "m3u8_native".
                      * preference Order number of this format. If this field is
                                   present and not None, the formats get sorted
                                   by this field, regardless of all other values.
@@ -108,15 +111,17 @@ class InfoExtractor(object):
                                    (quality takes higher priority)
                                   -1 for default (order by other properties),
                                   -2 or smaller for less than default.
-                    * http_referer  HTTP Referer header value to set.
                      * http_method  HTTP method to use for the download.
                      * http_headers  A dictionary of additional HTTP headers
                                   to add to the request.
                      * http_post_data  Additional data to send with a POST
                                   request.
                      * stretched_ratio  If given and not 1, indicates that the
-                                       video's pixels are not square.
-                                       width : height ratio as float.
+                                 video's pixels are not square.
+                                 width : height ratio as float.
+                    * no_resume  The server does not support resuming the
+                                 (HTTP or RTMP) download. Boolean.
+
      url:            Final video URL.
      ext:            Video filename extension.
      format:         The video format, defaults to ext (used for --get-format)
@@ -130,7 +135,9 @@ class InfoExtractor(object):
                      something like "4234987", title "Dancing naked mole rats",
                      and display_id "dancing-naked-mole-rats"
      thumbnails:     A list of dictionaries, with the following entries:
+                        * "id" (optional, string) - Thumbnail format ID
                          * "url"
+                        * "preference" (optional, int) - quality of the image
                          * "width" (optional, int)
                          * "height" (optional, int)
                          * "resolution" (optional, string "{width}x{height"},
@@ -712,6 +719,27 @@ class InfoExtractor(object):
              )
          formats.sort(key=_formats_key)
  
+    def _check_formats(self, formats, video_id):  # Prune, in place, every format whose URL fails _is_valid_url().
+        if formats:  # A falsy value (e.g. None or an empty list) is skipped entirely.
+            formats[:] = filter(  # Slice assignment keeps the caller's list object and replaces only its contents.
+                lambda f: self._is_valid_url(
+                    f['url'], video_id,
+                    item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),  # Label is "<format_id> video format", or plain "video" when format_id is missing/empty.
+                formats)
+
+    def _is_valid_url(self, url, video_id, item='video'):  # Return True if requesting `url` succeeds, False if it fails with an HTTP error.
+        try:
+            self._request_webpage(
+                HEADRequest(url), video_id,
+                'Checking %s URL' % item)  # `item` only names the resource in this message and in the warning below.
+            return True
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):  # Only an HTTP error cause marks the URL as invalid.
+                self.report_warning(
+                    '%s URL is invalid, skipping' % item, video_id)
+                return False
+            raise  # Any other ExtractorError is propagated unchanged.
+
      def http_scheme(self):
          """ Either "http:" or "https:", depending on the user's preferences """
          return (