X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=ef02b68966e88d8d1cbc2f9d628e5a79fe8ce3c3;hb=e00c9cf5995fccd0c6be21757325ce0bd9ba68f8;hp=7ee95fe391ad9cac97c9cf0b0364c5a2a88b6f84;hpb=f55a1f0a8815c89b01a7a353cfa0bd5118f75829;p=youtube-dl.git

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7ee95fe39..ef02b6896 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -74,7 +74,7 @@ class InfoExtractor(object):
                                  "http", "https", "rtsp", "rtmp", "m3u8" or so.
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
-                                 by this field.
+                                 by this field, regardless of all other values.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
                     * quality    Order number of the video quality of this
@@ -97,7 +97,9 @@ class InfoExtractor(object):
     thumbnail:      Full URL to a video thumbnail image.
     description:    One-line video description.
     uploader:       Full name of the video uploader.
+    timestamp:      UNIX timestamp of the moment the video became available.
     upload_date:    Video upload date (YYYYMMDD).
+                    If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
     location:       Physical location of the video.
     subtitles:      The subtitle file contents as a dictionary in the format
@@ -118,9 +120,6 @@ class InfoExtractor(object):
     _real_extract() methods and define a _VALID_URL regexp.
     Probably, they should also be added to the list of extractors.
 
-    _real_extract() must return a *list* of information dictionaries as
-    described above.
-
     Finally, the _WORKING attribute should be set to False for broken IEs
     in order to warn the users and skip the tests.
     """
@@ -252,7 +251,21 @@ class InfoExtractor(object):
             with open(filename, 'wb') as outf:
                 outf.write(webpage_bytes)
 
-        content = webpage_bytes.decode(encoding, 'replace')
+        try:
+            content = webpage_bytes.decode(encoding, 'replace')
+        except LookupError:
+            content = webpage_bytes.decode('utf-8', 'replace')
+
+        if (u'<title>Access to this site is blocked</title>' in content and
+                u'Websense' in content[:512]):
+            msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
+            blocked_iframe = self._html_search_regex(
+                r'<iframe src="([^"]+)"', content,
+                u'Websense information URL', default=None)
+            if blocked_iframe:
+                msg += u' Visit %s for more details' % blocked_iframe
+            raise ExtractorError(msg, expected=True)
+
         return (content, urlh)
 
     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
@@ -266,9 +279,12 @@ class InfoExtractor(object):
 
     def _download_xml(self, url_or_request, video_id,
                       note=u'Downloading XML', errnote=u'Unable to download XML',
-                      transform_source=None):
+                      transform_source=None, fatal=True):
         """Return the xml as an xml.etree.ElementTree.Element"""
-        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        xml_string = self._download_webpage(
+            url_or_request, video_id, note, errnote, fatal=fatal)
+        if xml_string is False:
+            return xml_string
         if transform_source:
             xml_string = transform_source(xml_string)
         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))