import os
import re
import sys
+import traceback
import zlib
import email.utils
import json
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-us,en;q=0.5',
}
+
def preferredencoding():
"""Get preferred encoding.
with open(fn, 'w', encoding='utf-8') as f:
json.dump(obj, f)
-
def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a character.
lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
lines[-1] = lines[-1][:self.result[2][1]]
return '\n'.join(lines).strip()
+# Hack for https://github.com/rg3/youtube-dl/issues/662
+# Applied only on interpreters older than 2.7.3: AttrParser.parse_endtag is
+# replaced so that, when the raw data at offset i starts with the literal
+# text "</scr'+'ipt>", the offset just past that text is returned; in every
+# other case the call is forwarded to compat_html_parser.HTMLParser.parse_endtag.
+if sys.version_info < (2, 7, 3):
+ AttrParser.parse_endtag = (lambda self, i:
+ i + len("</scr'+'ipt>")
+ if self.rawdata[i:].startswith("</scr'+'ipt>")
+ else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
"""Return the content of the tag with the specified ID in the passed HTML document"""
# match Windows 9x series as well. Besides, NT 4 is obsolete.)
return s
else:
- return s.encode(sys.getfilesystemencoding(), 'ignore')
+ encoding = sys.getfilesystemencoding()
+ if encoding is None:
+ encoding = 'utf-8'
+ return s.encode(encoding, 'ignore')
+
+
+class ExtractorError(Exception):
+ """Error during info extraction.
+
+ The message is passed to Exception.__init__; the optional traceback object
+ is kept on self.traceback and can be rendered with format_traceback().
+ """
+ def __init__(self, msg, tb=None):
+ """ tb, if given, is the original traceback (so that it can be printed out). """
+ super(ExtractorError, self).__init__(msg)
+ self.traceback = tb
+
+ def format_traceback(self):
+ """Return the stored traceback formatted as a single string, or None if no traceback was given."""
+ if self.traceback is None:
+ return None
+ # traceback.format_tb() returns a list of strings; join them into one.
+ return u''.join(traceback.format_tb(self.traceback))
+
class DownloadError(Exception):
"""Download Error exception.
This exception may be raised by PostProcessor's .run() method to
indicate an error in the postprocessing task.
"""
- pass
+ def __init__(self, msg):
+ self.msg = msg
class MaxDownloadsReached(Exception):
""" --max-downloads limit has been reached. """
return ret
def http_request(self, req):
+ """Rewrite the headers of req in place and return req."""
+ # Put every std_headers entry on the request, first deleting any value
+ # already stored in req.headers under the same key.
- for h in std_headers:
+ for h,v in std_headers.items():
if h in req.headers:
del req.headers[h]
- req.add_header(h, std_headers[h])
+ req.add_header(h, v)
+ # If the request carries a 'Youtubedl-no-compression' header, drop
+ # 'Accept-encoding' (when present) and then that header itself.
if 'Youtubedl-no-compression' in req.headers:
if 'Accept-encoding' in req.headers:
del req.headers['Accept-encoding']
del req.headers['Youtubedl-no-compression']
+ # If the request carries a 'Youtubedl-user-agent' header, its value replaces
+ # 'User-agent', and the 'Youtubedl-user-agent' header is then removed.
+ if 'Youtubedl-user-agent' in req.headers:
+ if 'User-agent' in req.headers:
+ del req.headers['User-agent']
+ req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
+ del req.headers['Youtubedl-user-agent']
return req
def http_response(self, req, resp):