X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=4d098a377a6fa64f1246c1919cda943881e37afe;hb=96731798dbdd5a8878ac5cf29b69c6c7c821311b;hp=4ace22c2fc232ecacef491fd6ac6ecbd0ca3df01;hpb=46cbda0be4bed00122a5cf43e640808e6c32222d;p=youtube-dl.git diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4ace22c2f..4d098a377 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -27,9 +27,9 @@ std_headers = { } try: - compat_str = unicode # Python 2 + compat_str = unicode # Python 2 except NameError: - compat_str = str + compat_str = str def preferredencoding(): """Get preferred encoding. @@ -37,19 +37,17 @@ def preferredencoding(): Returns the best encoding scheme for the system, based on locale.getpreferredencoding() and some further tweaks. """ - def yield_preferredencoding(): - try: - pref = locale.getpreferredencoding() - u'TEST'.encode(pref) - except: - pref = 'UTF-8' - while True: - yield pref - return yield_preferredencoding().next() + try: + pref = locale.getpreferredencoding() + u'TEST'.encode(pref) + except: + pref = 'UTF-8' + + return pref def htmlentity_transform(matchobj): - """Transforms an HTML entity to a Unicode character. + """Transforms an HTML entity to a character. This function receives a match object and is intended to be used with the re.sub() function. @@ -60,7 +58,6 @@ def htmlentity_transform(matchobj): if entity in htmlentitydefs.name2codepoint: return unichr(htmlentitydefs.name2codepoint[entity]) - # Unicode character mobj = re.match(ur'(?u)#(x?\d+)', entity) if mobj is not None: numstr = mobj.group(1) @@ -69,7 +66,7 @@ def htmlentity_transform(matchobj): numstr = u'0%s' % numstr else: base = 10 - return unichr(long(numstr, base)) + return unichr(int(numstr, base)) # Unknown entity in name, return its literal representation return (u'&%s;' % entity) @@ -128,8 +125,10 @@ class IDParser(HTMLParser.HTMLParser): handle_decl = handle_pi = unknown_decl = find_startpos def get_result(self): - if self.result == None: return None - if len(self.result) != 3: return None + if self.result is None: + return None + if len(self.result) != 3: + return None lines = self.html.split('\n') lines = lines[self.result[1][0]-1:self.result[2][0]] lines[0] = lines[0][self.result[1][1]:] @@ -208,7 +207,7 @@ def sanitize_filename(s, restricted=False): return '_-' if restricted else ' -' elif char in '\\/|*<>': return '_' - if restricted and (char in '&\'' or char.isspace()): + if restricted and (char in '!&\'' or char.isspace()): return '_' if restricted and ord(char) > 127: return '_' @@ -235,7 +234,7 @@ def orderedSet(iterable): def unescapeHTML(s): """ - @param s a string (of type unicode) + @param s a string """ assert type(s) == type(u'') @@ -244,7 +243,7 @@ def unescapeHTML(s): def encodeFilename(s): """ - @param s The name of the file (of type unicode) + @param s The name of the file """ assert type(s) == type(u'') @@ -316,7 +315,7 @@ class ContentTooShortError(Exception): class Trouble(Exception): """Trouble helper exception - + This is an exception to be handled with FileDownloader.trouble """