Simplified preferredencoding()

[youtube-dl.git] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 0f903c64a0d8c71517378143ecad1caf07cb5893..7faa046c8cf6f9a464f8de65cfd0c637664329ba 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -11,16 +11,12 @@ import sys
  import zlib
  import urllib2
  import email.utils
  import zlib
  import urllib2
  import email.utils
+import json
  
  try:
         import cStringIO as StringIO
  except ImportError:
         import StringIO
  
  try:
         import cStringIO as StringIO
  except ImportError:
         import StringIO
-               
-try:
-       import json
-except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
-       import trivialjson as json
  
  std_headers = {
         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
  
  std_headers = {
         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
@@ -36,15 +32,13 @@ def preferredencoding():
         Returns the best encoding scheme for the system, based on
         locale.getpreferredencoding() and some further tweaks.
         """
         Returns the best encoding scheme for the system, based on
         locale.getpreferredencoding() and some further tweaks.
         """
-       def yield_preferredencoding():
-               try:
-                       pref = locale.getpreferredencoding()
-                       u'TEST'.encode(pref)
-               except:
-                       pref = 'UTF-8'
-               while True:
-                       yield pref
-       return yield_preferredencoding().next()
+       try:
+               pref = locale.getpreferredencoding()
+               u'TEST'.encode(pref)
+       except:
+               pref = 'UTF-8'
+
+       return pref
  
  
  def htmlentity_transform(matchobj):
  
  
  def htmlentity_transform(matchobj):
@@ -87,7 +81,7 @@ class IDParser(HTMLParser.HTMLParser):
                 HTMLParser.HTMLParser.__init__(self)
  
         def error(self, message):
                 HTMLParser.HTMLParser.__init__(self)
  
         def error(self, message):
-               print self.getpos()
+               print >> sys.stderr, self.getpos()
                 if self.error_count > 10 or self.started:
                         raise HTMLParser.HTMLParseError(message, self.getpos())
                 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
                 if self.error_count > 10 or self.started:
                         raise HTMLParser.HTMLParseError(message, self.getpos())
                 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
@@ -160,12 +154,6 @@ def clean_html(html):
         return html
  
  
         return html
  
  
-def sanitize_title(utitle):
-       """Sanitizes a video title so it could be used as part of a filename."""
-       utitle = unescapeHTML(utitle)
-       return utitle.replace(unicode(os.sep), u'%')
-
-
  def sanitize_open(filename, open_mode):
         """Try to open the given filename, and slightly tweak it if this fails.
  
  def sanitize_open(filename, open_mode):
         """Try to open the given filename, and slightly tweak it if this fails.
  
@@ -200,10 +188,14 @@ def timeconvert(timestr):
         if timetuple is not None:
                 timestamp = email.utils.mktime_tz(timetuple)
         return timestamp
         if timetuple is not None:
                 timestamp = email.utils.mktime_tz(timetuple)
         return timestamp
-
-def simplify_title(title):
-       expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
-       return expr.sub(u'_', title).strip(u'_')
+       
+def sanitize_filename(s):
+       """Sanitizes a string so it could be used as part of a filename."""
+       def replace_insane(char):
+               if char in u' .\\/|?*<>:"' or ord(char) < 32:
+                       return '_'
+               return char
+       return u''.join(map(replace_insane, s)).strip('_')
  
  def orderedSet(iterable):
         """ Remove all duplicates from the input iterable """
  
  def orderedSet(iterable):
         """ Remove all duplicates from the input iterable """
@@ -294,6 +286,13 @@ class ContentTooShortError(Exception):
                 self.expected = expected
  
  
                 self.expected = expected
  
  
+class Trouble(Exception):
+       """Trouble helper exception
+       
+       This is an exception to be handled with
+       FileDownloader.trouble
+       """
+
  class YoutubeDLHandler(urllib2.HTTPHandler):
         """Handler for HTTP requests and responses.
  
  class YoutubeDLHandler(urllib2.HTTPHandler):
         """Handler for HTTP requests and responses.