X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=9784abb2400a4b09e0067757f334ec5942d55903;hb=f427df17abc9508f88af9d904ac0520d610c0e9c;hp=7f73b84761362b88e788e7a4b0568e143e8c5731;hpb=3cd69a54b20b690d570e7c7ae5a9bba26f30b897;p=youtube-dl.git

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7f73b8476..9784abb24 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3,6 +3,7 @@
 
 import gzip
 import io
+import json
 import locale
 import os
 import re
@@ -51,6 +52,12 @@ try:
 except ImportError: # Python 2
     import httplib as compat_http_client
 
+try:
+    from subprocess import DEVNULL
+    compat_subprocess_get_DEVNULL = lambda: DEVNULL
+except ImportError:
+    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
+
 try:
     from urllib.parse import parse_qs as compat_parse_qs
 except ImportError: # Python 2
@@ -147,6 +154,7 @@ std_headers = {
     'Accept-Encoding': 'gzip, deflate',
     'Accept-Language': 'en-us,en;q=0.5',
 }
+
 def preferredencoding():
     """Get preferred encoding.
 
@@ -169,6 +177,23 @@ else:
         assert type(s) == type(u'')
         print(s)
 
+# In Python 2.x, json.dump expects a bytestream.
+# In Python 3.x, it writes to a character stream
+if sys.version_info < (3,0):
+    def write_json_file(obj, fn):
+        with open(fn, 'wb') as f:
+            json.dump(obj, f)
+else:
+    def write_json_file(obj, fn):
+        with open(fn, 'w', encoding='utf-8') as f:
+            json.dump(obj, f)
+
+# Some library functions return bytestring on 2.X and unicode on 3.X
+def enforce_unicode(s, encoding='utf-8'):
+    if type(s) != type(u''):
+        return s.decode(encoding)
+    return s
+
 def htmlentity_transform(matchobj):
     """Transforms an HTML entity to a character.
 
@@ -195,10 +220,11 @@ def htmlentity_transform(matchobj):
     return (u'&%s;' % entity)
 
 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
-class IDParser(compat_html_parser.HTMLParser):
-    """Modified HTMLParser that isolates a tag with the specified id"""
-    def __init__(self, id):
-        self.id = id
+class AttrParser(compat_html_parser.HTMLParser):
+    """Modified HTMLParser that isolates a tag with the specified attribute"""
+    def __init__(self, attribute, value):
+        self.attribute = attribute
+        self.value = value
         self.result = None
         self.started = False
         self.depth = {}
@@ -223,7 +249,7 @@ class IDParser(compat_html_parser.HTMLParser):
         attrs = dict(attrs)
         if self.started:
             self.find_startpos(None)
-        if 'id' in attrs and attrs['id'] == self.id:
+        if self.attribute in attrs and attrs[self.attribute] == self.value:
             self.result = [tag]
             self.started = True
             self.watch_startpos = True
@@ -261,8 +287,12 @@ class IDParser(compat_html_parser.HTMLParser):
         return '\n'.join(lines).strip()
 
 def get_element_by_id(id, html):
-    """Return the content of the tag with the specified id in the passed HTML document"""
-    parser = IDParser(id)
+    """Return the content of the tag with the specified ID in the passed HTML document"""
+    return get_element_by_attribute("id", id, html)
+
+def get_element_by_attribute(attribute, value, html):
+    """Return the content of the tag with the specified attribute in the passed HTML document"""
+    parser = AttrParser(attribute, value)
     try:
         parser.loads(html)
     except compat_html_parser.HTMLParseError:
@@ -274,7 +304,8 @@ def clean_html(html):
     """Clean an HTML snippet into a readable string"""
     # Newline vs <br />
     html = html.replace('\n', ' ')
-    html = re.sub('\s*<\s*br\s*/?\s*>\s*', '\n', html)
+    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
+    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
     # Strip html tags
     html = re.sub('<.*?>', '', html)
     # Replace html entities
@@ -317,9 +348,10 @@ def timeconvert(timestr):
         timestamp = email.utils.mktime_tz(timetuple)
     return timestamp
 
-def sanitize_filename(s, restricted=False):
+def sanitize_filename(s, restricted=False, is_id=False):
     """Sanitizes a string so it could be used as part of a filename.
     If restricted is set, use a stricter subset of allowed characters.
+    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
     """
     def replace_insane(char):
         if char == '?' or ord(char) < 32 or ord(char) == 127:
@@ -337,14 +369,15 @@ def sanitize_filename(s, restricted=False):
         return char
 
     result = u''.join(map(replace_insane, s))
-    while '__' in result:
-        result = result.replace('__', '_')
-    result = result.strip('_')
-    # Common case of "Foreign band name - English song title"
-    if restricted and result.startswith('-_'):
-        result = result[2:]
-    if not result:
-        result = '_'
+    if not is_id:
+        while '__' in result:
+            result = result.replace('__', '_')
+        result = result.strip('_')
+        # Common case of "Foreign band name - English song title"
+        if restricted and result.startswith('-_'):
+            result = result[2:]
+        if not result:
+            result = '_'
     return result
 
 def orderedSet(iterable):
@@ -383,6 +416,34 @@ def encodeFilename(s):
     else:
         return s.encode(sys.getfilesystemencoding(), 'ignore')
 
+def rsa_verify(message, signature, key):
+    from struct import pack
+    from hashlib import sha256
+    from sys import version_info
+    def b(x):
+        if version_info[0] == 2: return x
+        else: return x.encode('latin1')
+    assert(type(message) == type(b('')))
+    block_size = 0
+    n = key[0]
+    while n:
+        block_size += 1
+        n >>= 8
+    signature = pow(int(signature, 16), key[1], key[0])
+    raw_bytes = []
+    while signature:
+        raw_bytes.insert(0, pack("B", signature & 0xFF))
+        signature >>= 8
+    signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
+    if signature[0:2] != b('\x00\x01'): return False
+    signature = signature[2:]
+    if not b('\x00') in signature: return False
+    signature = signature[signature.index(b('\x00'))+1:]
+    if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
+    signature = signature[19:]
+    if signature != sha256(message).digest(): return False
+    return True
+
 class DownloadError(Exception):
     """Download Error exception.
 
@@ -439,14 +500,6 @@ class ContentTooShortError(Exception):
         self.downloaded = downloaded
         self.expected = expected
 
-
-class Trouble(Exception):
-    """Trouble helper exception
-
-    This is an exception to be handled with
-    FileDownloader.trouble
-    """
-
 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     """Handler for HTTP requests and responses.
 
@@ -504,3 +557,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
         return resp
+
+    https_request = http_request
+    https_response = http_response