Merge remote-tracking branch 'derrotebaron/master'

[youtube-dl.git] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index daf94abd1d541d77e8b74f0f17e6fde780b054e8..3536a5bd6f3216a62eaaec2ba11a08893c5e62ab 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -399,8 +399,9 @@ def formatSeconds(secs):
  def make_HTTPS_handler(params, **kwargs):
      opts_no_check_certificate = params.get('nocheckcertificate', False)
      if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
-        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
+        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
          if opts_no_check_certificate:
+            context.check_hostname = False
              context.verify_mode = ssl.CERT_NONE
          try:
              return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
@@ -723,6 +724,7 @@ def unified_strdate(date_str, day_first=True):
          '%b %dst %Y %I:%M%p',
          '%b %dnd %Y %I:%M%p',
          '%b %dth %Y %I:%M%p',
+        '%Y %m %d',
          '%Y-%m-%d',
          '%Y/%m/%d',
          '%Y/%m/%d %H:%M:%S',
@@ -1277,7 +1279,7 @@ def parse_duration(s):
      s = s.strip()
  
      m = re.match(
-        r'''(?ix)T?
+        r'''(?ix)(?:P?T)?
          (?:
              (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
              (?P<only_hours>[0-9.]+)\s*(?:hours?)|
@@ -1612,6 +1614,14 @@ def urlhandle_detect_ext(url_handle):
      except AttributeError:  # Python < 3
          getheader = url_handle.info().getheader
  
+    cd = getheader('Content-Disposition')
+    if cd:
+        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+        if m:
+            e = determine_ext(m.group('filename'), default_ext=None)
+            if e:
+                return e
+
      return getheader('Content-Type').split("/")[1]
  
  
@@ -1623,3 +1633,23 @@ def age_restricted(content_limit, age_limit):
      if content_limit is None:
          return False  # Content available for everyone
      return age_limit < content_limit
+
+
+def is_html(first_bytes):
+    """ Return a regex match (truthy) if first_bytes, once decoded, starts with optional whitespace then '<'; else None. """
+
+    BOMS = [  # (byte-order mark, codec name) pairs, tried in this order
+        (b'\xef\xbb\xbf', 'utf-8'),
+        (b'\x00\x00\xfe\xff', 'utf-32-be'),
+        (b'\xff\xfe\x00\x00', 'utf-32-le'),  # must precede utf-16-le: this mark begins with b'\xff\xfe'
+        (b'\xff\xfe', 'utf-16-le'),
+        (b'\xfe\xff', 'utf-16-be'),
+    ]
+    for bom, enc in BOMS:  # the first matching mark wins
+        if first_bytes.startswith(bom):
+            s = first_bytes[len(bom):].decode(enc, 'replace')  # strip the mark; undecodable bytes are replaced, not raised
+            break
+    else:  # loop ended without break: no byte-order mark was found
+        s = first_bytes.decode('utf-8', 'replace')  # fall back to UTF-8
+
+    return re.match(r'^\s*<', s)  # match object or None, not a strict bool