youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import unicode_literals
   5
   6 import calendar
   7 import codecs
   8 import contextlib
   9 import ctypes
  10 import datetime
  11 import email.utils
  12 import errno
  13 import gzip
  14 import itertools
  15 import io
  16 import json
  17 import locale
  18 import math
  19 import os
  20 import pipes
  21 import platform
  22 import re
  23 import ssl
  24 import socket
  25 import struct
  26 import subprocess
  27 import sys
  28 import tempfile
  29 import traceback
  30 import xml.etree.ElementTree
  31 import zlib
  32
  33 from .compat import (
  34     compat_chr,
  35     compat_getenv,
  36     compat_html_entities,
  37     compat_parse_qs,
  38     compat_str,
  39     compat_urllib_error,
  40     compat_urllib_parse,
  41     compat_urllib_parse_urlparse,
  42     compat_urllib_request,
  43     compat_urlparse,
  44 )
  45
  46
  47 # This is not clearly defined otherwise
  48 compiled_regex_type = type(re.compile(''))
  49
  50 std_headers = {
  51     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
  52     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  53     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  54     'Accept-Encoding': 'gzip, deflate',
  55     'Accept-Language': 'en-us,en;q=0.5',
  56 }
  57
  58 def preferredencoding():
  59     """Get preferred encoding.
  60
  61     Returns the best encoding scheme for the system, based on
  62     locale.getpreferredencoding() and some further tweaks.
  63     """
  64     try:
  65         pref = locale.getpreferredencoding()
  66         'TEST'.encode(pref)
  67     except:
  68         pref = 'UTF-8'
  69
  70     return pref
  71
  72
  73 def write_json_file(obj, fn):
  74     """ Encode obj as JSON and write it to fn, atomically if possible """
  75
  76     fn = encodeFilename(fn)
  77     if sys.version_info < (3, 0) and sys.platform != 'win32':
  78         encoding = get_filesystem_encoding()
  79         # os.path.basename returns a bytes object, but NamedTemporaryFile
  80         # will fail if the filename contains non ascii characters unless we
  81         # use a unicode object
  82         path_basename = lambda f: os.path.basename(fn).decode(encoding)
  83         # the same for os.path.dirname
  84         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
  85     else:
  86         path_basename = os.path.basename
  87         path_dirname = os.path.dirname
  88
  89     args = {
  90         'suffix': '.tmp',
  91         'prefix': path_basename(fn) + '.',
  92         'dir': path_dirname(fn),
  93         'delete': False,
  94     }
  95
  96     # In Python 2.x, json.dump expects a bytestream.
  97     # In Python 3.x, it writes to a character stream
  98     if sys.version_info < (3, 0):
  99         args['mode'] = 'wb'
 100     else:
 101         args.update({
 102             'mode': 'w',
 103             'encoding': 'utf-8',
 104         })
 105
 106     tf = tempfile.NamedTemporaryFile(**args)
 107
 108     try:
 109         with tf:
 110             json.dump(obj, tf)
 111         if sys.platform == 'win32':
 112             # Need to remove existing file on Windows, else os.rename raises
 113             # WindowsError or FileExistsError.
 114             try:
 115                 os.unlink(fn)
 116             except OSError:
 117                 pass
 118         os.rename(tf.name, fn)
 119     except:
 120         try:
 121             os.remove(tf.name)
 122         except OSError:
 123             pass
 124         raise
 125
 126
 127 if sys.version_info >= (2, 7):
 128     def find_xpath_attr(node, xpath, key, val):
 129         """ Find the xpath xpath[@key=val] """
 130         assert re.match(r'^[a-zA-Z-]+$', key)
 131         assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
 132         expr = xpath + u"[@%s='%s']" % (key, val)
 133         return node.find(expr)
 134 else:
 135     def find_xpath_attr(node, xpath, key, val):
 136         # Here comes the crazy part: In 2.6, if the xpath is a unicode,
 137         # .//node does not match if a node is a direct child of . !
 138         if isinstance(xpath, unicode):
 139             xpath = xpath.encode('ascii')
 140
 141         for f in node.findall(xpath):
 142             if f.attrib.get(key) == val:
 143                 return f
 144         return None
 145
 146 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 147 # the namespace parameter
 148 def xpath_with_ns(path, ns_map):
 149     components = [c.split(':') for c in path.split('/')]
 150     replaced = []
 151     for c in components:
 152         if len(c) == 1:
 153             replaced.append(c[0])
 154         else:
 155             ns, tag = c
 156             replaced.append('{%s}%s' % (ns_map[ns], tag))
 157     return '/'.join(replaced)
 158
 159
 160 def xpath_text(node, xpath, name=None, fatal=False):
 161     if sys.version_info < (2, 7):  # Crazy 2.6
 162         xpath = xpath.encode('ascii')
 163
 164     n = node.find(xpath)
 165     if n is None:
 166         if fatal:
 167             name = xpath if name is None else name
 168             raise ExtractorError('Could not find XML element %s' % name)
 169         else:
 170             return None
 171     return n.text
 172
 173
 174 def get_element_by_id(id, html):
 175     """Return the content of the tag with the specified ID in the passed HTML document"""
 176     return get_element_by_attribute("id", id, html)
 177
 178
 179 def get_element_by_attribute(attribute, value, html):
 180     """Return the content of the tag with the specified attribute in the passed HTML document"""
 181
 182     m = re.search(r'''(?xs)
 183         <([a-zA-Z0-9:._-]+)
 184          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 185          \s+%s=['"]?%s['"]?
 186          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 187         \s*>
 188         (?P<content>.*?)
 189         </\1>
 190     ''' % (re.escape(attribute), re.escape(value)), html)
 191
 192     if not m:
 193         return None
 194     res = m.group('content')
 195
 196     if res.startswith('"') or res.startswith("'"):
 197         res = res[1:-1]
 198
 199     return unescapeHTML(res)
 200
 201
 202 def clean_html(html):
 203     """Clean an HTML snippet into a readable string"""
 204     # Newline vs <br />
 205     html = html.replace('\n', ' ')
 206     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 207     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 208     # Strip html tags
 209     html = re.sub('<.*?>', '', html)
 210     # Replace html entities
 211     html = unescapeHTML(html)
 212     return html.strip()
 213
 214
 215 def sanitize_open(filename, open_mode):
 216     """Try to open the given filename, and slightly tweak it if this fails.
 217
 218     Attempts to open the given filename. If this fails, it tries to change
 219     the filename slightly, step by step, until it's either able to open it
 220     or it fails and raises a final exception, like the standard open()
 221     function.
 222
 223     It returns the tuple (stream, definitive_file_name).
 224     """
 225     try:
 226         if filename == '-':
 227             if sys.platform == 'win32':
 228                 import msvcrt
 229                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 230             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 231         stream = open(encodeFilename(filename), open_mode)
 232         return (stream, filename)
 233     except (IOError, OSError) as err:
 234         if err.errno in (errno.EACCES,):
 235             raise
 236
 237         # In case of error, try to remove win32 forbidden chars
 238         alt_filename = os.path.join(
 239                         re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
 240                         for path_part in os.path.split(filename)
 241                        )
 242         if alt_filename == filename:
 243             raise
 244         else:
 245             # An exception here should be caught in the caller
 246             stream = open(encodeFilename(filename), open_mode)
 247             return (stream, alt_filename)
 248
 249
 250 def timeconvert(timestr):
 251     """Convert RFC 2822 defined time string into system timestamp"""
 252     timestamp = None
 253     timetuple = email.utils.parsedate_tz(timestr)
 254     if timetuple is not None:
 255         timestamp = email.utils.mktime_tz(timetuple)
 256     return timestamp
 257
 258 def sanitize_filename(s, restricted=False, is_id=False):
 259     """Sanitizes a string so it could be used as part of a filename.
 260     If restricted is set, use a stricter subset of allowed characters.
 261     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 262     """
 263     def replace_insane(char):
 264         if char == '?' or ord(char) < 32 or ord(char) == 127:
 265             return ''
 266         elif char == '"':
 267             return '' if restricted else '\''
 268         elif char == ':':
 269             return '_-' if restricted else ' -'
 270         elif char in '\\/|*<>':
 271             return '_'
 272         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 273             return '_'
 274         if restricted and ord(char) > 127:
 275             return '_'
 276         return char
 277
 278     result = ''.join(map(replace_insane, s))
 279     if not is_id:
 280         while '__' in result:
 281             result = result.replace('__', '_')
 282         result = result.strip('_')
 283         # Common case of "Foreign band name - English song title"
 284         if restricted and result.startswith('-_'):
 285             result = result[2:]
 286         if not result:
 287             result = '_'
 288     return result
 289
 290 def orderedSet(iterable):
 291     """ Remove all duplicates from the input iterable """
 292     res = []
 293     for el in iterable:
 294         if el not in res:
 295             res.append(el)
 296     return res
 297
 298
 299 def _htmlentity_transform(entity):
 300     """Transforms an HTML entity to a character."""
 301     # Known non-numeric HTML entity
 302     if entity in compat_html_entities.name2codepoint:
 303         return compat_chr(compat_html_entities.name2codepoint[entity])
 304
 305     mobj = re.match(r'#(x?[0-9]+)', entity)
 306     if mobj is not None:
 307         numstr = mobj.group(1)
 308         if numstr.startswith('x'):
 309             base = 16
 310             numstr = '0%s' % numstr
 311         else:
 312             base = 10
 313         return compat_chr(int(numstr, base))
 314
 315     # Unknown entity in name, return its literal representation
 316     return ('&%s;' % entity)
 317
 318
 319 def unescapeHTML(s):
 320     if s is None:
 321         return None
 322     assert type(s) == compat_str
 323
 324     return re.sub(
 325         r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
 326
 327
 328 def encodeFilename(s, for_subprocess=False):
 329     """
 330     @param s The name of the file
 331     """
 332
 333     assert type(s) == compat_str
 334
 335     # Python 3 has a Unicode API
 336     if sys.version_info >= (3, 0):
 337         return s
 338
 339     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 340         # Pass '' directly to use Unicode APIs on Windows 2000 and up
 341         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 342         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 343         if not for_subprocess:
 344             return s
 345         else:
 346             # For subprocess calls, encode with locale encoding
 347             # Refer to http://stackoverflow.com/a/9951851/35070
 348             encoding = preferredencoding()
 349     else:
 350         encoding = sys.getfilesystemencoding()
 351     if encoding is None:
 352         encoding = 'utf-8'
 353     return s.encode(encoding, 'ignore')
 354
 355
 356 def encodeArgument(s):
 357     if not isinstance(s, compat_str):
 358         # Legacy code that uses byte strings
 359         # Uncomment the following line after fixing all post processors
 360         #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 361         s = s.decode('ascii')
 362     return encodeFilename(s, True)
 363
 364
 365 def decodeOption(optval):
 366     if optval is None:
 367         return optval
 368     if isinstance(optval, bytes):
 369         optval = optval.decode(preferredencoding())
 370
 371     assert isinstance(optval, compat_str)
 372     return optval
 373
 374 def formatSeconds(secs):
 375     if secs > 3600:
 376         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 377     elif secs > 60:
 378         return '%d:%02d' % (secs // 60, secs % 60)
 379     else:
 380         return '%d' % secs
 381
 382
 383 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
 384     if sys.version_info < (3, 2):
 385         import httplib
 386
 387         class HTTPSConnectionV3(httplib.HTTPSConnection):
 388             def __init__(self, *args, **kwargs):
 389                 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
 390
 391             def connect(self):
 392                 sock = socket.create_connection((self.host, self.port), self.timeout)
 393                 if getattr(self, '_tunnel_host', False):
 394                     self.sock = sock
 395                     self._tunnel()
 396                 try:
 397                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
 398                 except ssl.SSLError:
 399                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
 400
 401         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
 402             def https_open(self, req):
 403                 return self.do_open(HTTPSConnectionV3, req)
 404         return HTTPSHandlerV3(**kwargs)
 405     elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
 406         context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
 407         context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
 408         if opts_no_check_certificate:
 409             context.verify_mode = ssl.CERT_NONE
 410         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 411     else:  # Python < 3.4
 412         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 413         context.verify_mode = (ssl.CERT_NONE
 414                                if opts_no_check_certificate
 415                                else ssl.CERT_REQUIRED)
 416         context.set_default_verify_paths()
 417         try:
 418             context.load_default_certs()
 419         except AttributeError:
 420             pass  # Python < 3.4
 421         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 422
 423
 424 class ExtractorError(Exception):
 425     """Error during info extraction."""
 426     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
 427         """ tb, if given, is the original traceback (so that it can be printed out).
 428         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 429         """
 430
 431         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 432             expected = True
 433         if video_id is not None:
 434             msg = video_id + ': ' + msg
 435         if cause:
 436             msg += ' (caused by %r)' % cause
 437         if not expected:
 438             if ytdl_is_updateable():
 439                 update_cmd = 'type  youtube-dl -U  to update'
 440             else:
 441                 update_cmd = 'see  https://yt-dl.org/update  on how to update'
 442             msg += '; please report this issue on https://yt-dl.org/bug .'
 443             msg += ' Make sure you are using the latest version; %s.' % update_cmd
 444             msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
 445         super(ExtractorError, self).__init__(msg)
 446
 447         self.traceback = tb
 448         self.exc_info = sys.exc_info()  # preserve original exception
 449         self.cause = cause
 450         self.video_id = video_id
 451
 452     def format_traceback(self):
 453         if self.traceback is None:
 454             return None
 455         return ''.join(traceback.format_tb(self.traceback))
 456
 457
 458 class RegexNotFoundError(ExtractorError):
 459     """Error when a regex didn't match"""
 460     pass
 461
 462
 463 class DownloadError(Exception):
 464     """Download Error exception.
 465
 466     This exception may be thrown by FileDownloader objects if they are not
 467     configured to continue on errors. They will contain the appropriate
 468     error message.
 469     """
 470     def __init__(self, msg, exc_info=None):
 471         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 472         super(DownloadError, self).__init__(msg)
 473         self.exc_info = exc_info
 474
 475
 476 class SameFileError(Exception):
 477     """Same File exception.
 478
 479     This exception will be thrown by FileDownloader objects if they detect
 480     multiple files would have to be downloaded to the same file on disk.
 481     """
 482     pass
 483
 484
 485 class PostProcessingError(Exception):
 486     """Post Processing exception.
 487
 488     This exception may be raised by PostProcessor's .run() method to
 489     indicate an error in the postprocessing task.
 490     """
 491     def __init__(self, msg):
 492         self.msg = msg
 493
 494 class MaxDownloadsReached(Exception):
 495     """ --max-downloads limit has been reached. """
 496     pass
 497
 498
 499 class UnavailableVideoError(Exception):
 500     """Unavailable Format exception.
 501
 502     This exception will be thrown when a video is requested
 503     in a format that is not available for that video.
 504     """
 505     pass
 506
 507
 508 class ContentTooShortError(Exception):
 509     """Content Too Short exception.
 510
 511     This exception may be raised by FileDownloader objects when a file they
 512     download is too small for what the server announced first, indicating
 513     the connection was probably interrupted.
 514     """
 515     # Both in bytes
 516     downloaded = None
 517     expected = None
 518
 519     def __init__(self, downloaded, expected):
 520         self.downloaded = downloaded
 521         self.expected = expected
 522
 523 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 524     """Handler for HTTP requests and responses.
 525
 526     This class, when installed with an OpenerDirector, automatically adds
 527     the standard headers to every HTTP request and handles gzipped and
 528     deflated responses from web servers. If compression is to be avoided in
 529     a particular request, the original request in the program code only has
 530     to include the HTTP header "Youtubedl-No-Compression", which will be
 531     removed before making the real request.
 532
 533     Part of this code was copied from:
 534
 535     http://techknack.net/python-urllib2-handlers/
 536
 537     Andrew Rowls, the author of that code, agreed to release it to the
 538     public domain.
 539     """
 540
 541     @staticmethod
 542     def deflate(data):
 543         try:
 544             return zlib.decompress(data, -zlib.MAX_WBITS)
 545         except zlib.error:
 546             return zlib.decompress(data)
 547
 548     @staticmethod
 549     def addinfourl_wrapper(stream, headers, url, code):
 550         if hasattr(compat_urllib_request.addinfourl, 'getcode'):
 551             return compat_urllib_request.addinfourl(stream, headers, url, code)
 552         ret = compat_urllib_request.addinfourl(stream, headers, url)
 553         ret.code = code
 554         return ret
 555
 556     def http_request(self, req):
 557         for h, v in std_headers.items():
 558             if h not in req.headers:
 559                 req.add_header(h, v)
 560         if 'Youtubedl-no-compression' in req.headers:
 561             if 'Accept-encoding' in req.headers:
 562                 del req.headers['Accept-encoding']
 563             del req.headers['Youtubedl-no-compression']
 564         if 'Youtubedl-user-agent' in req.headers:
 565             if 'User-agent' in req.headers:
 566                 del req.headers['User-agent']
 567             req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
 568             del req.headers['Youtubedl-user-agent']
 569
 570         if sys.version_info < (2, 7) and '#' in req.get_full_url():
 571             # Python 2.6 is brain-dead when it comes to fragments
 572             req._Request__original = req._Request__original.partition('#')[0]
 573             req._Request__r_type = req._Request__r_type.partition('#')[0]
 574
 575         return req
 576
 577     def http_response(self, req, resp):
 578         old_resp = resp
 579         # gzip
 580         if resp.headers.get('Content-encoding', '') == 'gzip':
 581             content = resp.read()
 582             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
 583             try:
 584                 uncompressed = io.BytesIO(gz.read())
 585             except IOError as original_ioerror:
 586                 # There may be junk add the end of the file
 587                 # See http://stackoverflow.com/q/4928560/35070 for details
 588                 for i in range(1, 1024):
 589                     try:
 590                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
 591                         uncompressed = io.BytesIO(gz.read())
 592                     except IOError:
 593                         continue
 594                     break
 595                 else:
 596                     raise original_ioerror
 597             resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
 598             resp.msg = old_resp.msg
 599         # deflate
 600         if resp.headers.get('Content-encoding', '') == 'deflate':
 601             gz = io.BytesIO(self.deflate(resp.read()))
 602             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 603             resp.msg = old_resp.msg
 604         return resp
 605
 606     https_request = http_request
 607     https_response = http_response
 608
 609
 610 def parse_iso8601(date_str, delimiter='T'):
 611     """ Return a UNIX timestamp from the given date """
 612
 613     if date_str is None:
 614         return None
 615
 616     m = re.search(
 617         r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
 618         date_str)
 619     if not m:
 620         timezone = datetime.timedelta()
 621     else:
 622         date_str = date_str[:-len(m.group(0))]
 623         if not m.group('sign'):
 624             timezone = datetime.timedelta()
 625         else:
 626             sign = 1 if m.group('sign') == '+' else -1
 627             timezone = datetime.timedelta(
 628                 hours=sign * int(m.group('hours')),
 629                 minutes=sign * int(m.group('minutes')))
 630     date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
 631     dt = datetime.datetime.strptime(date_str, date_format) - timezone
 632     return calendar.timegm(dt.timetuple())
 633
 634
 635 def unified_strdate(date_str):
 636     """Return a string with the date in the format YYYYMMDD"""
 637
 638     if date_str is None:
 639         return None
 640
 641     upload_date = None
 642     #Replace commas
 643     date_str = date_str.replace(',', ' ')
 644     # %z (UTC offset) is only supported in python>=3.2
 645     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
 646     format_expressions = [
 647         '%d %B %Y',
 648         '%d %b %Y',
 649         '%B %d %Y',
 650         '%b %d %Y',
 651         '%b %dst %Y %I:%M%p',
 652         '%b %dnd %Y %I:%M%p',
 653         '%b %dth %Y %I:%M%p',
 654         '%Y-%m-%d',
 655         '%Y/%m/%d',
 656         '%d.%m.%Y',
 657         '%d/%m/%Y',
 658         '%d/%m/%y',
 659         '%Y/%m/%d %H:%M:%S',
 660         '%d/%m/%Y %H:%M:%S',
 661         '%Y-%m-%d %H:%M:%S',
 662         '%Y-%m-%d %H:%M:%S.%f',
 663         '%d.%m.%Y %H:%M',
 664         '%d.%m.%Y %H.%M',
 665         '%Y-%m-%dT%H:%M:%SZ',
 666         '%Y-%m-%dT%H:%M:%S.%fZ',
 667         '%Y-%m-%dT%H:%M:%S.%f0Z',
 668         '%Y-%m-%dT%H:%M:%S',
 669         '%Y-%m-%dT%H:%M:%S.%f',
 670         '%Y-%m-%dT%H:%M',
 671     ]
 672     for expression in format_expressions:
 673         try:
 674             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 675         except ValueError:
 676             pass
 677     if upload_date is None:
 678         timetuple = email.utils.parsedate_tz(date_str)
 679         if timetuple:
 680             upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
 681     return upload_date
 682
 683 def determine_ext(url, default_ext='unknown_video'):
 684     if url is None:
 685         return default_ext
 686     guess = url.partition('?')[0].rpartition('.')[2]
 687     if re.match(r'^[A-Za-z0-9]+$', guess):
 688         return guess
 689     else:
 690         return default_ext
 691
 692 def subtitles_filename(filename, sub_lang, sub_format):
 693     return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
 694
 695 def date_from_str(date_str):
 696     """
 697     Return a datetime object from a string in the format YYYYMMDD or
 698     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 699     today = datetime.date.today()
 700     if date_str == 'now'or date_str == 'today':
 701         return today
 702     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 703     if match is not None:
 704         sign = match.group('sign')
 705         time = int(match.group('time'))
 706         if sign == '-':
 707             time = -time
 708         unit = match.group('unit')
 709         #A bad aproximation?
 710         if unit == 'month':
 711             unit = 'day'
 712             time *= 30
 713         elif unit == 'year':
 714             unit = 'day'
 715             time *= 365
 716         unit += 's'
 717         delta = datetime.timedelta(**{unit: time})
 718         return today + delta
 719     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 720
 721 def hyphenate_date(date_str):
 722     """
 723     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
 724     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
 725     if match is not None:
 726         return '-'.join(match.groups())
 727     else:
 728         return date_str
 729
 730 class DateRange(object):
 731     """Represents a time interval between two dates"""
 732     def __init__(self, start=None, end=None):
 733         """start and end must be strings in the format accepted by date"""
 734         if start is not None:
 735             self.start = date_from_str(start)
 736         else:
 737             self.start = datetime.datetime.min.date()
 738         if end is not None:
 739             self.end = date_from_str(end)
 740         else:
 741             self.end = datetime.datetime.max.date()
 742         if self.start > self.end:
 743             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 744     @classmethod
 745     def day(cls, day):
 746         """Returns a range that only contains the given day"""
 747         return cls(day,day)
 748     def __contains__(self, date):
 749         """Check if the date is in the range"""
 750         if not isinstance(date, datetime.date):
 751             date = date_from_str(date)
 752         return self.start <= date <= self.end
 753     def __str__(self):
 754         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 755
 756
 757 def platform_name():
 758     """ Returns the platform name as a compat_str """
 759     res = platform.platform()
 760     if isinstance(res, bytes):
 761         res = res.decode(preferredencoding())
 762
 763     assert isinstance(res, compat_str)
 764     return res
 765
 766
 767 def _windows_write_string(s, out):
 768     """ Returns True if the string was written using special methods,
 769     False if it has yet to be written out."""
 770     # Adapted from http://stackoverflow.com/a/3259271/35070
 771
 772     import ctypes
 773     import ctypes.wintypes
 774
 775     WIN_OUTPUT_IDS = {
 776         1: -11,
 777         2: -12,
 778     }
 779
 780     try:
 781         fileno = out.fileno()
 782     except AttributeError:
 783         # If the output stream doesn't have a fileno, it's virtual
 784         return False
 785     if fileno not in WIN_OUTPUT_IDS:
 786         return False
 787
 788     GetStdHandle = ctypes.WINFUNCTYPE(
 789         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
 790         ("GetStdHandle", ctypes.windll.kernel32))
 791     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
 792
 793     WriteConsoleW = ctypes.WINFUNCTYPE(
 794         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
 795         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
 796         ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
 797     written = ctypes.wintypes.DWORD(0)
 798
 799     GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
 800     FILE_TYPE_CHAR = 0x0002
 801     FILE_TYPE_REMOTE = 0x8000
 802     GetConsoleMode = ctypes.WINFUNCTYPE(
 803         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
 804         ctypes.POINTER(ctypes.wintypes.DWORD))(
 805         ("GetConsoleMode", ctypes.windll.kernel32))
 806     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
 807
 808     def not_a_console(handle):
 809         if handle == INVALID_HANDLE_VALUE or handle is None:
 810             return True
 811         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
 812                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
 813
 814     if not_a_console(h):
 815         return False
 816
 817     def next_nonbmp_pos(s):
 818         try:
 819             return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
 820         except StopIteration:
 821             return len(s)
 822
 823     while s:
 824         count = min(next_nonbmp_pos(s), 1024)
 825
 826         ret = WriteConsoleW(
 827             h, s, count if count else 2, ctypes.byref(written), None)
 828         if ret == 0:
 829             raise OSError('Failed to write string')
 830         if not count:  # We just wrote a non-BMP character
 831             assert written.value == 2
 832             s = s[1:]
 833         else:
 834             assert written.value > 0
 835             s = s[written.value:]
 836     return True
 837
 838
 839 def write_string(s, out=None, encoding=None):
 840     if out is None:
 841         out = sys.stderr
 842     assert type(s) == compat_str
 843
 844     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
 845         if _windows_write_string(s, out):
 846             return
 847
 848     if ('b' in getattr(out, 'mode', '') or
 849             sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 850         byt = s.encode(encoding or preferredencoding(), 'ignore')
 851         out.write(byt)
 852     elif hasattr(out, 'buffer'):
 853         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
 854         byt = s.encode(enc, 'ignore')
 855         out.buffer.write(byt)
 856     else:
 857         out.write(s)
 858     out.flush()
 859
 860
 861 def bytes_to_intlist(bs):
 862     if not bs:
 863         return []
 864     if isinstance(bs[0], int):  # Python 3
 865         return list(bs)
 866     else:
 867         return [ord(c) for c in bs]
 868
 869
 870 def intlist_to_bytes(xs):
 871     if not xs:
 872         return b''
 873     return struct_pack('%dB' % len(xs), *xs)
 874
 875
 876 # Cross-platform file locking
 877 if sys.platform == 'win32':
 878     import ctypes.wintypes
 879     import msvcrt
 880
 881     class OVERLAPPED(ctypes.Structure):
 882         _fields_ = [
 883             ('Internal', ctypes.wintypes.LPVOID),
 884             ('InternalHigh', ctypes.wintypes.LPVOID),
 885             ('Offset', ctypes.wintypes.DWORD),
 886             ('OffsetHigh', ctypes.wintypes.DWORD),
 887             ('hEvent', ctypes.wintypes.HANDLE),
 888         ]
 889
 890     kernel32 = ctypes.windll.kernel32
 891     LockFileEx = kernel32.LockFileEx
 892     LockFileEx.argtypes = [
 893         ctypes.wintypes.HANDLE,     # hFile
 894         ctypes.wintypes.DWORD,      # dwFlags
 895         ctypes.wintypes.DWORD,      # dwReserved
 896         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
 897         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
 898         ctypes.POINTER(OVERLAPPED)  # Overlapped
 899     ]
 900     LockFileEx.restype = ctypes.wintypes.BOOL
 901     UnlockFileEx = kernel32.UnlockFileEx
 902     UnlockFileEx.argtypes = [
 903         ctypes.wintypes.HANDLE,     # hFile
 904         ctypes.wintypes.DWORD,      # dwReserved
 905         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
 906         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
 907         ctypes.POINTER(OVERLAPPED)  # Overlapped
 908     ]
 909     UnlockFileEx.restype = ctypes.wintypes.BOOL
 910     whole_low = 0xffffffff
 911     whole_high = 0x7fffffff
 912
 913     def _lock_file(f, exclusive):
 914         overlapped = OVERLAPPED()
 915         overlapped.Offset = 0
 916         overlapped.OffsetHigh = 0
 917         overlapped.hEvent = 0
 918         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
 919         handle = msvcrt.get_osfhandle(f.fileno())
 920         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
 921                           whole_low, whole_high, f._lock_file_overlapped_p):
 922             raise OSError('Locking file failed: %r' % ctypes.FormatError())
 923
 924     def _unlock_file(f):
 925         assert f._lock_file_overlapped_p
 926         handle = msvcrt.get_osfhandle(f.fileno())
 927         if not UnlockFileEx(handle, 0,
 928                             whole_low, whole_high, f._lock_file_overlapped_p):
 929             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
 930
 931 else:
 932     import fcntl
 933
 934     def _lock_file(f, exclusive):
 935         fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
 936
 937     def _unlock_file(f):
 938         fcntl.flock(f, fcntl.LOCK_UN)
 939
 940
 941 class locked_file(object):
 942     def __init__(self, filename, mode, encoding=None):
 943         assert mode in ['r', 'a', 'w']
 944         self.f = io.open(filename, mode, encoding=encoding)
 945         self.mode = mode
 946
 947     def __enter__(self):
 948         exclusive = self.mode != 'r'
 949         try:
 950             _lock_file(self.f, exclusive)
 951         except IOError:
 952             self.f.close()
 953             raise
 954         return self
 955
 956     def __exit__(self, etype, value, traceback):
 957         try:
 958             _unlock_file(self.f)
 959         finally:
 960             self.f.close()
 961
 962     def __iter__(self):
 963         return iter(self.f)
 964
 965     def write(self, *args):
 966         return self.f.write(*args)
 967
 968     def read(self, *args):
 969         return self.f.read(*args)
 970
 971
 972 def get_filesystem_encoding():
 973     encoding = sys.getfilesystemencoding()
 974     return encoding if encoding is not None else 'utf-8'
 975
 976
 977 def shell_quote(args):
 978     quoted_args = []
 979     encoding = get_filesystem_encoding()
 980     for a in args:
 981         if isinstance(a, bytes):
 982             # We may get a filename encoded with 'encodeFilename'
 983             a = a.decode(encoding)
 984         quoted_args.append(pipes.quote(a))
 985     return ' '.join(quoted_args)
 986
 987
 988 def takewhile_inclusive(pred, seq):
 989     """ Like itertools.takewhile, but include the latest evaluated element
 990         (the first element so that Not pred(e)) """
 991     for e in seq:
 992         yield e
 993         if not pred(e):
 994             return
 995
 996
 997 def smuggle_url(url, data):
 998     """ Pass additional data in a URL for internal use. """
 999
1000     sdata = compat_urllib_parse.urlencode(
1001         {'__youtubedl_smuggle': json.dumps(data)})
1002     return url + '#' + sdata
1003
1004
1005 def unsmuggle_url(smug_url, default=None):
1006     if not '#__youtubedl_smuggle' in smug_url:
1007         return smug_url, default
1008     url, _, sdata = smug_url.rpartition('#')
1009     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
1010     data = json.loads(jsond)
1011     return url, data
1012
1013
1014 def format_bytes(bytes):
1015     if bytes is None:
1016         return 'N/A'
1017     if type(bytes) is str:
1018         bytes = float(bytes)
1019     if bytes == 0.0:
1020         exponent = 0
1021     else:
1022         exponent = int(math.log(bytes, 1024.0))
1023     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
1024     converted = float(bytes) / float(1024 ** exponent)
1025     return '%.2f%s' % (converted, suffix)
1026
1027
1028 def get_term_width():
1029     columns = compat_getenv('COLUMNS', None)
1030     if columns:
1031         return int(columns)
1032
1033     try:
1034         sp = subprocess.Popen(
1035             ['stty', 'size'],
1036             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1037         out, err = sp.communicate()
1038         return int(out.split()[1])
1039     except:
1040         pass
1041     return None
1042
1043
1044 def month_by_name(name):
1045     """ Return the number of a month by (locale-independently) English name """
1046
1047     ENGLISH_NAMES = [
1048         'January', 'February', 'March', 'April', 'May', 'June',
1049         'July', 'August', 'September', 'October', 'November', 'December']
1050     try:
1051         return ENGLISH_NAMES.index(name) + 1
1052     except ValueError:
1053         return None
1054
1055
1056 def fix_xml_ampersands(xml_str):
1057     """Replace all the '&' by '&amp;' in XML"""
1058     return re.sub(
1059         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1060         '&amp;',
1061         xml_str)
1062
1063
1064 def setproctitle(title):
1065     assert isinstance(title, compat_str)
1066     try:
1067         libc = ctypes.cdll.LoadLibrary("libc.so.6")
1068     except OSError:
1069         return
1070     title_bytes = title.encode('utf-8')
1071     buf = ctypes.create_string_buffer(len(title_bytes))
1072     buf.value = title_bytes
1073     try:
1074         libc.prctl(15, buf, 0, 0, 0)
1075     except AttributeError:
1076         return  # Strange libc, just skip this
1077
1078
1079 def remove_start(s, start):
1080     if s.startswith(start):
1081         return s[len(start):]
1082     return s
1083
1084
1085 def remove_end(s, end):
1086     if s.endswith(end):
1087         return s[:-len(end)]
1088     return s
1089
1090
1091 def url_basename(url):
1092     path = compat_urlparse.urlparse(url).path
1093     return path.strip('/').split('/')[-1]
1094
1095
1096 class HEADRequest(compat_urllib_request.Request):
1097     def get_method(self):
1098         return "HEAD"
1099
1100
1101 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
1102     if get_attr:
1103         if v is not None:
1104             v = getattr(v, get_attr, None)
1105     if v == '':
1106         v = None
1107     return default if v is None else (int(v) * invscale // scale)
1108
1109
1110 def str_or_none(v, default=None):
1111     return default if v is None else compat_str(v)
1112
1113
1114 def str_to_int(int_str):
1115     """ A more relaxed version of int_or_none """
1116     if int_str is None:
1117         return None
1118     int_str = re.sub(r'[,\.\+]', '', int_str)
1119     return int(int_str)
1120
1121
1122 def float_or_none(v, scale=1, invscale=1, default=None):
1123     return default if v is None else (float(v) * invscale / scale)
1124
1125
1126 def parse_duration(s):
1127     if s is None:
1128         return None
1129
1130     s = s.strip()
1131
1132     m = re.match(
1133         r'''(?ix)T?
1134             (?:
1135                 (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
1136                 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
1137             )?
1138             (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
1139     if not m:
1140         return None
1141     res = int(m.group('secs'))
1142     if m.group('mins'):
1143         res += int(m.group('mins')) * 60
1144         if m.group('hours'):
1145             res += int(m.group('hours')) * 60 * 60
1146     if m.group('ms'):
1147         res += float(m.group('ms'))
1148     return res
1149
1150
1151 def prepend_extension(filename, ext):
1152     name, real_ext = os.path.splitext(filename)
1153     return '{0}.{1}{2}'.format(name, ext, real_ext)
1154
1155
1156 def check_executable(exe, args=[]):
1157     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1158     args can be a list of arguments for a short output (like -version) """
1159     try:
1160         subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1161     except OSError:
1162         return False
1163     return exe
1164
1165
1166 def get_exe_version(exe, args=['--version'],
1167                     version_re=r'version\s+([0-9._-a-zA-Z]+)',
1168                     unrecognized='present'):
1169     """ Returns the version of the specified executable,
1170     or False if the executable is not present """
1171     try:
1172         out, err = subprocess.Popen(
1173             [exe] + args,
1174             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
1175     except OSError:
1176         return False
1177     firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
1178     m = re.search(version_re, firstline)
1179     if m:
1180         return m.group(1)
1181     else:
1182         return unrecognized
1183
1184
1185 class PagedList(object):
1186     def __len__(self):
1187         # This is only useful for tests
1188         return len(self.getslice())
1189
1190
1191 class OnDemandPagedList(PagedList):
1192     def __init__(self, pagefunc, pagesize):
1193         self._pagefunc = pagefunc
1194         self._pagesize = pagesize
1195
1196     def getslice(self, start=0, end=None):
1197         res = []
1198         for pagenum in itertools.count(start // self._pagesize):
1199             firstid = pagenum * self._pagesize
1200             nextfirstid = pagenum * self._pagesize + self._pagesize
1201             if start >= nextfirstid:
1202                 continue
1203
1204             page_results = list(self._pagefunc(pagenum))
1205
1206             startv = (
1207                 start % self._pagesize
1208                 if firstid <= start < nextfirstid
1209                 else 0)
1210
1211             endv = (
1212                 ((end - 1) % self._pagesize) + 1
1213                 if (end is not None and firstid <= end <= nextfirstid)
1214                 else None)
1215
1216             if startv != 0 or endv is not None:
1217                 page_results = page_results[startv:endv]
1218             res.extend(page_results)
1219
1220             # A little optimization - if current page is not "full", ie. does
1221             # not contain page_size videos then we can assume that this page
1222             # is the last one - there are no more ids on further pages -
1223             # i.e. no need to query again.
1224             if len(page_results) + startv < self._pagesize:
1225                 break
1226
1227             # If we got the whole page, but the next page is not interesting,
1228             # break out early as well
1229             if end == nextfirstid:
1230                 break
1231         return res
1232
1233
1234 class InAdvancePagedList(PagedList):
1235     def __init__(self, pagefunc, pagecount, pagesize):
1236         self._pagefunc = pagefunc
1237         self._pagecount = pagecount
1238         self._pagesize = pagesize
1239
1240     def getslice(self, start=0, end=None):
1241         res = []
1242         start_page = start // self._pagesize
1243         end_page = (
1244             self._pagecount if end is None else (end // self._pagesize + 1))
1245         skip_elems = start - start_page * self._pagesize
1246         only_more = None if end is None else end - start
1247         for pagenum in range(start_page, end_page):
1248             page = list(self._pagefunc(pagenum))
1249             if skip_elems:
1250                 page = page[skip_elems:]
1251                 skip_elems = None
1252             if only_more is not None:
1253                 if len(page) < only_more:
1254                     only_more -= len(page)
1255                 else:
1256                     page = page[:only_more]
1257                     res.extend(page)
1258                     break
1259             res.extend(page)
1260         return res
1261
1262
1263 def uppercase_escape(s):
1264     unicode_escape = codecs.getdecoder('unicode_escape')
1265     return re.sub(
1266         r'\\U[0-9a-fA-F]{8}',
1267         lambda m: unicode_escape(m.group(0))[0],
1268         s)
1269
1270
1271 def escape_rfc3986(s):
1272     """Escape non-ASCII characters as suggested by RFC 3986"""
1273     if sys.version_info < (3, 0) and isinstance(s, unicode):
1274         s = s.encode('utf-8')
1275     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
1276
1277
1278 def escape_url(url):
1279     """Escape URL as suggested by RFC 3986"""
1280     url_parsed = compat_urllib_parse_urlparse(url)
1281     return url_parsed._replace(
1282         path=escape_rfc3986(url_parsed.path),
1283         params=escape_rfc3986(url_parsed.params),
1284         query=escape_rfc3986(url_parsed.query),
1285         fragment=escape_rfc3986(url_parsed.fragment)
1286     ).geturl()
1287
1288 try:
1289     struct.pack('!I', 0)
1290 except TypeError:
1291     # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
1292     def struct_pack(spec, *args):
1293         if isinstance(spec, compat_str):
1294             spec = spec.encode('ascii')
1295         return struct.pack(spec, *args)
1296
1297     def struct_unpack(spec, *args):
1298         if isinstance(spec, compat_str):
1299             spec = spec.encode('ascii')
1300         return struct.unpack(spec, *args)
1301 else:
1302     struct_pack = struct.pack
1303     struct_unpack = struct.unpack
1304
1305
1306 def read_batch_urls(batch_fd):
1307     def fixup(url):
1308         if not isinstance(url, compat_str):
1309             url = url.decode('utf-8', 'replace')
1310         BOM_UTF8 = '\xef\xbb\xbf'
1311         if url.startswith(BOM_UTF8):
1312             url = url[len(BOM_UTF8):]
1313         url = url.strip()
1314         if url.startswith(('#', ';', ']')):
1315             return False
1316         return url
1317
1318     with contextlib.closing(batch_fd) as fd:
1319         return [url for url in map(fixup, fd) if url]
1320
1321
1322 def urlencode_postdata(*args, **kargs):
1323     return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
1324
1325
1326 try:
1327     etree_iter = xml.etree.ElementTree.Element.iter
1328 except AttributeError:  # Python <=2.6
1329     etree_iter = lambda n: n.findall('.//*')
1330
1331
1332 def parse_xml(s):
1333     class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
1334         def doctype(self, name, pubid, system):
1335             pass  # Ignore doctypes
1336
1337     parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
1338     kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
1339     tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
1340     # Fix up XML parser in Python 2.x
1341     if sys.version_info < (3, 0):
1342         for n in etree_iter(tree):
1343             if n.text is not None:
1344                 if not isinstance(n.text, compat_str):
1345                     n.text = n.text.decode('utf-8')
1346     return tree
1347
1348
1349 US_RATINGS = {
1350     'G': 0,
1351     'PG': 10,
1352     'PG-13': 13,
1353     'R': 16,
1354     'NC': 18,
1355 }
1356
1357
1358 def parse_age_limit(s):
1359     if s is None:
1360         return None
1361     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
1362     return int(m.group('age')) if m else US_RATINGS.get(s, None)
1363
1364
1365 def strip_jsonp(code):
1366     return re.sub(
1367         r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
1368
1369
1370 def js_to_json(code):
1371     def fix_kv(m):
1372         v = m.group(0)
1373         if v in ('true', 'false', 'null'):
1374             return v
1375         if v.startswith('"'):
1376             return v
1377         if v.startswith("'"):
1378             v = v[1:-1]
1379             v = re.sub(r"\\\\|\\'|\"", lambda m: {
1380                 '\\\\': '\\\\',
1381                 "\\'": "'",
1382                 '"': '\\"',
1383             }[m.group(0)], v)
1384         return '"%s"' % v
1385
1386     res = re.sub(r'''(?x)
1387         "(?:[^"\\]*(?:\\\\|\\")?)*"|
1388         '(?:[^'\\]*(?:\\\\|\\')?)*'|
1389         [a-zA-Z_][a-zA-Z_0-9]*
1390         ''', fix_kv, code)
1391     res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
1392     return res
1393
1394
1395 def qualities(quality_ids):
1396     """ Get a numeric quality value out of a list of possible values """
1397     def q(qid):
1398         try:
1399             return quality_ids.index(qid)
1400         except ValueError:
1401             return -1
1402     return q
1403
1404
1405 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1406
1407
1408 def limit_length(s, length):
1409     """ Add ellipses to overly long strings """
1410     if s is None:
1411         return None
1412     ELLIPSES = '...'
1413     if len(s) > length:
1414         return s[:length - len(ELLIPSES)] + ELLIPSES
1415     return s
1416
1417
1418 def version_tuple(v):
1419     return [int(e) for e in v.split('.')]
1420
1421
1422 def is_outdated_version(version, limit, assume_new=True):
1423     if not version:
1424         return not assume_new
1425     try:
1426         return version_tuple(version) < version_tuple(limit)
1427     except ValueError:
1428         return not assume_new
1429
1430
1431 def ytdl_is_updateable():
1432     """ Returns if youtube-dl can be updated with -U """
1433     from zipimport import zipimporter
1434
1435     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')