utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import errno
   5 import gzip
   6 import io
   7 import json
   8 import locale
   9 import os
  10 import re
  11 import sys
  12 import traceback
  13 import zlib
  14 import email.utils
  15 import socket
  16 import datetime
  17
  18 try:
  19     import urllib.request as compat_urllib_request
  20 except ImportError: # Python 2
  21     import urllib2 as compat_urllib_request
  22
  23 try:
  24     import urllib.error as compat_urllib_error
  25 except ImportError: # Python 2
  26     import urllib2 as compat_urllib_error
  27
  28 try:
  29     import urllib.parse as compat_urllib_parse
  30 except ImportError: # Python 2
  31     import urllib as compat_urllib_parse
  32
  33 try:
  34     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  35 except ImportError: # Python 2
  36     from urlparse import urlparse as compat_urllib_parse_urlparse
  37
  38 try:
  39     import http.cookiejar as compat_cookiejar
  40 except ImportError: # Python 2
  41     import cookielib as compat_cookiejar
  42
  43 try:
  44     import html.entities as compat_html_entities
  45 except ImportError: # Python 2
  46     import htmlentitydefs as compat_html_entities
  47
  48 try:
  49     import html.parser as compat_html_parser
  50 except ImportError: # Python 2
  51     import HTMLParser as compat_html_parser
  52
  53 try:
  54     import http.client as compat_http_client
  55 except ImportError: # Python 2
  56     import httplib as compat_http_client
  57
  58 try:
  59     from subprocess import DEVNULL
  60     compat_subprocess_get_DEVNULL = lambda: DEVNULL
  61 except ImportError:
  62     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  63
  64 try:
  65     from urllib.parse import parse_qs as compat_parse_qs
  66 except ImportError: # Python 2
  67     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
  68     # Python 2's version is apparently totally broken
  69     def _unquote(string, encoding='utf-8', errors='replace'):
  70         if string == '':
  71             return string
  72         res = string.split('%')
  73         if len(res) == 1:
  74             return string
  75         if encoding is None:
  76             encoding = 'utf-8'
  77         if errors is None:
  78             errors = 'replace'
  79         # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
  80         pct_sequence = b''
  81         string = res[0]
  82         for item in res[1:]:
  83             try:
  84                 if not item:
  85                     raise ValueError
  86                 pct_sequence += item[:2].decode('hex')
  87                 rest = item[2:]
  88                 if not rest:
  89                     # This segment was just a single percent-encoded character.
  90                     # May be part of a sequence of code units, so delay decoding.
  91                     # (Stored in pct_sequence).
  92                     continue
  93             except ValueError:
  94                 rest = '%' + item
  95             # Encountered non-percent-encoded characters. Flush the current
  96             # pct_sequence.
  97             string += pct_sequence.decode(encoding, errors) + rest
  98             pct_sequence = b''
  99         if pct_sequence:
 100             # Flush the final pct_sequence
 101             string += pct_sequence.decode(encoding, errors)
 102         return string
 103
 104     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
 105                 encoding='utf-8', errors='replace'):
 106         qs, _coerce_result = qs, unicode
 107         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 108         r = []
 109         for name_value in pairs:
 110             if not name_value and not strict_parsing:
 111                 continue
 112             nv = name_value.split('=', 1)
 113             if len(nv) != 2:
 114                 if strict_parsing:
 115                     raise ValueError("bad query field: %r" % (name_value,))
 116                 # Handle case of a control-name with no equal sign
 117                 if keep_blank_values:
 118                     nv.append('')
 119                 else:
 120                     continue
 121             if len(nv[1]) or keep_blank_values:
 122                 name = nv[0].replace('+', ' ')
 123                 name = _unquote(name, encoding=encoding, errors=errors)
 124                 name = _coerce_result(name)
 125                 value = nv[1].replace('+', ' ')
 126                 value = _unquote(value, encoding=encoding, errors=errors)
 127                 value = _coerce_result(value)
 128                 r.append((name, value))
 129         return r
 130
 131     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 132                 encoding='utf-8', errors='replace'):
 133         parsed_result = {}
 134         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
 135                         encoding=encoding, errors=errors)
 136         for name, value in pairs:
 137             if name in parsed_result:
 138                 parsed_result[name].append(value)
 139             else:
 140                 parsed_result[name] = [value]
 141         return parsed_result
 142
 143 try:
 144     compat_str = unicode # Python 2
 145 except NameError:
 146     compat_str = str
 147
 148 try:
 149     compat_chr = unichr # Python 2
 150 except NameError:
 151     compat_chr = chr
 152
 153 def compat_ord(c):
 154     if type(c) is int: return c
 155     else: return ord(c)
 156
 157 # This is not clearly defined otherwise
 158 compiled_regex_type = type(re.compile(''))
 159
 160 std_headers = {
 161     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
 162     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 163     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 164     'Accept-Encoding': 'gzip, deflate',
 165     'Accept-Language': 'en-us,en;q=0.5',
 166 }
 167
 168 def preferredencoding():
 169     """Get preferred encoding.
 170
 171     Returns the best encoding scheme for the system, based on
 172     locale.getpreferredencoding() and some further tweaks.
 173     """
 174     try:
 175         pref = locale.getpreferredencoding()
 176         u'TEST'.encode(pref)
 177     except:
 178         pref = 'UTF-8'
 179
 180     return pref
 181
 182 if sys.version_info < (3,0):
 183     def compat_print(s):
 184         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 185 else:
 186     def compat_print(s):
 187         assert type(s) == type(u'')
 188         print(s)
 189
 190 # In Python 2.x, json.dump expects a bytestream.
 191 # In Python 3.x, it writes to a character stream
 192 if sys.version_info < (3,0):
 193     def write_json_file(obj, fn):
 194         with open(fn, 'wb') as f:
 195             json.dump(obj, f)
 196 else:
 197     def write_json_file(obj, fn):
 198         with open(fn, 'w', encoding='utf-8') as f:
 199             json.dump(obj, f)
 200
 201 if sys.version_info >= (2,7):
 202     def find_xpath_attr(node, xpath, key, val):
 203         """ Find the xpath xpath[@key=val] """
 204         assert re.match(r'^[a-z]+$', key)
 205         assert re.match(r'^[a-z]*$', val)
 206         expr = xpath + u"[@%s='%s']" % (key, val)
 207         return node.find(expr)
 208 else:
 209     def find_xpath_attr(node, xpath, key, val):
 210         for f in node.findall(xpath):
 211             if f.attrib.get(key) == val:
 212                 return f
 213         return None
 214
 215 def htmlentity_transform(matchobj):
 216     """Transforms an HTML entity to a character.
 217
 218     This function receives a match object and is intended to be used with
 219     the re.sub() function.
 220     """
 221     entity = matchobj.group(1)
 222
 223     # Known non-numeric HTML entity
 224     if entity in compat_html_entities.name2codepoint:
 225         return compat_chr(compat_html_entities.name2codepoint[entity])
 226
 227     mobj = re.match(u'(?u)#(x?\\d+)', entity)
 228     if mobj is not None:
 229         numstr = mobj.group(1)
 230         if numstr.startswith(u'x'):
 231             base = 16
 232             numstr = u'0%s' % numstr
 233         else:
 234             base = 10
 235         return compat_chr(int(numstr, base))
 236
 237     # Unknown entity in name, return its literal representation
 238     return (u'&%s;' % entity)
 239
 240 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
 241 class AttrParser(compat_html_parser.HTMLParser):
 242     """Modified HTMLParser that isolates a tag with the specified attribute"""
 243     def __init__(self, attribute, value):
 244         self.attribute = attribute
 245         self.value = value
 246         self.result = None
 247         self.started = False
 248         self.depth = {}
 249         self.html = None
 250         self.watch_startpos = False
 251         self.error_count = 0
 252         compat_html_parser.HTMLParser.__init__(self)
 253
 254     def error(self, message):
 255         if self.error_count > 10 or self.started:
 256             raise compat_html_parser.HTMLParseError(message, self.getpos())
 257         self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
 258         self.error_count += 1
 259         self.goahead(1)
 260
 261     def loads(self, html):
 262         self.html = html
 263         self.feed(html)
 264         self.close()
 265
 266     def handle_starttag(self, tag, attrs):
 267         attrs = dict(attrs)
 268         if self.started:
 269             self.find_startpos(None)
 270         if self.attribute in attrs and attrs[self.attribute] == self.value:
 271             self.result = [tag]
 272             self.started = True
 273             self.watch_startpos = True
 274         if self.started:
 275             if not tag in self.depth: self.depth[tag] = 0
 276             self.depth[tag] += 1
 277
 278     def handle_endtag(self, tag):
 279         if self.started:
 280             if tag in self.depth: self.depth[tag] -= 1
 281             if self.depth[self.result[0]] == 0:
 282                 self.started = False
 283                 self.result.append(self.getpos())
 284
 285     def find_startpos(self, x):
 286         """Needed to put the start position of the result (self.result[1])
 287         after the opening tag with the requested id"""
 288         if self.watch_startpos:
 289             self.watch_startpos = False
 290             self.result.append(self.getpos())
 291     handle_entityref = handle_charref = handle_data = handle_comment = \
 292     handle_decl = handle_pi = unknown_decl = find_startpos
 293
 294     def get_result(self):
 295         if self.result is None:
 296             return None
 297         if len(self.result) != 3:
 298             return None
 299         lines = self.html.split('\n')
 300         lines = lines[self.result[1][0]-1:self.result[2][0]]
 301         lines[0] = lines[0][self.result[1][1]:]
 302         if len(lines) == 1:
 303             lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
 304         lines[-1] = lines[-1][:self.result[2][1]]
 305         return '\n'.join(lines).strip()
 306 # Hack for https://github.com/rg3/youtube-dl/issues/662
 307 if sys.version_info < (2, 7, 3):
 308     AttrParser.parse_endtag = (lambda self, i:
 309         i + len("</scr'+'ipt>")
 310         if self.rawdata[i:].startswith("</scr'+'ipt>")
 311         else compat_html_parser.HTMLParser.parse_endtag(self, i))
 312
 313 def get_element_by_id(id, html):
 314     """Return the content of the tag with the specified ID in the passed HTML document"""
 315     return get_element_by_attribute("id", id, html)
 316
 317 def get_element_by_attribute(attribute, value, html):
 318     """Return the content of the tag with the specified attribute in the passed HTML document"""
 319     parser = AttrParser(attribute, value)
 320     try:
 321         parser.loads(html)
 322     except compat_html_parser.HTMLParseError:
 323         pass
 324     return parser.get_result()
 325
 326
 327 def clean_html(html):
 328     """Clean an HTML snippet into a readable string"""
 329     # Newline vs <br />
 330     html = html.replace('\n', ' ')
 331     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 332     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 333     # Strip html tags
 334     html = re.sub('<.*?>', '', html)
 335     # Replace html entities
 336     html = unescapeHTML(html)
 337     return html.strip()
 338
 339
 340 def sanitize_open(filename, open_mode):
 341     """Try to open the given filename, and slightly tweak it if this fails.
 342
 343     Attempts to open the given filename. If this fails, it tries to change
 344     the filename slightly, step by step, until it's either able to open it
 345     or it fails and raises a final exception, like the standard open()
 346     function.
 347
 348     It returns the tuple (stream, definitive_file_name).
 349     """
 350     try:
 351         if filename == u'-':
 352             if sys.platform == 'win32':
 353                 import msvcrt
 354                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 355             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 356         stream = open(encodeFilename(filename), open_mode)
 357         return (stream, filename)
 358     except (IOError, OSError) as err:
 359         if err.errno in (errno.EACCES,):
 360             raise
 361
 362         # In case of error, try to remove win32 forbidden chars
 363         alt_filename = os.path.join(
 364                         re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
 365                         for path_part in os.path.split(filename)
 366                        )
 367         if alt_filename == filename:
 368             raise
 369         else:
 370             # An exception here should be caught in the caller
 371             stream = open(encodeFilename(filename), open_mode)
 372             return (stream, alt_filename)
 373
 374
 375 def timeconvert(timestr):
 376     """Convert RFC 2822 defined time string into system timestamp"""
 377     timestamp = None
 378     timetuple = email.utils.parsedate_tz(timestr)
 379     if timetuple is not None:
 380         timestamp = email.utils.mktime_tz(timetuple)
 381     return timestamp
 382
 383 def sanitize_filename(s, restricted=False, is_id=False):
 384     """Sanitizes a string so it could be used as part of a filename.
 385     If restricted is set, use a stricter subset of allowed characters.
 386     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 387     """
 388     def replace_insane(char):
 389         if char == '?' or ord(char) < 32 or ord(char) == 127:
 390             return ''
 391         elif char == '"':
 392             return '' if restricted else '\''
 393         elif char == ':':
 394             return '_-' if restricted else ' -'
 395         elif char in '\\/|*<>':
 396             return '_'
 397         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 398             return '_'
 399         if restricted and ord(char) > 127:
 400             return '_'
 401         return char
 402
 403     result = u''.join(map(replace_insane, s))
 404     if not is_id:
 405         while '__' in result:
 406             result = result.replace('__', '_')
 407         result = result.strip('_')
 408         # Common case of "Foreign band name - English song title"
 409         if restricted and result.startswith('-_'):
 410             result = result[2:]
 411         if not result:
 412             result = '_'
 413     return result
 414
 415 def orderedSet(iterable):
 416     """ Remove all duplicates from the input iterable """
 417     res = []
 418     for el in iterable:
 419         if el not in res:
 420             res.append(el)
 421     return res
 422
 423 def unescapeHTML(s):
 424     """
 425     @param s a string
 426     """
 427     assert type(s) == type(u'')
 428
 429     result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
 430     return result
 431
 432 def encodeFilename(s):
 433     """
 434     @param s The name of the file
 435     """
 436
 437     assert type(s) == type(u'')
 438
 439     # Python 3 has a Unicode API
 440     if sys.version_info >= (3, 0):
 441         return s
 442
 443     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 444         # Pass u'' directly to use Unicode APIs on Windows 2000 and up
 445         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 446         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 447         return s
 448     else:
 449         encoding = sys.getfilesystemencoding()
 450         if encoding is None:
 451             encoding = 'utf-8'
 452         return s.encode(encoding, 'ignore')
 453
 454 def decodeOption(optval):
 455     if optval is None:
 456         return optval
 457     if isinstance(optval, bytes):
 458         optval = optval.decode(preferredencoding())
 459
 460     assert isinstance(optval, compat_str)
 461     return optval
 462
 463 def formatSeconds(secs):
 464     if secs > 3600:
 465         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 466     elif secs > 60:
 467         return '%d:%02d' % (secs // 60, secs % 60)
 468     else:
 469         return '%d' % secs
 470
 471 def make_HTTPS_handler(opts):
 472     if sys.version_info < (3,2):
 473         # Python's 2.x handler is very simplistic
 474         return compat_urllib_request.HTTPSHandler()
 475     else:
 476         import ssl
 477         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 478         context.set_default_verify_paths()
 479
 480         context.verify_mode = (ssl.CERT_NONE
 481                                if opts.no_check_certificate
 482                                else ssl.CERT_REQUIRED)
 483         return compat_urllib_request.HTTPSHandler(context=context)
 484
 485 class ExtractorError(Exception):
 486     """Error during info extraction."""
 487     def __init__(self, msg, tb=None, expected=False):
 488         """ tb, if given, is the original traceback (so that it can be printed out).
 489         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 490         """
 491
 492         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 493             expected = True
 494         if not expected:
 495             msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
 496         super(ExtractorError, self).__init__(msg)
 497
 498         self.traceback = tb
 499         self.exc_info = sys.exc_info()  # preserve original exception
 500
 501     def format_traceback(self):
 502         if self.traceback is None:
 503             return None
 504         return u''.join(traceback.format_tb(self.traceback))
 505
 506
 507 class DownloadError(Exception):
 508     """Download Error exception.
 509
 510     This exception may be thrown by FileDownloader objects if they are not
 511     configured to continue on errors. They will contain the appropriate
 512     error message.
 513     """
 514     def __init__(self, msg, exc_info=None):
 515         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 516         super(DownloadError, self).__init__(msg)
 517         self.exc_info = exc_info
 518
 519
 520 class SameFileError(Exception):
 521     """Same File exception.
 522
 523     This exception will be thrown by FileDownloader objects if they detect
 524     multiple files would have to be downloaded to the same file on disk.
 525     """
 526     pass
 527
 528
 529 class PostProcessingError(Exception):
 530     """Post Processing exception.
 531
 532     This exception may be raised by PostProcessor's .run() method to
 533     indicate an error in the postprocessing task.
 534     """
 535     def __init__(self, msg):
 536         self.msg = msg
 537
 538 class MaxDownloadsReached(Exception):
 539     """ --max-downloads limit has been reached. """
 540     pass
 541
 542
 543 class UnavailableVideoError(Exception):
 544     """Unavailable Format exception.
 545
 546     This exception will be thrown when a video is requested
 547     in a format that is not available for that video.
 548     """
 549     pass
 550
 551
 552 class ContentTooShortError(Exception):
 553     """Content Too Short exception.
 554
 555     This exception may be raised by FileDownloader objects when a file they
 556     download is too small for what the server announced first, indicating
 557     the connection was probably interrupted.
 558     """
 559     # Both in bytes
 560     downloaded = None
 561     expected = None
 562
 563     def __init__(self, downloaded, expected):
 564         self.downloaded = downloaded
 565         self.expected = expected
 566
 567 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 568     """Handler for HTTP requests and responses.
 569
 570     This class, when installed with an OpenerDirector, automatically adds
 571     the standard headers to every HTTP request and handles gzipped and
 572     deflated responses from web servers. If compression is to be avoided in
 573     a particular request, the original request in the program code only has
 574     to include the HTTP header "Youtubedl-No-Compression", which will be
 575     removed before making the real request.
 576
 577     Part of this code was copied from:
 578
 579     http://techknack.net/python-urllib2-handlers/
 580
 581     Andrew Rowls, the author of that code, agreed to release it to the
 582     public domain.
 583     """
 584
 585     @staticmethod
 586     def deflate(data):
 587         try:
 588             return zlib.decompress(data, -zlib.MAX_WBITS)
 589         except zlib.error:
 590             return zlib.decompress(data)
 591
 592     @staticmethod
 593     def addinfourl_wrapper(stream, headers, url, code):
 594         if hasattr(compat_urllib_request.addinfourl, 'getcode'):
 595             return compat_urllib_request.addinfourl(stream, headers, url, code)
 596         ret = compat_urllib_request.addinfourl(stream, headers, url)
 597         ret.code = code
 598         return ret
 599
 600     def http_request(self, req):
 601         for h,v in std_headers.items():
 602             if h in req.headers:
 603                 del req.headers[h]
 604             req.add_header(h, v)
 605         if 'Youtubedl-no-compression' in req.headers:
 606             if 'Accept-encoding' in req.headers:
 607                 del req.headers['Accept-encoding']
 608             del req.headers['Youtubedl-no-compression']
 609         if 'Youtubedl-user-agent' in req.headers:
 610             if 'User-agent' in req.headers:
 611                 del req.headers['User-agent']
 612             req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
 613             del req.headers['Youtubedl-user-agent']
 614         return req
 615
 616     def http_response(self, req, resp):
 617         old_resp = resp
 618         # gzip
 619         if resp.headers.get('Content-encoding', '') == 'gzip':
 620             gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
 621             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 622             resp.msg = old_resp.msg
 623         # deflate
 624         if resp.headers.get('Content-encoding', '') == 'deflate':
 625             gz = io.BytesIO(self.deflate(resp.read()))
 626             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 627             resp.msg = old_resp.msg
 628         return resp
 629
 630     https_request = http_request
 631     https_response = http_response
 632
 633 def unified_strdate(date_str):
 634     """Return a string with the date in the format YYYYMMDD"""
 635     upload_date = None
 636     #Replace commas
 637     date_str = date_str.replace(',',' ')
 638     # %z (UTC offset) is only supported in python>=3.2
 639     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
 640     format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
 641     for expression in format_expressions:
 642         try:
 643             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 644         except:
 645             pass
 646     return upload_date
 647
 648 def determine_ext(url):
 649     guess = url.partition(u'?')[0].rpartition(u'.')[2]
 650     if re.match(r'^[A-Za-z0-9]+$', guess):
 651         return guess
 652     else:
 653         return u'unknown_video'
 654
 655 def date_from_str(date_str):
 656     """
 657     Return a datetime object from a string in the format YYYYMMDD or
 658     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 659     today = datetime.date.today()
 660     if date_str == 'now'or date_str == 'today':
 661         return today
 662     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 663     if match is not None:
 664         sign = match.group('sign')
 665         time = int(match.group('time'))
 666         if sign == '-':
 667             time = -time
 668         unit = match.group('unit')
 669         #A bad aproximation?
 670         if unit == 'month':
 671             unit = 'day'
 672             time *= 30
 673         elif unit == 'year':
 674             unit = 'day'
 675             time *= 365
 676         unit += 's'
 677         delta = datetime.timedelta(**{unit: time})
 678         return today + delta
 679     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 680
 681 class DateRange(object):
 682     """Represents a time interval between two dates"""
 683     def __init__(self, start=None, end=None):
 684         """start and end must be strings in the format accepted by date"""
 685         if start is not None:
 686             self.start = date_from_str(start)
 687         else:
 688             self.start = datetime.datetime.min.date()
 689         if end is not None:
 690             self.end = date_from_str(end)
 691         else:
 692             self.end = datetime.datetime.max.date()
 693         if self.start > self.end:
 694             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 695     @classmethod
 696     def day(cls, day):
 697         """Returns a range that only contains the given day"""
 698         return cls(day,day)
 699     def __contains__(self, date):
 700         """Check if the date is in the range"""
 701         if not isinstance(date, datetime.date):
 702             date = date_from_str(date)
 703         return self.start <= date <= self.end
 704     def __str__(self):
 705         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())