release 2014.09.29

[youtube-dl.git] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 9124c36212481b860b92d4bc35676e4ab3453bd4..9f49507c1256a707a139bbbba407a2b17d3a87ad 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -280,6 +280,11 @@ if sys.version_info >= (2, 7):
          return node.find(expr)
  else:
      def find_xpath_attr(node, xpath, key, val):
+        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
+        # .//node does not match if a node is a direct child of . !
+        if isinstance(xpath, unicode):
+            xpath = xpath.encode('ascii')
+
          for f in node.findall(xpath):
              if f.attrib.get(key) == val:
                  return f
@@ -299,6 +304,20 @@ def xpath_with_ns(path, ns_map):
      return '/'.join(replaced)
  
  
+def xpath_text(node, xpath, name=None, fatal=False):
+    """Return the text of the first element below node that matches xpath.
+
+    When nothing matches, None is returned unless fatal is true; in that
+    case ExtractorError is raised, and its message mentions name (or the
+    xpath itself if no name was given).
+    """
+    if sys.version_info < (2, 7):
+        # Python 2.6 is handed a byte-string xpath
+        xpath = xpath.encode('ascii')
+
+    match = node.find(xpath)
+    if match is not None:
+        return match.text
+    if not fatal:
+        return None
+    if name is None:
+        name = xpath
+    raise ExtractorError('Could not find XML element %s' % name)
+
+
  compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
  class BaseHTMLParser(compat_html_parser.HTMLParser):
      def __init(self):
@@ -1365,14 +1384,16 @@ def check_executable(exe, args=[]):
  
  
  class PagedList(object):
-    def __init__(self, pagefunc, pagesize):
-        self._pagefunc = pagefunc
-        self._pagesize = pagesize
-
      def __len__(self):
          # This is only useful for tests
          return len(self.getslice())
  
+
+class OnDemandPagedList(PagedList):
+    def __init__(self, pagefunc, pagesize):
+        self._pagefunc = pagefunc
+        self._pagesize = pagesize
+
      def getslice(self, start=0, end=None):
          res = []
          for pagenum in itertools.count(start // self._pagesize):
@@ -1411,6 +1432,35 @@ class PagedList(object):
          return res
  
  
+class InAdvancePagedList(PagedList):
+    """Paged list whose number of pages is known before any page is fetched.
+
+    pagefunc is called with a page number (counted from 0) and must return
+    an iterable with the entries of that page.  pagecount is the total
+    number of pages; pagesize is the page length used to translate entry
+    indices into page numbers.
+    """
+
+    def __init__(self, pagefunc, pagecount, pagesize):
+        self._pagefunc = pagefunc
+        self._pagecount = pagecount
+        self._pagesize = pagesize
+
+    def getslice(self, start=0, end=None):
+        """Return the entries with indices in [start, end) as a list.
+
+        With end=None, everything from start up to the last page is
+        returned.  Only pages overlapping the requested range are fetched,
+        and pagefunc is never called with a page number >= pagecount.
+        """
+        res = []
+        start_page = start // self._pagesize
+        if end is None:
+            end_page = self._pagecount
+        else:
+            # Clamp to the announced page count: an end index beyond the
+            # last entry must not trigger requests for nonexistent pages.
+            end_page = min(self._pagecount, end // self._pagesize + 1)
+        # Number of entries to drop from the front of the first fetched page
+        skip_elems = start - start_page * self._pagesize
+        # Number of entries still wanted; None means "no upper bound"
+        only_more = None if end is None else end - start
+        for pagenum in range(start_page, end_page):
+            page = list(self._pagefunc(pagenum))
+            if skip_elems:
+                page = page[skip_elems:]
+                skip_elems = None
+            if only_more is not None:
+                if len(page) < only_more:
+                    only_more -= len(page)
+                else:
+                    # This page completes the requested range
+                    page = page[:only_more]
+                    res.extend(page)
+                    break
+            res.extend(page)
+        return res
+
+
  def uppercase_escape(s):
      unicode_escape = codecs.getdecoder('unicode_escape')
      return re.sub(
@@ -1423,7 +1473,7 @@ def escape_rfc3986(s):
      """Escape non-ASCII characters as suggested by RFC 3986"""
      if sys.version_info < (3, 0) and isinstance(s, unicode):
          s = s.encode('utf-8')
-    return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]") #"%/;:@&=+$,!~*'()?#[]+"   #?#[]+
+    return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]")
  
  
  def escape_url(url):
@@ -1570,3 +1620,13 @@ except AttributeError:
          if ret:
              raise subprocess.CalledProcessError(ret, p.args, output=output)
          return output
+
+
+def limit_length(s, length):
+    """Shorten s to at most length characters, marking the cut with '...'.
+
+    Returns None if s is None and s itself if it already fits.  Otherwise
+    the result is a prefix of s followed by '...', length characters in
+    total.  If length is smaller than the ellipsis itself, only the first
+    length characters of '...' are returned, so the result never exceeds
+    length (and is empty for length <= 0).
+    """
+    if s is None:
+        return None
+    ELLIPSES = '...'
+    if len(s) > length:
+        # Clamp at 0: a negative slice end would count from the end of s
+        # and produce a result longer than the input.
+        keep = max(length - len(ELLIPSES), 0)
+        return (s[:keep] + ELLIPSES)[:max(length, 0)]
+    return s