X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=9f49507c1256a707a139bbbba407a2b17d3a87ad;hb=a43ee88c6f888196b47cb1e12463a64ada0ead12;hp=9124c36212481b860b92d4bc35676e4ab3453bd4;hpb=d05cfe06006c4a44032e95dde047d5e12be8674c;p=youtube-dl.git diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9124c3621..9f49507c1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -280,6 +280,11 @@ if sys.version_info >= (2, 7): return node.find(expr) else: def find_xpath_attr(node, xpath, key, val): + # Here comes the crazy part: In 2.6, if the xpath is a unicode, + # .//node does not match if a node is a direct child of . ! + if isinstance(xpath, unicode): + xpath = xpath.encode('ascii') + for f in node.findall(xpath): if f.attrib.get(key) == val: return f @@ -299,6 +304,20 @@ def xpath_with_ns(path, ns_map): return '/'.join(replaced) +def xpath_text(node, xpath, name=None, fatal=False): + if sys.version_info < (2, 7): # Crazy 2.6 + xpath = xpath.encode('ascii') + + n = node.find(xpath) + if n is None: + if fatal: + name = xpath if name is None else name + raise ExtractorError('Could not find XML element %s' % name) + else: + return None + return n.text + + compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix class BaseHTMLParser(compat_html_parser.HTMLParser): def __init(self): @@ -1365,14 +1384,16 @@ def check_executable(exe, args=[]): class PagedList(object): - def __init__(self, pagefunc, pagesize): - self._pagefunc = pagefunc - self._pagesize = pagesize - def __len__(self): # This is only useful for tests return len(self.getslice()) + +class OnDemandPagedList(PagedList): + def __init__(self, pagefunc, pagesize): + self._pagefunc = pagefunc + self._pagesize = pagesize + def getslice(self, start=0, end=None): res = [] for pagenum in itertools.count(start // self._pagesize): @@ -1411,6 +1432,35 @@ class PagedList(object): return res +class InAdvancePagedList(PagedList): + def __init__(self, pagefunc, pagecount, pagesize): + self._pagefunc = pagefunc + self._pagecount = pagecount + self._pagesize = pagesize + + def getslice(self, start=0, end=None): + res = [] + start_page = start // self._pagesize + end_page = ( + self._pagecount if end is None else (end // self._pagesize + 1)) + skip_elems = start - start_page * self._pagesize + only_more = None if end is None else end - start + for pagenum in range(start_page, end_page): + page = list(self._pagefunc(pagenum)) + if skip_elems: + page = page[skip_elems:] + skip_elems = None + if only_more is not None: + if len(page) < only_more: + only_more -= len(page) + else: + page = page[:only_more] + res.extend(page) + break + res.extend(page) + return res + + def uppercase_escape(s): unicode_escape = codecs.getdecoder('unicode_escape') return re.sub( @@ -1423,7 +1473,7 @@ def escape_rfc3986(s): """Escape non-ASCII characters as suggested by RFC 3986""" if sys.version_info < (3, 0) and isinstance(s, unicode): s = s.encode('utf-8') - return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]") #"%/;:@&=+$,!~*'()?#[]+" #?#[]+ + return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]") def escape_url(url): @@ -1570,3 +1620,13 @@ except AttributeError: if ret: raise subprocess.CalledProcessError(ret, p.args, output=output) return output + + +def limit_length(s, length): + """ Add ellipses to overly long strings """ + if s is None: + return None + ELLIPSES = '...' + if len(s) > length: + return s[:length - len(ELLIPSES)] + ELLIPSES + return s