2 # -*- coding: utf-8 -*-
27 import urllib.request as compat_urllib_request
28 except ImportError: # Python 2
29 import urllib2 as compat_urllib_request
32 import urllib.error as compat_urllib_error
33 except ImportError: # Python 2
34 import urllib2 as compat_urllib_error
37 import urllib.parse as compat_urllib_parse
38 except ImportError: # Python 2
39 import urllib as compat_urllib_parse
42 from urllib.parse import urlparse as compat_urllib_parse_urlparse
43 except ImportError: # Python 2
44 from urlparse import urlparse as compat_urllib_parse_urlparse
47 import urllib.parse as compat_urlparse
48 except ImportError: # Python 2
49 import urlparse as compat_urlparse
52 import http.cookiejar as compat_cookiejar
53 except ImportError: # Python 2
54 import cookielib as compat_cookiejar
57 import html.entities as compat_html_entities
58 except ImportError: # Python 2
59 import htmlentitydefs as compat_html_entities
62 import html.parser as compat_html_parser
63 except ImportError: # Python 2
64 import HTMLParser as compat_html_parser
67 import http.client as compat_http_client
68 except ImportError: # Python 2
69 import httplib as compat_http_client
72 from urllib.error import HTTPError as compat_HTTPError
73 except ImportError: # Python 2
74 from urllib2 import HTTPError as compat_HTTPError
77 from urllib.request import urlretrieve as compat_urlretrieve
78 except ImportError: # Python 2
79 from urllib import urlretrieve as compat_urlretrieve
83 from subprocess import DEVNULL
84 compat_subprocess_get_DEVNULL = lambda: DEVNULL
86 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
89 from urllib.parse import parse_qs as compat_parse_qs
90 except ImportError: # Python 2
91 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
92 # Python 2's version is apparently totally broken
93 def _unquote(string, encoding='utf-8', errors='replace'):
96 res = string.split('%')
103 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
110 pct_sequence += item[:2].decode('hex')
113 # This segment was just a single percent-encoded character.
114 # May be part of a sequence of code units, so delay decoding.
115 # (Stored in pct_sequence).
119 # Encountered non-percent-encoded characters. Flush the current
121 string += pct_sequence.decode(encoding, errors) + rest
124 # Flush the final pct_sequence
125 string += pct_sequence.decode(encoding, errors)
128 def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
129 encoding='utf-8', errors='replace'):
130 qs, _coerce_result = qs, unicode
131 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
133 for name_value in pairs:
134 if not name_value and not strict_parsing:
136 nv = name_value.split('=', 1)
139 raise ValueError("bad query field: %r" % (name_value,))
140 # Handle case of a control-name with no equal sign
141 if keep_blank_values:
145 if len(nv[1]) or keep_blank_values:
146 name = nv[0].replace('+', ' ')
147 name = _unquote(name, encoding=encoding, errors=errors)
148 name = _coerce_result(name)
149 value = nv[1].replace('+', ' ')
150 value = _unquote(value, encoding=encoding, errors=errors)
151 value = _coerce_result(value)
152 r.append((name, value))
155 def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
156 encoding='utf-8', errors='replace'):
158 pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
159 encoding=encoding, errors=errors)
160 for name, value in pairs:
161 if name in parsed_result:
162 parsed_result[name].append(value)
164 parsed_result[name] = [value]
168 compat_str = unicode # Python 2
173 compat_chr = unichr # Python 2
178 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
179 except ImportError: # Python 2.6
180 from xml.parsers.expat import ExpatError as compat_xml_parse_error
183 if type(c) is int: return c
186 # This is not clearly defined otherwise
187 compiled_regex_type = type(re.compile(''))
190 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
191 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
192 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
193 'Accept-Encoding': 'gzip, deflate',
194 'Accept-Language': 'en-us,en;q=0.5',
197 def preferredencoding():
198 """Get preferred encoding.
200 Returns the best encoding scheme for the system, based on
201 locale.getpreferredencoding() and some further tweaks.
204 pref = locale.getpreferredencoding()
211 if sys.version_info < (3,0):
213 print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
216 assert type(s) == type(u'')
219 # In Python 2.x, json.dump expects a bytestream.
220 # In Python 3.x, it writes to a character stream
221 if sys.version_info < (3,0):
222 def write_json_file(obj, fn):
223 with open(fn, 'wb') as f:
226 def write_json_file(obj, fn):
227 with open(fn, 'w', encoding='utf-8') as f:
230 if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
    """Locate the first element matching *xpath* whose attribute *key*
    equals *val* — i.e. the XPath expression ``xpath[@key='val']``."""
    # Only plain attribute names and simple values are accepted; anything
    # fancier would need quoting that ElementTree's XPath subset lacks.
    assert re.match(r'^[a-zA-Z]+$', key)
    assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
    predicate = u"[@%s='%s']" % (key, val)
    return node.find(xpath + predicate)
238 def find_xpath_attr(node, xpath, key, val):
239 for f in node.findall(xpath):
240 if f.attrib.get(key) == val:
244 # On python2.6 the xml.etree.ElementTree.Element methods don't support
245 # the namespace parameter
246 def xpath_with_ns(path, ns_map):
247 components = [c.split(':') for c in path.split('/')]
251 replaced.append(c[0])
254 replaced.append('{%s}%s' % (ns_map[ns], tag))
255 return '/'.join(replaced)
257 def htmlentity_transform(matchobj):
258 """Transforms an HTML entity to a character.
260 This function receives a match object and is intended to be used with
261 the re.sub() function.
263 entity = matchobj.group(1)
265 # Known non-numeric HTML entity
266 if entity in compat_html_entities.name2codepoint:
267 return compat_chr(compat_html_entities.name2codepoint[entity])
269 mobj = re.match(u'(?u)#(x?\\d+)', entity)
271 numstr = mobj.group(1)
272 if numstr.startswith(u'x'):
274 numstr = u'0%s' % numstr
277 return compat_chr(int(numstr, base))
279 # Unknown entity in name, return its literal representation
280 return (u'&%s;' % entity)
282 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
283 class BaseHTMLParser(compat_html_parser.HTMLParser):
285 compat_html_parser.HTMLParser.__init__(self)
288 def loads(self, html):
293 class AttrParser(BaseHTMLParser):
294 """Modified HTMLParser that isolates a tag with the specified attribute"""
295 def __init__(self, attribute, value):
296 self.attribute = attribute
301 self.watch_startpos = False
303 BaseHTMLParser.__init__(self)
305 def error(self, message):
306 if self.error_count > 10 or self.started:
307 raise compat_html_parser.HTMLParseError(message, self.getpos())
308 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
309 self.error_count += 1
312 def handle_starttag(self, tag, attrs):
315 self.find_startpos(None)
316 if self.attribute in attrs and attrs[self.attribute] == self.value:
319 self.watch_startpos = True
321 if not tag in self.depth: self.depth[tag] = 0
324 def handle_endtag(self, tag):
326 if tag in self.depth: self.depth[tag] -= 1
327 if self.depth[self.result[0]] == 0:
329 self.result.append(self.getpos())
331 def find_startpos(self, x):
332 """Needed to put the start position of the result (self.result[1])
333 after the opening tag with the requested id"""
334 if self.watch_startpos:
335 self.watch_startpos = False
336 self.result.append(self.getpos())
337 handle_entityref = handle_charref = handle_data = handle_comment = \
338 handle_decl = handle_pi = unknown_decl = find_startpos
340 def get_result(self):
341 if self.result is None:
343 if len(self.result) != 3:
345 lines = self.html.split('\n')
346 lines = lines[self.result[1][0]-1:self.result[2][0]]
347 lines[0] = lines[0][self.result[1][1]:]
349 lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
350 lines[-1] = lines[-1][:self.result[2][1]]
351 return '\n'.join(lines).strip()
352 # Hack for https://github.com/rg3/youtube-dl/issues/662
353 if sys.version_info < (2, 7, 3):
354 AttrParser.parse_endtag = (lambda self, i:
355 i + len("</scr'+'ipt>")
356 if self.rawdata[i:].startswith("</scr'+'ipt>")
357 else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag carrying the given ID attribute
    within the supplied HTML document."""
    # Thin convenience wrapper: an ID lookup is just an attribute lookup
    # with the attribute fixed to "id".
    attribute = "id"
    return get_element_by_attribute(attribute, id, html)
363 def get_element_by_attribute(attribute, value, html):
364 """Return the content of the tag with the specified attribute in the passed HTML document"""
365 parser = AttrParser(attribute, value)
368 except compat_html_parser.HTMLParseError:
370 return parser.get_result()
372 class MetaParser(BaseHTMLParser):
374 Modified HTMLParser that isolates a meta tag with the specified name
377 def __init__(self, name):
378 BaseHTMLParser.__init__(self)
383 def handle_starttag(self, tag, attrs):
387 if attrs.get('name') == self.name:
388 self.result = attrs.get('content')
390 def get_result(self):
393 def get_meta_content(name, html):
395 Return the content attribute from the meta tag with the given name attribute.
397 parser = MetaParser(name)
400 except compat_html_parser.HTMLParseError:
402 return parser.get_result()
405 def clean_html(html):
406 """Clean an HTML snippet into a readable string"""
408 html = html.replace('\n', ' ')
409 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
410 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
412 html = re.sub('<.*?>', '', html)
413 # Replace html entities
414 html = unescapeHTML(html)
418 def sanitize_open(filename, open_mode):
419 """Try to open the given filename, and slightly tweak it if this fails.
421 Attempts to open the given filename. If this fails, it tries to change
422 the filename slightly, step by step, until it's either able to open it
423 or it fails and raises a final exception, like the standard open()
426 It returns the tuple (stream, definitive_file_name).
430 if sys.platform == 'win32':
432 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
433 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
434 stream = open(encodeFilename(filename), open_mode)
435 return (stream, filename)
436 except (IOError, OSError) as err:
437 if err.errno in (errno.EACCES,):
440 # In case of error, try to remove win32 forbidden chars
441 alt_filename = os.path.join(
442 re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
443 for path_part in os.path.split(filename)
445 if alt_filename == filename:
448 # An exception here should be caught in the caller
449 stream = open(encodeFilename(filename), open_mode)
450 return (stream, alt_filename)
453 def timeconvert(timestr):
454 """Convert RFC 2822 defined time string into system timestamp"""
456 timetuple = email.utils.parsedate_tz(timestr)
457 if timetuple is not None:
458 timestamp = email.utils.mktime_tz(timetuple)
461 def sanitize_filename(s, restricted=False, is_id=False):
462 """Sanitizes a string so it could be used as part of a filename.
463 If restricted is set, use a stricter subset of allowed characters.
464 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
466 def replace_insane(char):
467 if char == '?' or ord(char) < 32 or ord(char) == 127:
470 return '' if restricted else '\''
472 return '_-' if restricted else ' -'
473 elif char in '\\/|*<>':
475 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
477 if restricted and ord(char) > 127:
481 result = u''.join(map(replace_insane, s))
483 while '__' in result:
484 result = result.replace('__', '_')
485 result = result.strip('_')
486 # Common case of "Foreign band name - English song title"
487 if restricted and result.startswith('-_'):
493 def orderedSet(iterable):
494 """ Remove all duplicates from the input iterable """
505 assert type(s) == type(u'')
507 result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
511 def encodeFilename(s, for_subprocess=False):
513 @param s The name of the file
516 assert type(s) == compat_str
518 # Python 3 has a Unicode API
519 if sys.version_info >= (3, 0):
522 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
523 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
524 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
525 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
526 if not for_subprocess:
529 # For subprocess calls, encode with locale encoding
530 # Refer to http://stackoverflow.com/a/9951851/35070
531 encoding = preferredencoding()
533 encoding = sys.getfilesystemencoding()
536 return s.encode(encoding, 'ignore')
539 def decodeOption(optval):
542 if isinstance(optval, bytes):
543 optval = optval.decode(preferredencoding())
545 assert isinstance(optval, compat_str)
548 def formatSeconds(secs):
550 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
552 return '%d:%02d' % (secs // 60, secs % 60)
557 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
558 if sys.version_info < (3, 2):
561 class HTTPSConnectionV3(httplib.HTTPSConnection):
562 def __init__(self, *args, **kwargs):
563 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
566 sock = socket.create_connection((self.host, self.port), self.timeout)
567 if getattr(self, '_tunnel_host', False):
571 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
573 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
575 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
576 def https_open(self, req):
577 return self.do_open(HTTPSConnectionV3, req)
578 return HTTPSHandlerV3(**kwargs)
580 context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
581 context.verify_mode = (ssl.CERT_NONE
582 if opts_no_check_certificate
583 else ssl.CERT_REQUIRED)
584 context.set_default_verify_paths()
586 context.load_default_certs()
587 except AttributeError:
589 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
591 class ExtractorError(Exception):
592 """Error during info extraction."""
593 def __init__(self, msg, tb=None, expected=False, cause=None):
594 """ tb, if given, is the original traceback (so that it can be printed out).
595 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
598 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
601 msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
602 super(ExtractorError, self).__init__(msg)
605 self.exc_info = sys.exc_info() # preserve original exception
608 def format_traceback(self):
609 if self.traceback is None:
611 return u''.join(traceback.format_tb(self.traceback))
614 class RegexNotFoundError(ExtractorError):
615 """Error when a regex didn't match"""
619 class DownloadError(Exception):
620 """Download Error exception.
622 This exception may be thrown by FileDownloader objects if they are not
623 configured to continue on errors. They will contain the appropriate
626 def __init__(self, msg, exc_info=None):
627 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
628 super(DownloadError, self).__init__(msg)
629 self.exc_info = exc_info
632 class SameFileError(Exception):
633 """Same File exception.
635 This exception will be thrown by FileDownloader objects if they detect
636 multiple files would have to be downloaded to the same file on disk.
641 class PostProcessingError(Exception):
642 """Post Processing exception.
644 This exception may be raised by PostProcessor's .run() method to
645 indicate an error in the postprocessing task.
647 def __init__(self, msg):
650 class MaxDownloadsReached(Exception):
651 """ --max-downloads limit has been reached. """
655 class UnavailableVideoError(Exception):
656 """Unavailable Format exception.
658 This exception will be thrown when a video is requested
659 in a format that is not available for that video.
664 class ContentTooShortError(Exception):
665 """Content Too Short exception.
667 This exception may be raised by FileDownloader objects when a file they
668 download is too small for what the server announced first, indicating
669 the connection was probably interrupted.
675 def __init__(self, downloaded, expected):
676 self.downloaded = downloaded
677 self.expected = expected
679 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
680 """Handler for HTTP requests and responses.
682 This class, when installed with an OpenerDirector, automatically adds
683 the standard headers to every HTTP request and handles gzipped and
684 deflated responses from web servers. If compression is to be avoided in
685 a particular request, the original request in the program code only has
686 to include the HTTP header "Youtubedl-No-Compression", which will be
687 removed before making the real request.
689 Part of this code was copied from:
691 http://techknack.net/python-urllib2-handlers/
693 Andrew Rowls, the author of that code, agreed to release it to the
700 return zlib.decompress(data, -zlib.MAX_WBITS)
702 return zlib.decompress(data)
705 def addinfourl_wrapper(stream, headers, url, code):
706 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
707 return compat_urllib_request.addinfourl(stream, headers, url, code)
708 ret = compat_urllib_request.addinfourl(stream, headers, url)
712 def http_request(self, req):
713 for h,v in std_headers.items():
717 if 'Youtubedl-no-compression' in req.headers:
718 if 'Accept-encoding' in req.headers:
719 del req.headers['Accept-encoding']
720 del req.headers['Youtubedl-no-compression']
721 if 'Youtubedl-user-agent' in req.headers:
722 if 'User-agent' in req.headers:
723 del req.headers['User-agent']
724 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
725 del req.headers['Youtubedl-user-agent']
728 def http_response(self, req, resp):
731 if resp.headers.get('Content-encoding', '') == 'gzip':
732 content = resp.read()
733 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
735 uncompressed = io.BytesIO(gz.read())
736 except IOError as original_ioerror:
737 # There may be junk add the end of the file
738 # See http://stackoverflow.com/q/4928560/35070 for details
739 for i in range(1, 1024):
741 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
742 uncompressed = io.BytesIO(gz.read())
747 raise original_ioerror
748 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
749 resp.msg = old_resp.msg
751 if resp.headers.get('Content-encoding', '') == 'deflate':
752 gz = io.BytesIO(self.deflate(resp.read()))
753 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
754 resp.msg = old_resp.msg
757 https_request = http_request
758 https_response = http_response
761 def unified_strdate(date_str):
762 """Return a string with the date in the format YYYYMMDD"""
765 date_str = date_str.replace(',', ' ')
766 # %z (UTC offset) is only supported in python>=3.2
767 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
768 format_expressions = [
778 '%Y-%m-%dT%H:%M:%SZ',
779 '%Y-%m-%dT%H:%M:%S.%fZ',
780 '%Y-%m-%dT%H:%M:%S.%f0Z',
782 '%Y-%m-%dT%H:%M:%S.%f',
785 for expression in format_expressions:
787 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
790 if upload_date is None:
791 timetuple = email.utils.parsedate_tz(date_str)
793 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
796 def determine_ext(url, default_ext=u'unknown_video'):
797 guess = url.partition(u'?')[0].rpartition(u'.')[2]
798 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle file name for *filename*: the media extension is
    replaced by '<language>.<format>' (e.g. 'movie.mp4' -> 'movie.en.srt')."""
    base = filename.rsplit('.', 1)[0]
    return u'.'.join((base, sub_lang, sub_format))
806 def date_from_str(date_str):
808 Return a datetime object from a string in the format YYYYMMDD or
809 (now|today)[+-][0-9](day|week|month|year)(s)?"""
810 today = datetime.date.today()
811 if date_str == 'now'or date_str == 'today':
813 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
814 if match is not None:
815 sign = match.group('sign')
816 time = int(match.group('time'))
819 unit = match.group('unit')
828 delta = datetime.timedelta(**{unit: time})
830 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
832 def hyphenate_date(date_str):
834 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
835 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
836 if match is not None:
837 return '-'.join(match.groups())
841 class DateRange(object):
842 """Represents a time interval between two dates"""
843 def __init__(self, start=None, end=None):
844 """start and end must be strings in the format accepted by date"""
845 if start is not None:
846 self.start = date_from_str(start)
848 self.start = datetime.datetime.min.date()
850 self.end = date_from_str(end)
852 self.end = datetime.datetime.max.date()
853 if self.start > self.end:
854 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
857 """Returns a range that only contains the given day"""
859 def __contains__(self, date):
860 """Check if the date is in the range"""
861 if not isinstance(date, datetime.date):
862 date = date_from_str(date)
863 return self.start <= date <= self.end
865 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
869 """ Returns the platform name as a compat_str """
870 res = platform.platform()
871 if isinstance(res, bytes):
872 res = res.decode(preferredencoding())
874 assert isinstance(res, compat_str)
878 def write_string(s, out=None):
881 assert type(s) == compat_str
883 if ('b' in getattr(out, 'mode', '') or
884 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
885 s = s.encode(preferredencoding(), 'ignore')
888 except UnicodeEncodeError:
889 # In Windows shells, this can fail even when the codec is just charmap!?
890 # See https://wiki.python.org/moin/PrintFails#Issue
891 if sys.platform == 'win32' and hasattr(out, 'encoding'):
892 s = s.encode(out.encoding, 'ignore').decode(out.encoding)
900 def bytes_to_intlist(bs):
903 if isinstance(bs[0], int): # Python 3
906 return [ord(c) for c in bs]
909 def intlist_to_bytes(xs):
912 if isinstance(chr(0), bytes): # Python 2
913 return ''.join([chr(x) for x in xs])
def get_cachedir(params=None):
    """Return the directory used for youtube-dl's cache files.

    Resolution order: the 'cachedir' entry of *params*, then the
    XDG_CACHE_HOME environment variable, then ~/.cache.

    @param params optional options dict; only its 'cachedir' key is read
    """
    # Fixed: the original signature used a mutable default (params={}).
    # It was never mutated here, but None is the safe idiom and behaves
    # identically for all callers.
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
924 # Cross-platform file locking
925 if sys.platform == 'win32':
926 import ctypes.wintypes
929 class OVERLAPPED(ctypes.Structure):
931 ('Internal', ctypes.wintypes.LPVOID),
932 ('InternalHigh', ctypes.wintypes.LPVOID),
933 ('Offset', ctypes.wintypes.DWORD),
934 ('OffsetHigh', ctypes.wintypes.DWORD),
935 ('hEvent', ctypes.wintypes.HANDLE),
938 kernel32 = ctypes.windll.kernel32
939 LockFileEx = kernel32.LockFileEx
940 LockFileEx.argtypes = [
941 ctypes.wintypes.HANDLE, # hFile
942 ctypes.wintypes.DWORD, # dwFlags
943 ctypes.wintypes.DWORD, # dwReserved
944 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
945 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
946 ctypes.POINTER(OVERLAPPED) # Overlapped
948 LockFileEx.restype = ctypes.wintypes.BOOL
949 UnlockFileEx = kernel32.UnlockFileEx
950 UnlockFileEx.argtypes = [
951 ctypes.wintypes.HANDLE, # hFile
952 ctypes.wintypes.DWORD, # dwReserved
953 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
954 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
955 ctypes.POINTER(OVERLAPPED) # Overlapped
957 UnlockFileEx.restype = ctypes.wintypes.BOOL
958 whole_low = 0xffffffff
959 whole_high = 0x7fffffff
961 def _lock_file(f, exclusive):
962 overlapped = OVERLAPPED()
963 overlapped.Offset = 0
964 overlapped.OffsetHigh = 0
965 overlapped.hEvent = 0
966 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
967 handle = msvcrt.get_osfhandle(f.fileno())
968 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
969 whole_low, whole_high, f._lock_file_overlapped_p):
970 raise OSError('Locking file failed: %r' % ctypes.FormatError())
973 assert f._lock_file_overlapped_p
974 handle = msvcrt.get_osfhandle(f.fileno())
975 if not UnlockFileEx(handle, 0,
976 whole_low, whole_high, f._lock_file_overlapped_p):
977 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
982 def _lock_file(f, exclusive):
983 fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
986 fcntl.lockf(f, fcntl.LOCK_UN)
989 class locked_file(object):
990 def __init__(self, filename, mode, encoding=None):
991 assert mode in ['r', 'a', 'w']
992 self.f = io.open(filename, mode, encoding=encoding)
996 exclusive = self.mode != 'r'
998 _lock_file(self.f, exclusive)
1004 def __exit__(self, etype, value, traceback):
1006 _unlock_file(self.f)
1013 def write(self, *args):
1014 return self.f.write(*args)
1016 def read(self, *args):
1017 return self.f.read(*args)
1020 def shell_quote(args):
1022 encoding = sys.getfilesystemencoding()
1023 if encoding is None:
1026 if isinstance(a, bytes):
1027 # We may get a filename encoded with 'encodeFilename'
1028 a = a.decode(encoding)
1029 quoted_args.append(pipes.quote(a))
1030 return u' '.join(quoted_args)
1033 def takewhile_inclusive(pred, seq):
1034 """ Like itertools.takewhile, but include the latest evaluated element
1035 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """Pass additional data in a URL for internal use.

    *data* is JSON-encoded and appended to *url* as a percent-encoded
    fragment; the inverse operation is performed by unsmuggle_url.
    """
    payload = {u'__youtubedl_smuggle': json.dumps(data)}
    return u'#'.join((url, compat_urllib_parse.urlencode(payload)))
1050 def unsmuggle_url(smug_url, default=None):
1051 if not '#__youtubedl_smuggle' in smug_url:
1052 return smug_url, default
1053 url, _, sdata = smug_url.rpartition(u'#')
1054 jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
1055 data = json.loads(jsond)
1059 def format_bytes(bytes):
1062 if type(bytes) is str:
1063 bytes = float(bytes)
1067 exponent = int(math.log(bytes, 1024.0))
1068 suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1069 converted = float(bytes) / float(1024 ** exponent)
1070 return u'%.2f%s' % (converted, suffix)
1073 def str_to_int(int_str):
1074 int_str = re.sub(r'[,\.]', u'', int_str)
1078 def get_term_width():
1079 columns = os.environ.get('COLUMNS', None)
1084 sp = subprocess.Popen(
1086 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1087 out, err = sp.communicate()
1088 return int(out.split()[1])
1094 def month_by_name(name):
1095 """ Return the number of a month by (locale-independently) English name """
1098 u'January', u'February', u'March', u'April', u'May', u'June',
1099 u'July', u'August', u'September', u'October', u'November', u'December']
1101 return ENGLISH_NAMES.index(name) + 1
1106 def fix_xml_ampersands(xml_str):
1107 """Replace all the '&' by '&' in XML"""
1109 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1114 def setproctitle(title):
1115 assert isinstance(title, compat_str)
1117 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1121 buf = ctypes.create_string_buffer(len(title) + 1)
1122 buf.value = title.encode('utf-8')
1124 libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
1125 except AttributeError:
1126 return # Strange libc, just skip this
1129 def remove_start(s, start):
1130 if s.startswith(start):
1131 return s[len(start):]
def url_basename(url):
    """Return the last path component of *url*; query string, fragment
    and trailing slashes are ignored."""
    path = compat_urlparse.urlparse(url).path
    components = path.strip(u'/').split(u'/')
    return components[-1]
1140 class HEADRequest(compat_urllib_request.Request):
1141 def get_method(self):
def int_or_none(v, scale=1):
    """Convert *v* to an int divided (floor) by *scale*; a None input is
    passed through unchanged."""
    if v is None:
        return None
    return int(v) // scale
1149 def parse_duration(s):
1154 r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
1157 res = int(m.group('secs'))
1159 res += int(m.group('mins')) * 60
1160 if m.group('hours'):
1161 res += int(m.group('hours')) * 60 * 60
def prepend_extension(filename, ext):
    """Insert *ext* between the file name and its real extension:
    prepend_extension('clip.mp4', 'part') -> 'clip.part.mp4'."""
    root, real_ext = os.path.splitext(filename)
    return u'%s.%s%s' % (root, ext, real_ext)
1170 def check_executable(exe, args=[]):
1171 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1172 args can be a list of arguments for a short output (like -version) """
1174 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1180 class PagedList(object):
1181 def __init__(self, pagefunc, pagesize):
1182 self._pagefunc = pagefunc
1183 self._pagesize = pagesize
1186 # This is only useful for tests
1187 return len(self.getslice())
1189 def getslice(self, start=0, end=None):
1191 for pagenum in itertools.count(start // self._pagesize):
1192 firstid = pagenum * self._pagesize
1193 nextfirstid = pagenum * self._pagesize + self._pagesize
1194 if start >= nextfirstid:
1197 page_results = list(self._pagefunc(pagenum))
1200 start % self._pagesize
1201 if firstid <= start < nextfirstid
1205 ((end - 1) % self._pagesize) + 1
1206 if (end is not None and firstid <= end <= nextfirstid)
1209 if startv != 0 or endv is not None:
1210 page_results = page_results[startv:endv]
1211 res.extend(page_results)
1213 # A little optimization - if current page is not "full", ie. does
1214 # not contain page_size videos then we can assume that this page
1215 # is the last one - there are no more ids on further pages -
1216 # i.e. no need to query again.
1217 if len(page_results) + startv < self._pagesize:
1220 # If we got the whole page, but the next page is not interesting,
1221 # break out early as well
1222 if end == nextfirstid:
1227 def uppercase_escape(s):
1229 r'\\U([0-9a-fA-F]{8})',
1230 lambda m: compat_chr(int(m.group(1), base=16)), s)
1233 struct.pack(u'!I', 0)
1235 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
def struct_pack(spec, *args):
    """struct.pack that tolerates a unicode format string — needed because
    some interpreters (Python 2.6 era) require the format to be bytes."""
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.pack(fmt, *args)
def struct_unpack(spec, *args):
    """struct.unpack that tolerates a unicode format string — mirrors
    struct_pack for the same byte-string-only interpreters."""
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.unpack(fmt, *args)
1246 struct_pack = struct.pack
1247 struct_unpack = struct.unpack