2 # -*- coding: utf-8 -*-
4 from __future__ import unicode_literals
30 import xml.etree.ElementTree
41 compat_urllib_parse_urlparse,
42 compat_urllib_request,
47 # This is not clearly defined otherwise
48 compiled_regex_type = type(re.compile(''))
51 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
52 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
53 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
54 'Accept-Encoding': 'gzip, deflate',
55 'Accept-Language': 'en-us,en;q=0.5',
58 def preferredencoding():
59 """Get preferred encoding.
61 Returns the best encoding scheme for the system, based on
62 locale.getpreferredencoding() and some further tweaks.
65 pref = locale.getpreferredencoding()
73 def write_json_file(obj, fn):
74 """ Encode obj as JSON and write it to fn, atomically if possible """
76 fn = encodeFilename(fn)
77 if sys.version_info < (3, 0) and sys.platform != 'win32':
78 encoding = get_filesystem_encoding()
79 # os.path.basename returns a bytes object, but NamedTemporaryFile
80 # will fail if the filename contains non ascii characters unless we
81 # use a unicode object
82 path_basename = lambda f: os.path.basename(fn).decode(encoding)
83 # the same for os.path.dirname
84 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
86 path_basename = os.path.basename
87 path_dirname = os.path.dirname
91 'prefix': path_basename(fn) + '.',
92 'dir': path_dirname(fn),
96 # In Python 2.x, json.dump expects a bytestream.
97 # In Python 3.x, it writes to a character stream
98 if sys.version_info < (3, 0):
106 tf = tempfile.NamedTemporaryFile(**args)
111 if sys.platform == 'win32':
112 # Need to remove existing file on Windows, else os.rename raises
113 # WindowsError or FileExistsError.
118 os.rename(tf.name, fn)
127 if sys.version_info >= (2, 7):
128 def find_xpath_attr(node, xpath, key, val):
129 """ Find the xpath xpath[@key=val] """
130 assert re.match(r'^[a-zA-Z-]+$', key)
131 assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
132 expr = xpath + u"[@%s='%s']" % (key, val)
133 return node.find(expr)
135 def find_xpath_attr(node, xpath, key, val):
136 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
137 # .//node does not match if a node is a direct child of . !
138 if isinstance(xpath, unicode):
139 xpath = xpath.encode('ascii')
141 for f in node.findall(xpath):
142 if f.attrib.get(key) == val:
146 # On python2.6 the xml.etree.ElementTree.Element methods don't support
147 # the namespace parameter
148 def xpath_with_ns(path, ns_map):
149 components = [c.split(':') for c in path.split('/')]
153 replaced.append(c[0])
156 replaced.append('{%s}%s' % (ns_map[ns], tag))
157 return '/'.join(replaced)
160 def xpath_text(node, xpath, name=None, fatal=False):
161 if sys.version_info < (2, 7): # Crazy 2.6
162 xpath = xpath.encode('ascii')
167 name = xpath if name is None else name
168 raise ExtractorError('Could not find XML element %s' % name)
def get_element_by_id(id, html):
    """Return the content of the tag in *html* whose ``id`` attribute equals *id*.

    This is a convenience wrapper: the lookup itself is performed by
    get_element_by_attribute with the attribute name fixed to "id".
    """
    return get_element_by_attribute(attribute="id", value=id, html=html)
179 def get_element_by_attribute(attribute, value, html):
180 """Return the content of the tag with the specified attribute in the passed HTML document"""
182 m = re.search(r'''(?xs)
184 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
186 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
190 ''' % (re.escape(attribute), re.escape(value)), html)
194 res = m.group('content')
196 if res.startswith('"') or res.startswith("'"):
199 return unescapeHTML(res)
202 def clean_html(html):
203 """Clean an HTML snippet into a readable string"""
205 html = html.replace('\n', ' ')
206 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
207 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
209 html = re.sub('<.*?>', '', html)
210 # Replace html entities
211 html = unescapeHTML(html)
215 def sanitize_open(filename, open_mode):
216 """Try to open the given filename, and slightly tweak it if this fails.
218 Attempts to open the given filename. If this fails, it tries to change
219 the filename slightly, step by step, until it's either able to open it
220 or it fails and raises a final exception, like the standard open()
223 It returns the tuple (stream, definitive_file_name).
227 if sys.platform == 'win32':
229 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
230 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
231 stream = open(encodeFilename(filename), open_mode)
232 return (stream, filename)
233 except (IOError, OSError) as err:
234 if err.errno in (errno.EACCES,):
237 # In case of error, try to remove win32 forbidden chars
238 alt_filename = os.path.join(
239 re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
240 for path_part in os.path.split(filename)
242 if alt_filename == filename:
245 # An exception here should be caught in the caller
246 stream = open(encodeFilename(filename), open_mode)
247 return (stream, alt_filename)
250 def timeconvert(timestr):
251 """Convert RFC 2822 defined time string into system timestamp"""
253 timetuple = email.utils.parsedate_tz(timestr)
254 if timetuple is not None:
255 timestamp = email.utils.mktime_tz(timetuple)
258 def sanitize_filename(s, restricted=False, is_id=False):
259 """Sanitizes a string so it could be used as part of a filename.
260 If restricted is set, use a stricter subset of allowed characters.
261 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
263 def replace_insane(char):
264 if char == '?' or ord(char) < 32 or ord(char) == 127:
267 return '' if restricted else '\''
269 return '_-' if restricted else ' -'
270 elif char in '\\/|*<>':
272 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
274 if restricted and ord(char) > 127:
278 result = ''.join(map(replace_insane, s))
280 while '__' in result:
281 result = result.replace('__', '_')
282 result = result.strip('_')
283 # Common case of "Foreign band name - English song title"
284 if restricted and result.startswith('-_'):
290 def orderedSet(iterable):
291 """ Remove all duplicates from the input iterable """
299 def _htmlentity_transform(entity):
300 """Transforms an HTML entity to a character."""
301 # Known non-numeric HTML entity
302 if entity in compat_html_entities.name2codepoint:
303 return compat_chr(compat_html_entities.name2codepoint[entity])
305 mobj = re.match(r'#(x?[0-9]+)', entity)
307 numstr = mobj.group(1)
308 if numstr.startswith('x'):
310 numstr = '0%s' % numstr
313 return compat_chr(int(numstr, base))
315 # Unknown entity in name, return its literal representation
316 return ('&%s;' % entity)
322 assert type(s) == compat_str
325 r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
328 def encodeFilename(s, for_subprocess=False):
330 @param s The name of the file
333 assert type(s) == compat_str
335 # Python 3 has a Unicode API
336 if sys.version_info >= (3, 0):
339 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
340 # Pass '' directly to use Unicode APIs on Windows 2000 and up
341 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
342 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
343 if not for_subprocess:
346 # For subprocess calls, encode with locale encoding
347 # Refer to http://stackoverflow.com/a/9951851/35070
348 encoding = preferredencoding()
350 encoding = sys.getfilesystemencoding()
353 return s.encode(encoding, 'ignore')
def encodeArgument(s):
    """Encode a command-line argument for use in a subprocess call.

    The value is passed to encodeFilename with for_subprocess enabled.
    Anything that is not a compat_str is decoded as ASCII first.
    """
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy callers still hand in byte strings. Once all post
        # processors are fixed this branch should become a hard failure
        # instead of a silent ASCII decode.
        text = s.decode('ascii')
    return encodeFilename(text, for_subprocess=True)
365 def decodeOption(optval):
368 if isinstance(optval, bytes):
369 optval = optval.decode(preferredencoding())
371 assert isinstance(optval, compat_str)
374 def formatSeconds(secs):
376 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
378 return '%d:%02d' % (secs // 60, secs % 60)
383 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
384 if sys.version_info < (3, 2):
387 class HTTPSConnectionV3(httplib.HTTPSConnection):
388 def __init__(self, *args, **kwargs):
389 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
392 sock = socket.create_connection((self.host, self.port), self.timeout)
393 if getattr(self, '_tunnel_host', False):
397 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
399 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
401 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
402 def https_open(self, req):
403 return self.do_open(HTTPSConnectionV3, req)
404 return HTTPSHandlerV3(**kwargs)
405 elif hasattr(ssl, 'create_default_context'): # Python >= 3.4
406 context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
407 context.options &= ~ssl.OP_NO_SSLv3 # Allow older, not-as-secure SSLv3
408 if opts_no_check_certificate:
409 context.verify_mode = ssl.CERT_NONE
410 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
412 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
413 context.verify_mode = (ssl.CERT_NONE
414 if opts_no_check_certificate
415 else ssl.CERT_REQUIRED)
416 context.set_default_verify_paths()
418 context.load_default_certs()
419 except AttributeError:
421 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
423 class ExtractorError(Exception):
424 """Error during info extraction."""
425 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
426 """ tb, if given, is the original traceback (so that it can be printed out).
427 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
430 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
432 if video_id is not None:
433 msg = video_id + ': ' + msg
435 msg += ' (caused by %r)' % cause
437 msg = msg + '; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
438 super(ExtractorError, self).__init__(msg)
441 self.exc_info = sys.exc_info() # preserve original exception
443 self.video_id = video_id
445 def format_traceback(self):
446 if self.traceback is None:
448 return ''.join(traceback.format_tb(self.traceback))
451 class RegexNotFoundError(ExtractorError):
452 """Error when a regex didn't match"""
456 class DownloadError(Exception):
457 """Download Error exception.
459 This exception may be thrown by FileDownloader objects if they are not
460 configured to continue on errors. They will contain the appropriate
463 def __init__(self, msg, exc_info=None):
464 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
465 super(DownloadError, self).__init__(msg)
466 self.exc_info = exc_info
469 class SameFileError(Exception):
470 """Same File exception.
472 This exception will be thrown by FileDownloader objects if they detect
473 multiple files would have to be downloaded to the same file on disk.
478 class PostProcessingError(Exception):
479 """Post Processing exception.
481 This exception may be raised by PostProcessor's .run() method to
482 indicate an error in the postprocessing task.
484 def __init__(self, msg):
487 class MaxDownloadsReached(Exception):
488 """ --max-downloads limit has been reached. """
492 class UnavailableVideoError(Exception):
493 """Unavailable Format exception.
495 This exception will be thrown when a video is requested
496 in a format that is not available for that video.
501 class ContentTooShortError(Exception):
502 """Content Too Short exception.
504 This exception may be raised by FileDownloader objects when a file they
505 download is too small for what the server announced first, indicating
506 the connection was probably interrupted.
512 def __init__(self, downloaded, expected):
513 self.downloaded = downloaded
514 self.expected = expected
516 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
517 """Handler for HTTP requests and responses.
519 This class, when installed with an OpenerDirector, automatically adds
520 the standard headers to every HTTP request and handles gzipped and
521 deflated responses from web servers. If compression is to be avoided in
522 a particular request, the original request in the program code only has
523 to include the HTTP header "Youtubedl-No-Compression", which will be
524 removed before making the real request.
526 Part of this code was copied from:
528 http://techknack.net/python-urllib2-handlers/
530 Andrew Rowls, the author of that code, agreed to release it to the
537 return zlib.decompress(data, -zlib.MAX_WBITS)
539 return zlib.decompress(data)
542 def addinfourl_wrapper(stream, headers, url, code):
543 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
544 return compat_urllib_request.addinfourl(stream, headers, url, code)
545 ret = compat_urllib_request.addinfourl(stream, headers, url)
549 def http_request(self, req):
550 for h, v in std_headers.items():
551 if h not in req.headers:
553 if 'Youtubedl-no-compression' in req.headers:
554 if 'Accept-encoding' in req.headers:
555 del req.headers['Accept-encoding']
556 del req.headers['Youtubedl-no-compression']
557 if 'Youtubedl-user-agent' in req.headers:
558 if 'User-agent' in req.headers:
559 del req.headers['User-agent']
560 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
561 del req.headers['Youtubedl-user-agent']
563 if sys.version_info < (2, 7) and '#' in req.get_full_url():
564 # Python 2.6 is brain-dead when it comes to fragments
565 req._Request__original = req._Request__original.partition('#')[0]
566 req._Request__r_type = req._Request__r_type.partition('#')[0]
570 def http_response(self, req, resp):
573 if resp.headers.get('Content-encoding', '') == 'gzip':
574 content = resp.read()
575 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
577 uncompressed = io.BytesIO(gz.read())
578 except IOError as original_ioerror:
579 # There may be junk at the end of the file
580 # See http://stackoverflow.com/q/4928560/35070 for details
581 for i in range(1, 1024):
583 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
584 uncompressed = io.BytesIO(gz.read())
589 raise original_ioerror
590 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
591 resp.msg = old_resp.msg
593 if resp.headers.get('Content-encoding', '') == 'deflate':
594 gz = io.BytesIO(self.deflate(resp.read()))
595 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
596 resp.msg = old_resp.msg
599 https_request = http_request
600 https_response = http_response
603 def parse_iso8601(date_str, delimiter='T'):
604 """ Return a UNIX timestamp from the given date """
610 r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
613 timezone = datetime.timedelta()
615 date_str = date_str[:-len(m.group(0))]
616 if not m.group('sign'):
617 timezone = datetime.timedelta()
619 sign = 1 if m.group('sign') == '+' else -1
620 timezone = datetime.timedelta(
621 hours=sign * int(m.group('hours')),
622 minutes=sign * int(m.group('minutes')))
623 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
624 dt = datetime.datetime.strptime(date_str, date_format) - timezone
625 return calendar.timegm(dt.timetuple())
628 def unified_strdate(date_str):
629 """Return a string with the date in the format YYYYMMDD"""
636 date_str = date_str.replace(',', ' ')
637 # %z (UTC offset) is only supported in python>=3.2
638 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
639 format_expressions = [
644 '%b %dst %Y %I:%M%p',
645 '%b %dnd %Y %I:%M%p',
646 '%b %dth %Y %I:%M%p',
655 '%Y-%m-%d %H:%M:%S.%f',
658 '%Y-%m-%dT%H:%M:%SZ',
659 '%Y-%m-%dT%H:%M:%S.%fZ',
660 '%Y-%m-%dT%H:%M:%S.%f0Z',
662 '%Y-%m-%dT%H:%M:%S.%f',
665 for expression in format_expressions:
667 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
670 if upload_date is None:
671 timetuple = email.utils.parsedate_tz(date_str)
673 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
676 def determine_ext(url, default_ext='unknown_video'):
679 guess = url.partition('?')[0].rpartition('.')[2]
680 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Return the subtitle file name that accompanies *filename*.

    The extension of *filename* (if any) is replaced by
    ``<sub_lang>.<sub_format>``, e.g. ``video.mp4`` -> ``video.en.srt``.

    @param filename    Name (optionally with a path) of the media file
    @param sub_lang    Language code inserted before the subtitle extension
    @param sub_format  Subtitle extension without the leading dot
    """
    # os.path.splitext only inspects the last path component, so a dot in a
    # directory name (or a leading dot of a hidden file) is never mistaken
    # for the start of the extension, which a plain rsplit('.', 1) would do.
    base = os.path.splitext(filename)[0]
    return base + '.' + sub_lang + '.' + sub_format
688 def date_from_str(date_str):
690 Return a datetime object from a string in the format YYYYMMDD or
691 (now|today)[+-][0-9](day|week|month|year)(s)?"""
692 today = datetime.date.today()
693 if date_str == 'now'or date_str == 'today':
695 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
696 if match is not None:
697 sign = match.group('sign')
698 time = int(match.group('time'))
701 unit = match.group('unit')
710 delta = datetime.timedelta(**{unit: time})
712 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
714 def hyphenate_date(date_str):
716 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
717 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
718 if match is not None:
719 return '-'.join(match.groups())
723 class DateRange(object):
724 """Represents a time interval between two dates"""
725 def __init__(self, start=None, end=None):
726 """start and end must be strings in the format accepted by date"""
727 if start is not None:
728 self.start = date_from_str(start)
730 self.start = datetime.datetime.min.date()
732 self.end = date_from_str(end)
734 self.end = datetime.datetime.max.date()
735 if self.start > self.end:
736 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
739 """Returns a range that only contains the given day"""
741 def __contains__(self, date):
742 """Check if the date is in the range"""
743 if not isinstance(date, datetime.date):
744 date = date_from_str(date)
745 return self.start <= date <= self.end
747 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
751 """ Returns the platform name as a compat_str """
752 res = platform.platform()
753 if isinstance(res, bytes):
754 res = res.decode(preferredencoding())
756 assert isinstance(res, compat_str)
760 def _windows_write_string(s, out):
761 """ Returns True if the string was written using special methods,
762 False if it has yet to be written out."""
763 # Adapted from http://stackoverflow.com/a/3259271/35070
766 import ctypes.wintypes
774 fileno = out.fileno()
775 except AttributeError:
776 # If the output stream doesn't have a fileno, it's virtual
778 if fileno not in WIN_OUTPUT_IDS:
781 GetStdHandle = ctypes.WINFUNCTYPE(
782 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
783 ("GetStdHandle", ctypes.windll.kernel32))
784 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
786 WriteConsoleW = ctypes.WINFUNCTYPE(
787 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
788 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
789 ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
790 written = ctypes.wintypes.DWORD(0)
792 GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
793 FILE_TYPE_CHAR = 0x0002
794 FILE_TYPE_REMOTE = 0x8000
795 GetConsoleMode = ctypes.WINFUNCTYPE(
796 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
797 ctypes.POINTER(ctypes.wintypes.DWORD))(
798 ("GetConsoleMode", ctypes.windll.kernel32))
799 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
801 def not_a_console(handle):
802 if handle == INVALID_HANDLE_VALUE or handle is None:
804 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
805 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
810 def next_nonbmp_pos(s):
812 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
813 except StopIteration:
817 count = min(next_nonbmp_pos(s), 1024)
820 h, s, count if count else 2, ctypes.byref(written), None)
822 raise OSError('Failed to write string')
823 if not count: # We just wrote a non-BMP character
824 assert written.value == 2
827 assert written.value > 0
828 s = s[written.value:]
832 def write_string(s, out=None, encoding=None):
835 assert type(s) == compat_str
837 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
838 if _windows_write_string(s, out):
841 if ('b' in getattr(out, 'mode', '') or
842 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
843 byt = s.encode(encoding or preferredencoding(), 'ignore')
845 elif hasattr(out, 'buffer'):
846 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
847 byt = s.encode(enc, 'ignore')
848 out.buffer.write(byt)
854 def bytes_to_intlist(bs):
857 if isinstance(bs[0], int): # Python 3
860 return [ord(c) for c in bs]
863 def intlist_to_bytes(xs):
866 return struct_pack('%dB' % len(xs), *xs)
869 # Cross-platform file locking
870 if sys.platform == 'win32':
871 import ctypes.wintypes
874 class OVERLAPPED(ctypes.Structure):
876 ('Internal', ctypes.wintypes.LPVOID),
877 ('InternalHigh', ctypes.wintypes.LPVOID),
878 ('Offset', ctypes.wintypes.DWORD),
879 ('OffsetHigh', ctypes.wintypes.DWORD),
880 ('hEvent', ctypes.wintypes.HANDLE),
883 kernel32 = ctypes.windll.kernel32
884 LockFileEx = kernel32.LockFileEx
885 LockFileEx.argtypes = [
886 ctypes.wintypes.HANDLE, # hFile
887 ctypes.wintypes.DWORD, # dwFlags
888 ctypes.wintypes.DWORD, # dwReserved
889 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
890 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
891 ctypes.POINTER(OVERLAPPED) # Overlapped
893 LockFileEx.restype = ctypes.wintypes.BOOL
894 UnlockFileEx = kernel32.UnlockFileEx
895 UnlockFileEx.argtypes = [
896 ctypes.wintypes.HANDLE, # hFile
897 ctypes.wintypes.DWORD, # dwReserved
898 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
899 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
900 ctypes.POINTER(OVERLAPPED) # Overlapped
902 UnlockFileEx.restype = ctypes.wintypes.BOOL
903 whole_low = 0xffffffff
904 whole_high = 0x7fffffff
906 def _lock_file(f, exclusive):
907 overlapped = OVERLAPPED()
908 overlapped.Offset = 0
909 overlapped.OffsetHigh = 0
910 overlapped.hEvent = 0
911 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
912 handle = msvcrt.get_osfhandle(f.fileno())
913 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
914 whole_low, whole_high, f._lock_file_overlapped_p):
915 raise OSError('Locking file failed: %r' % ctypes.FormatError())
918 assert f._lock_file_overlapped_p
919 handle = msvcrt.get_osfhandle(f.fileno())
920 if not UnlockFileEx(handle, 0,
921 whole_low, whole_high, f._lock_file_overlapped_p):
922 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
927 def _lock_file(f, exclusive):
928 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
931 fcntl.flock(f, fcntl.LOCK_UN)
934 class locked_file(object):
935 def __init__(self, filename, mode, encoding=None):
936 assert mode in ['r', 'a', 'w']
937 self.f = io.open(filename, mode, encoding=encoding)
941 exclusive = self.mode != 'r'
943 _lock_file(self.f, exclusive)
949 def __exit__(self, etype, value, traceback):
958 def write(self, *args):
959 return self.f.write(*args)
961 def read(self, *args):
962 return self.f.read(*args)
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
970 def shell_quote(args):
972 encoding = get_filesystem_encoding()
974 if isinstance(a, bytes):
975 # We may get a filename encoded with 'encodeFilename'
976 a = a.decode(encoding)
977 quoted_args.append(pipes.quote(a))
978 return ' '.join(quoted_args)
981 def takewhile_inclusive(pred, seq):
982 """ Like itertools.takewhile, but include the latest evaluated element
983 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """Attach additional, internal-use data to a URL.

    *data* is serialised as JSON, URL-encoded under the key
    ``__youtubedl_smuggle`` and appended to *url* after a '#'.
    """
    payload = {'__youtubedl_smuggle': json.dumps(data)}
    fragment = compat_urllib_parse.urlencode(payload)
    return url + '#' + fragment
998 def unsmuggle_url(smug_url, default=None):
999 if not '#__youtubedl_smuggle' in smug_url:
1000 return smug_url, default
1001 url, _, sdata = smug_url.rpartition('#')
1002 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
1003 data = json.loads(jsond)
1007 def format_bytes(bytes):
1010 if type(bytes) is str:
1011 bytes = float(bytes)
1015 exponent = int(math.log(bytes, 1024.0))
1016 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
1017 converted = float(bytes) / float(1024 ** exponent)
1018 return '%.2f%s' % (converted, suffix)
1021 def get_term_width():
1022 columns = compat_getenv('COLUMNS', None)
1027 sp = subprocess.Popen(
1029 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1030 out, err = sp.communicate()
1031 return int(out.split()[1])
1037 def month_by_name(name):
1038 """ Return the number of a month by (locale-independently) English name """
1041 'January', 'February', 'March', 'April', 'May', 'June',
1042 'July', 'August', 'September', 'October', 'November', 'December']
1044 return ENGLISH_NAMES.index(name) + 1
1049 def fix_xml_ampersands(xml_str):
1050 """Replace all the '&' by '&amp;' in XML"""
1052 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1057 def setproctitle(title):
1058 assert isinstance(title, compat_str)
1060 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1063 title_bytes = title.encode('utf-8')
1064 buf = ctypes.create_string_buffer(len(title_bytes))
1065 buf.value = title_bytes
1067 libc.prctl(15, buf, 0, 0, 0)
1068 except AttributeError:
1069 return # Strange libc, just skip this
1072 def remove_start(s, start):
1073 if s.startswith(start):
1074 return s[len(start):]
1078 def remove_end(s, end):
1080 return s[:-len(end)]
def url_basename(url):
    """Return the last '/'-separated segment of the path of *url*.

    Leading and trailing slashes of the path are ignored, so a URL whose
    path ends in '/' still yields its final named segment.
    """
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    # Everything after the last '/', or the whole string if there is none
    return trimmed_path.rpartition('/')[2]
1089 class HEADRequest(compat_urllib_request.Request):
1090 def get_method(self):
1094 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
1097 v = getattr(v, get_attr, None)
1100 return default if v is None else (int(v) * invscale // scale)
def str_or_none(v, default=None):
    """Convert *v* with compat_str, or return *default* when *v* is None."""
    if v is None:
        return default
    return compat_str(v)
1107 def str_to_int(int_str):
1108 """ A more relaxed version of int_or_none """
1111 int_str = re.sub(r'[,\.\+]', '', int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert *v* to a float scaled by ``invscale / scale``.

    Returns *default* unchanged when *v* is None.
    """
    if v is None:
        return default
    return float(v) * invscale / scale
1119 def parse_duration(s):
1128 (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
1129 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
1131 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
1134 res = int(m.group('secs'))
1136 res += int(m.group('mins')) * 60
1137 if m.group('hours'):
1138 res += int(m.group('hours')) * 60 * 60
1140 res += float(m.group('ms'))
def prepend_extension(filename, ext):
    """Insert *ext* in front of the existing extension of *filename*.

    'abc.ext' with ext 'temp' becomes 'abc.temp.ext'; a name without an
    extension simply gets '.temp' appended.
    """
    stem, suffix = os.path.splitext(filename)
    return '{stem}.{ext}{suffix}'.format(stem=stem, ext=ext, suffix=suffix)
1149 def check_executable(exe, args=[]):
1150 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1151 args can be a list of arguments for a short output (like -version) """
1153 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1159 def get_exe_version(exe, args=['--version'],
1160 version_re=r'version\s+([0-9._-a-zA-Z]+)',
1161 unrecognized='present'):
1162 """ Returns the version of the specified executable,
1163 or False if the executable is not present """
1165 out, err = subprocess.Popen(
1167 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
1170 firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
1171 m = re.search(version_re, firstline)
1178 class PagedList(object):
1180 # This is only useful for tests
1181 return len(self.getslice())
1184 class OnDemandPagedList(PagedList):
1185 def __init__(self, pagefunc, pagesize):
1186 self._pagefunc = pagefunc
1187 self._pagesize = pagesize
1189 def getslice(self, start=0, end=None):
1191 for pagenum in itertools.count(start // self._pagesize):
1192 firstid = pagenum * self._pagesize
1193 nextfirstid = pagenum * self._pagesize + self._pagesize
1194 if start >= nextfirstid:
1197 page_results = list(self._pagefunc(pagenum))
1200 start % self._pagesize
1201 if firstid <= start < nextfirstid
1205 ((end - 1) % self._pagesize) + 1
1206 if (end is not None and firstid <= end <= nextfirstid)
1209 if startv != 0 or endv is not None:
1210 page_results = page_results[startv:endv]
1211 res.extend(page_results)
1213 # A little optimization - if current page is not "full", ie. does
1214 # not contain page_size videos then we can assume that this page
1215 # is the last one - there are no more ids on further pages -
1216 # i.e. no need to query again.
1217 if len(page_results) + startv < self._pagesize:
1220 # If we got the whole page, but the next page is not interesting,
1221 # break out early as well
1222 if end == nextfirstid:
1227 class InAdvancePagedList(PagedList):
1228 def __init__(self, pagefunc, pagecount, pagesize):
1229 self._pagefunc = pagefunc
1230 self._pagecount = pagecount
1231 self._pagesize = pagesize
1233 def getslice(self, start=0, end=None):
1235 start_page = start // self._pagesize
1237 self._pagecount if end is None else (end // self._pagesize + 1))
1238 skip_elems = start - start_page * self._pagesize
1239 only_more = None if end is None else end - start
1240 for pagenum in range(start_page, end_page):
1241 page = list(self._pagefunc(pagenum))
1243 page = page[skip_elems:]
1245 if only_more is not None:
1246 if len(page) < only_more:
1247 only_more -= len(page)
1249 page = page[:only_more]
1256 def uppercase_escape(s):
1257 unicode_escape = codecs.getdecoder('unicode_escape')
1259 r'\\U[0-9a-fA-F]{8}',
1260 lambda m: unicode_escape(m.group(0))[0],
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986.

    The string is percent-quoted with compat_urllib_parse.quote while the
    characters %/;:@&=+$,!~*'()?#[] are left as they are. On Python 2 a
    unicode object is encoded as UTF-8 before quoting.
    """
    # The version check comes first so that the name `unicode` is only
    # evaluated on Python 2, where it exists.
    encode_first = sys.version_info < (3, 0) and isinstance(s, unicode)
    to_quote = s.encode('utf-8') if encode_first else s
    return compat_urllib_parse.quote(to_quote, b"%/;:@&=+$,!~*'()?#[]")
1271 def escape_url(url):
1272 """Escape URL as suggested by RFC 3986"""
1273 url_parsed = compat_urllib_parse_urlparse(url)
1274 return url_parsed._replace(
1275 path=escape_rfc3986(url_parsed.path),
1276 params=escape_rfc3986(url_parsed.params),
1277 query=escape_rfc3986(url_parsed.query),
1278 fragment=escape_rfc3986(url_parsed.fragment)
1282 struct.pack('!I', 0)
1284 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
1285 def struct_pack(spec, *args):
1286 if isinstance(spec, compat_str):
1287 spec = spec.encode('ascii')
1288 return struct.pack(spec, *args)
1290 def struct_unpack(spec, *args):
1291 if isinstance(spec, compat_str):
1292 spec = spec.encode('ascii')
1293 return struct.unpack(spec, *args)
1295 struct_pack = struct.pack
1296 struct_unpack = struct.unpack
1299 def read_batch_urls(batch_fd):
1301 if not isinstance(url, compat_str):
1302 url = url.decode('utf-8', 'replace')
1303 BOM_UTF8 = '\xef\xbb\xbf'
1304 if url.startswith(BOM_UTF8):
1305 url = url[len(BOM_UTF8):]
1307 if url.startswith(('#', ';', ']')):
1311 with contextlib.closing(batch_fd) as fd:
1312 return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode the given arguments and return the result as ASCII bytes.

    All arguments are forwarded unchanged to compat_urllib_parse.urlencode.
    """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1320 etree_iter = xml.etree.ElementTree.Element.iter
1321 except AttributeError: # Python <=2.6
1322 etree_iter = lambda n: n.findall('.//*')
1326 class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
1327 def doctype(self, name, pubid, system):
1328 pass # Ignore doctypes
1330 parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
1331 kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
1332 tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
1333 # Fix up XML parser in Python 2.x
1334 if sys.version_info < (3, 0):
1335 for n in etree_iter(tree):
1336 if n.text is not None:
1337 if not isinstance(n.text, compat_str):
1338 n.text = n.text.decode('utf-8')
1351 def parse_age_limit(s):
1354 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
1355 return int(m.group('age')) if m else US_RATINGS.get(s, None)
1358 def strip_jsonp(code):
1360 r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
1363 def js_to_json(code):
1366 if v in ('true', 'false', 'null'):
1368 if v.startswith('"'):
1370 if v.startswith("'"):
1372 v = re.sub(r"\\\\|\\'|\"", lambda m: {
1379 res = re.sub(r'''(?x)
1380 "(?:[^"\\]*(?:\\\\|\\")?)*"|
1381 '(?:[^'\\]*(?:\\\\|\\')?)*'|
1382 [a-zA-Z_][a-zA-Z_0-9]*
1384 res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
1388 def qualities(quality_ids):
1389 """ Get a numeric quality value out of a list of possible values """
1392 return quality_ids.index(qid)
1398 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1401 def limit_length(s, length):
1402 """ Add ellipses to overly long strings """
1407 return s[:length - len(ELLIPSES)] + ELLIPSES
def version_tuple(v):
    """Split a dotted version string into a list of its integer components."""
    return list(map(int, v.split('.')))
1415 def is_outdated_version(version, limit, assume_new=True):
1417 return not assume_new
1419 return version_tuple(version) < version_tuple(limit)
1421 return not assume_new