2 # -*- coding: utf-8 -*-
27 import urllib.request as compat_urllib_request
28 except ImportError: # Python 2
29 import urllib2 as compat_urllib_request
32 import urllib.error as compat_urllib_error
33 except ImportError: # Python 2
34 import urllib2 as compat_urllib_error
37 import urllib.parse as compat_urllib_parse
38 except ImportError: # Python 2
39 import urllib as compat_urllib_parse
42 from urllib.parse import urlparse as compat_urllib_parse_urlparse
43 except ImportError: # Python 2
44 from urlparse import urlparse as compat_urllib_parse_urlparse
47 import urllib.parse as compat_urlparse
48 except ImportError: # Python 2
49 import urlparse as compat_urlparse
52 import http.cookiejar as compat_cookiejar
53 except ImportError: # Python 2
54 import cookielib as compat_cookiejar
57 import html.entities as compat_html_entities
58 except ImportError: # Python 2
59 import htmlentitydefs as compat_html_entities
62 import html.parser as compat_html_parser
63 except ImportError: # Python 2
64 import HTMLParser as compat_html_parser
67 import http.client as compat_http_client
68 except ImportError: # Python 2
69 import httplib as compat_http_client
72 from urllib.error import HTTPError as compat_HTTPError
73 except ImportError: # Python 2
74 from urllib2 import HTTPError as compat_HTTPError
77 from urllib.request import urlretrieve as compat_urlretrieve
78 except ImportError: # Python 2
79 from urllib import urlretrieve as compat_urlretrieve
83 from subprocess import DEVNULL
84 compat_subprocess_get_DEVNULL = lambda: DEVNULL
86 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
89 from urllib.parse import parse_qs as compat_parse_qs
90 except ImportError: # Python 2
91 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
92 # Python 2's version is apparently totally broken
93 def _unquote(string, encoding='utf-8', errors='replace'):
96 res = string.split('%')
103 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
110 pct_sequence += item[:2].decode('hex')
113 # This segment was just a single percent-encoded character.
114 # May be part of a sequence of code units, so delay decoding.
115 # (Stored in pct_sequence).
119 # Encountered non-percent-encoded characters. Flush the current
121 string += pct_sequence.decode(encoding, errors) + rest
124 # Flush the final pct_sequence
125 string += pct_sequence.decode(encoding, errors)
128 def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
129 encoding='utf-8', errors='replace'):
130 qs, _coerce_result = qs, unicode
131 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
133 for name_value in pairs:
134 if not name_value and not strict_parsing:
136 nv = name_value.split('=', 1)
139 raise ValueError("bad query field: %r" % (name_value,))
140 # Handle case of a control-name with no equal sign
141 if keep_blank_values:
145 if len(nv[1]) or keep_blank_values:
146 name = nv[0].replace('+', ' ')
147 name = _unquote(name, encoding=encoding, errors=errors)
148 name = _coerce_result(name)
149 value = nv[1].replace('+', ' ')
150 value = _unquote(value, encoding=encoding, errors=errors)
151 value = _coerce_result(value)
152 r.append((name, value))
155 def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
156 encoding='utf-8', errors='replace'):
158 pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
159 encoding=encoding, errors=errors)
160 for name, value in pairs:
161 if name in parsed_result:
162 parsed_result[name].append(value)
164 parsed_result[name] = [value]
168 compat_str = unicode # Python 2
173 compat_chr = unichr # Python 2
178 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
179 except ImportError: # Python 2.6
180 from xml.parsers.expat import ExpatError as compat_xml_parse_error
183 if type(c) is int: return c
186 # This is not clearly defined otherwise
187 compiled_regex_type = type(re.compile(''))
190 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
191 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
192 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
193 'Accept-Encoding': 'gzip, deflate',
194 'Accept-Language': 'en-us,en;q=0.5',
197 def preferredencoding():
198 """Get preferred encoding.
200 Returns the best encoding scheme for the system, based on
201 locale.getpreferredencoding() and some further tweaks.
204 pref = locale.getpreferredencoding()
211 if sys.version_info < (3,0):
213 print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
216 assert type(s) == type(u'')
219 # In Python 2.x, json.dump expects a bytestream.
220 # In Python 3.x, it writes to a character stream
221 if sys.version_info < (3,0):
222 def write_json_file(obj, fn):
223 with open(fn, 'wb') as f:
226 def write_json_file(obj, fn):
227 with open(fn, 'w', encoding='utf-8') as f:
230 if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
    """Return the first element matching ``xpath[@key='val']``, or None.

    The attribute name and value are validated up front so the
    interpolated XPath expression below cannot be malformed by
    unexpected characters.
    """
    assert re.match(r'^[a-zA-Z]+$', key)
    assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
    predicate = u"[@%s='%s']" % (key, val)
    return node.find(xpath + predicate)
238 def find_xpath_attr(node, xpath, key, val):
239 for f in node.findall(xpath):
240 if f.attrib.get(key) == val:
244 # On python2.6 the xml.etree.ElementTree.Element methods don't support
245 # the namespace parameter
246 def xpath_with_ns(path, ns_map):
247 components = [c.split(':') for c in path.split('/')]
251 replaced.append(c[0])
254 replaced.append('{%s}%s' % (ns_map[ns], tag))
255 return '/'.join(replaced)
257 def htmlentity_transform(matchobj):
258 """Transforms an HTML entity to a character.
260 This function receives a match object and is intended to be used with
261 the re.sub() function.
263 entity = matchobj.group(1)
265 # Known non-numeric HTML entity
266 if entity in compat_html_entities.name2codepoint:
267 return compat_chr(compat_html_entities.name2codepoint[entity])
269 mobj = re.match(u'(?u)#(x?\\d+)', entity)
271 numstr = mobj.group(1)
272 if numstr.startswith(u'x'):
274 numstr = u'0%s' % numstr
277 return compat_chr(int(numstr, base))
279 # Unknown entity in name, return its literal representation
280 return (u'&%s;' % entity)
282 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
283 class BaseHTMLParser(compat_html_parser.HTMLParser):
285 compat_html_parser.HTMLParser.__init__(self)
288 def loads(self, html):
293 class AttrParser(BaseHTMLParser):
294 """Modified HTMLParser that isolates a tag with the specified attribute"""
295 def __init__(self, attribute, value):
296 self.attribute = attribute
301 self.watch_startpos = False
303 BaseHTMLParser.__init__(self)
305 def error(self, message):
306 if self.error_count > 10 or self.started:
307 raise compat_html_parser.HTMLParseError(message, self.getpos())
308 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
309 self.error_count += 1
312 def handle_starttag(self, tag, attrs):
315 self.find_startpos(None)
316 if self.attribute in attrs and attrs[self.attribute] == self.value:
319 self.watch_startpos = True
321 if not tag in self.depth: self.depth[tag] = 0
324 def handle_endtag(self, tag):
326 if tag in self.depth: self.depth[tag] -= 1
327 if self.depth[self.result[0]] == 0:
329 self.result.append(self.getpos())
331 def find_startpos(self, x):
332 """Needed to put the start position of the result (self.result[1])
333 after the opening tag with the requested id"""
334 if self.watch_startpos:
335 self.watch_startpos = False
336 self.result.append(self.getpos())
337 handle_entityref = handle_charref = handle_data = handle_comment = \
338 handle_decl = handle_pi = unknown_decl = find_startpos
340 def get_result(self):
341 if self.result is None:
343 if len(self.result) != 3:
345 lines = self.html.split('\n')
346 lines = lines[self.result[1][0]-1:self.result[2][0]]
347 lines[0] = lines[0][self.result[1][1]:]
349 lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
350 lines[-1] = lines[-1][:self.result[2][1]]
351 return '\n'.join(lines).strip()
352 # Hack for https://github.com/rg3/youtube-dl/issues/662
353 if sys.version_info < (2, 7, 3):
354 AttrParser.parse_endtag = (lambda self, i:
355 i + len("</scr'+'ipt>")
356 if self.rawdata[i:].startswith("</scr'+'ipt>")
357 else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals ``id``.

    Thin wrapper around get_element_by_attribute with the attribute
    name fixed to "id".
    """
    return get_element_by_attribute("id", id, html)
363 def get_element_by_attribute(attribute, value, html):
364 """Return the content of the tag with the specified attribute in the passed HTML document"""
365 parser = AttrParser(attribute, value)
368 except compat_html_parser.HTMLParseError:
370 return parser.get_result()
372 class MetaParser(BaseHTMLParser):
374 Modified HTMLParser that isolates a meta tag with the specified name
377 def __init__(self, name):
378 BaseHTMLParser.__init__(self)
383 def handle_starttag(self, tag, attrs):
387 if attrs.get('name') == self.name:
388 self.result = attrs.get('content')
390 def get_result(self):
393 def get_meta_content(name, html):
395 Return the content attribute from the meta tag with the given name attribute.
397 parser = MetaParser(name)
400 except compat_html_parser.HTMLParseError:
402 return parser.get_result()
405 def clean_html(html):
406 """Clean an HTML snippet into a readable string"""
408 html = html.replace('\n', ' ')
409 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
410 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
412 html = re.sub('<.*?>', '', html)
413 # Replace html entities
414 html = unescapeHTML(html)
418 def sanitize_open(filename, open_mode):
419 """Try to open the given filename, and slightly tweak it if this fails.
421 Attempts to open the given filename. If this fails, it tries to change
422 the filename slightly, step by step, until it's either able to open it
423 or it fails and raises a final exception, like the standard open()
426 It returns the tuple (stream, definitive_file_name).
430 if sys.platform == 'win32':
432 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
433 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
434 stream = open(encodeFilename(filename), open_mode)
435 return (stream, filename)
436 except (IOError, OSError) as err:
437 if err.errno in (errno.EACCES,):
440 # In case of error, try to remove win32 forbidden chars
441 alt_filename = os.path.join(
442 re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
443 for path_part in os.path.split(filename)
445 if alt_filename == filename:
448 # An exception here should be caught in the caller
449 stream = open(encodeFilename(filename), open_mode)
450 return (stream, alt_filename)
453 def timeconvert(timestr):
454 """Convert RFC 2822 defined time string into system timestamp"""
456 timetuple = email.utils.parsedate_tz(timestr)
457 if timetuple is not None:
458 timestamp = email.utils.mktime_tz(timetuple)
461 def sanitize_filename(s, restricted=False, is_id=False):
462 """Sanitizes a string so it could be used as part of a filename.
463 If restricted is set, use a stricter subset of allowed characters.
464 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
466 def replace_insane(char):
467 if char == '?' or ord(char) < 32 or ord(char) == 127:
470 return '' if restricted else '\''
472 return '_-' if restricted else ' -'
473 elif char in '\\/|*<>':
475 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
477 if restricted and ord(char) > 127:
481 result = u''.join(map(replace_insane, s))
483 while '__' in result:
484 result = result.replace('__', '_')
485 result = result.strip('_')
486 # Common case of "Foreign band name - English song title"
487 if restricted and result.startswith('-_'):
493 def orderedSet(iterable):
494 """ Remove all duplicates from the input iterable """
505 assert type(s) == type(u'')
507 result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
511 def encodeFilename(s, for_subprocess=False):
513 @param s The name of the file
516 assert type(s) == compat_str
518 # Python 3 has a Unicode API
519 if sys.version_info >= (3, 0):
522 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
523 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
524 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
525 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
526 if not for_subprocess:
529 # For subprocess calls, encode with locale encoding
530 # Refer to http://stackoverflow.com/a/9951851/35070
531 encoding = preferredencoding()
533 encoding = sys.getfilesystemencoding()
536 return s.encode(encoding, 'ignore')
539 def decodeOption(optval):
542 if isinstance(optval, bytes):
543 optval = optval.decode(preferredencoding())
545 assert isinstance(optval, compat_str)
548 def formatSeconds(secs):
550 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
552 return '%d:%02d' % (secs // 60, secs % 60)
557 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
558 if sys.version_info < (3, 2):
561 class HTTPSConnectionV3(httplib.HTTPSConnection):
562 def __init__(self, *args, **kwargs):
563 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
566 sock = socket.create_connection((self.host, self.port), self.timeout)
567 if getattr(self, '_tunnel_host', False):
571 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
573 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
575 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
576 def https_open(self, req):
577 return self.do_open(HTTPSConnectionV3, req)
578 return HTTPSHandlerV3(**kwargs)
580 context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
581 context.verify_mode = (ssl.CERT_NONE
582 if opts_no_check_certificate
583 else ssl.CERT_REQUIRED)
584 context.set_default_verify_paths()
586 context.load_default_certs()
587 except AttributeError:
589 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
591 class ExtractorError(Exception):
592 """Error during info extraction."""
593 def __init__(self, msg, tb=None, expected=False, cause=None):
594 """ tb, if given, is the original traceback (so that it can be printed out).
595 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
598 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
601 msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
602 super(ExtractorError, self).__init__(msg)
605 self.exc_info = sys.exc_info() # preserve original exception
608 def format_traceback(self):
609 if self.traceback is None:
611 return u''.join(traceback.format_tb(self.traceback))
614 class RegexNotFoundError(ExtractorError):
615 """Error when a regex didn't match"""
619 class DownloadError(Exception):
620 """Download Error exception.
622 This exception may be thrown by FileDownloader objects if they are not
623 configured to continue on errors. They will contain the appropriate
626 def __init__(self, msg, exc_info=None):
627 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
628 super(DownloadError, self).__init__(msg)
629 self.exc_info = exc_info
632 class SameFileError(Exception):
633 """Same File exception.
635 This exception will be thrown by FileDownloader objects if they detect
636 multiple files would have to be downloaded to the same file on disk.
641 class PostProcessingError(Exception):
642 """Post Processing exception.
644 This exception may be raised by PostProcessor's .run() method to
645 indicate an error in the postprocessing task.
647 def __init__(self, msg):
650 class MaxDownloadsReached(Exception):
651 """ --max-downloads limit has been reached. """
655 class UnavailableVideoError(Exception):
656 """Unavailable Format exception.
658 This exception will be thrown when a video is requested
659 in a format that is not available for that video.
664 class ContentTooShortError(Exception):
665 """Content Too Short exception.
667 This exception may be raised by FileDownloader objects when a file they
668 download is too small for what the server announced first, indicating
669 the connection was probably interrupted.
675 def __init__(self, downloaded, expected):
676 self.downloaded = downloaded
677 self.expected = expected
679 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
680 """Handler for HTTP requests and responses.
682 This class, when installed with an OpenerDirector, automatically adds
683 the standard headers to every HTTP request and handles gzipped and
684 deflated responses from web servers. If compression is to be avoided in
685 a particular request, the original request in the program code only has
686 to include the HTTP header "Youtubedl-No-Compression", which will be
687 removed before making the real request.
689 Part of this code was copied from:
691 http://techknack.net/python-urllib2-handlers/
693 Andrew Rowls, the author of that code, agreed to release it to the
700 return zlib.decompress(data, -zlib.MAX_WBITS)
702 return zlib.decompress(data)
705 def addinfourl_wrapper(stream, headers, url, code):
706 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
707 return compat_urllib_request.addinfourl(stream, headers, url, code)
708 ret = compat_urllib_request.addinfourl(stream, headers, url)
712 def http_request(self, req):
713 for h,v in std_headers.items():
717 if 'Youtubedl-no-compression' in req.headers:
718 if 'Accept-encoding' in req.headers:
719 del req.headers['Accept-encoding']
720 del req.headers['Youtubedl-no-compression']
721 if 'Youtubedl-user-agent' in req.headers:
722 if 'User-agent' in req.headers:
723 del req.headers['User-agent']
724 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
725 del req.headers['Youtubedl-user-agent']
728 def http_response(self, req, resp):
731 if resp.headers.get('Content-encoding', '') == 'gzip':
732 content = resp.read()
733 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
735 uncompressed = io.BytesIO(gz.read())
736 except IOError as original_ioerror:
737 # There may be junk add the end of the file
738 # See http://stackoverflow.com/q/4928560/35070 for details
739 for i in range(1, 1024):
741 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
742 uncompressed = io.BytesIO(gz.read())
747 raise original_ioerror
748 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
749 resp.msg = old_resp.msg
751 if resp.headers.get('Content-encoding', '') == 'deflate':
752 gz = io.BytesIO(self.deflate(resp.read()))
753 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
754 resp.msg = old_resp.msg
757 https_request = http_request
758 https_response = http_response
761 def unified_strdate(date_str):
762 """Return a string with the date in the format YYYYMMDD"""
765 date_str = date_str.replace(',', ' ')
766 # %z (UTC offset) is only supported in python>=3.2
767 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
768 format_expressions = [
778 '%Y-%m-%dT%H:%M:%SZ',
779 '%Y-%m-%dT%H:%M:%S.%fZ',
780 '%Y-%m-%dT%H:%M:%S.%f0Z',
784 for expression in format_expressions:
786 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
789 if upload_date is None:
790 timetuple = email.utils.parsedate_tz(date_str)
792 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
795 def determine_ext(url, default_ext=u'unknown_video'):
796 guess = url.partition(u'?')[0].rpartition(u'.')[2]
797 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Derive the subtitle file name: ``<base>.<language>.<format>``.

    Only the last extension of ``filename`` is stripped before the
    language and format suffixes are appended.
    """
    base = filename.rsplit('.', 1)[0]
    return u'.'.join((base, sub_lang, sub_format))
805 def date_from_str(date_str):
807 Return a datetime object from a string in the format YYYYMMDD or
808 (now|today)[+-][0-9](day|week|month|year)(s)?"""
809 today = datetime.date.today()
810 if date_str == 'now'or date_str == 'today':
812 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
813 if match is not None:
814 sign = match.group('sign')
815 time = int(match.group('time'))
818 unit = match.group('unit')
827 delta = datetime.timedelta(**{unit: time})
829 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
831 def hyphenate_date(date_str):
833 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
834 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
835 if match is not None:
836 return '-'.join(match.groups())
840 class DateRange(object):
841 """Represents a time interval between two dates"""
842 def __init__(self, start=None, end=None):
843 """start and end must be strings in the format accepted by date"""
844 if start is not None:
845 self.start = date_from_str(start)
847 self.start = datetime.datetime.min.date()
849 self.end = date_from_str(end)
851 self.end = datetime.datetime.max.date()
852 if self.start > self.end:
853 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
856 """Returns a range that only contains the given day"""
858 def __contains__(self, date):
859 """Check if the date is in the range"""
860 if not isinstance(date, datetime.date):
861 date = date_from_str(date)
862 return self.start <= date <= self.end
864 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
868 """ Returns the platform name as a compat_str """
869 res = platform.platform()
870 if isinstance(res, bytes):
871 res = res.decode(preferredencoding())
873 assert isinstance(res, compat_str)
877 def write_string(s, out=None):
880 assert type(s) == compat_str
882 if ('b' in getattr(out, 'mode', '') or
883 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
884 s = s.encode(preferredencoding(), 'ignore')
887 except UnicodeEncodeError:
888 # In Windows shells, this can fail even when the codec is just charmap!?
889 # See https://wiki.python.org/moin/PrintFails#Issue
890 if sys.platform == 'win32' and hasattr(out, 'encoding'):
891 s = s.encode(out.encoding, 'ignore').decode(out.encoding)
899 def bytes_to_intlist(bs):
902 if isinstance(bs[0], int): # Python 3
905 return [ord(c) for c in bs]
908 def intlist_to_bytes(xs):
911 if isinstance(chr(0), bytes): # Python 2
912 return ''.join([chr(x) for x in xs])
def get_cachedir(params=None):
    """Return the cache directory youtube-dl should use.

    Resolution order: an explicit 'cachedir' entry in ``params``, then
    the XDG_CACHE_HOME environment variable, finally the fallback
    ``~/.cache/youtube-dl``.
    """
    # A None default avoids the shared-mutable-default-argument pitfall
    # and also tolerates callers that explicitly pass params=None.
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
923 # Cross-platform file locking
924 if sys.platform == 'win32':
925 import ctypes.wintypes
928 class OVERLAPPED(ctypes.Structure):
930 ('Internal', ctypes.wintypes.LPVOID),
931 ('InternalHigh', ctypes.wintypes.LPVOID),
932 ('Offset', ctypes.wintypes.DWORD),
933 ('OffsetHigh', ctypes.wintypes.DWORD),
934 ('hEvent', ctypes.wintypes.HANDLE),
937 kernel32 = ctypes.windll.kernel32
938 LockFileEx = kernel32.LockFileEx
939 LockFileEx.argtypes = [
940 ctypes.wintypes.HANDLE, # hFile
941 ctypes.wintypes.DWORD, # dwFlags
942 ctypes.wintypes.DWORD, # dwReserved
943 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
944 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
945 ctypes.POINTER(OVERLAPPED) # Overlapped
947 LockFileEx.restype = ctypes.wintypes.BOOL
948 UnlockFileEx = kernel32.UnlockFileEx
949 UnlockFileEx.argtypes = [
950 ctypes.wintypes.HANDLE, # hFile
951 ctypes.wintypes.DWORD, # dwReserved
952 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
953 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
954 ctypes.POINTER(OVERLAPPED) # Overlapped
956 UnlockFileEx.restype = ctypes.wintypes.BOOL
957 whole_low = 0xffffffff
958 whole_high = 0x7fffffff
960 def _lock_file(f, exclusive):
961 overlapped = OVERLAPPED()
962 overlapped.Offset = 0
963 overlapped.OffsetHigh = 0
964 overlapped.hEvent = 0
965 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
966 handle = msvcrt.get_osfhandle(f.fileno())
967 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
968 whole_low, whole_high, f._lock_file_overlapped_p):
969 raise OSError('Locking file failed: %r' % ctypes.FormatError())
972 assert f._lock_file_overlapped_p
973 handle = msvcrt.get_osfhandle(f.fileno())
974 if not UnlockFileEx(handle, 0,
975 whole_low, whole_high, f._lock_file_overlapped_p):
976 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
981 def _lock_file(f, exclusive):
982 fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
985 fcntl.lockf(f, fcntl.LOCK_UN)
988 class locked_file(object):
989 def __init__(self, filename, mode, encoding=None):
990 assert mode in ['r', 'a', 'w']
991 self.f = io.open(filename, mode, encoding=encoding)
995 exclusive = self.mode != 'r'
997 _lock_file(self.f, exclusive)
1003 def __exit__(self, etype, value, traceback):
1005 _unlock_file(self.f)
1012 def write(self, *args):
1013 return self.f.write(*args)
1015 def read(self, *args):
1016 return self.f.read(*args)
1019 def shell_quote(args):
1021 encoding = sys.getfilesystemencoding()
1022 if encoding is None:
1025 if isinstance(a, bytes):
1026 # We may get a filename encoded with 'encodeFilename'
1027 a = a.decode(encoding)
1028 quoted_args.append(pipes.quote(a))
1029 return u' '.join(quoted_args)
1032 def takewhile_inclusive(pred, seq):
1033 """ Like itertools.takewhile, but include the latest evaluated element
1034 (the first element so that Not pred(e)) """
1041 def smuggle_url(url, data):
1042 """ Pass additional data in a URL for internal use. """
1044 sdata = compat_urllib_parse.urlencode(
1045 {u'__youtubedl_smuggle': json.dumps(data)})
1046 return url + u'#' + sdata
1049 def unsmuggle_url(smug_url, default=None):
1050 if not '#__youtubedl_smuggle' in smug_url:
1051 return smug_url, default
1052 url, _, sdata = smug_url.rpartition(u'#')
1053 jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
1054 data = json.loads(jsond)
1058 def format_bytes(bytes):
1061 if type(bytes) is str:
1062 bytes = float(bytes)
1066 exponent = int(math.log(bytes, 1024.0))
1067 suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1068 converted = float(bytes) / float(1024 ** exponent)
1069 return u'%.2f%s' % (converted, suffix)
1072 def str_to_int(int_str):
1073 int_str = re.sub(r'[,\.]', u'', int_str)
1077 def get_term_width():
1078 columns = os.environ.get('COLUMNS', None)
1083 sp = subprocess.Popen(
1085 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1086 out, err = sp.communicate()
1087 return int(out.split()[1])
1093 def month_by_name(name):
1094 """ Return the number of a month by (locale-independently) English name """
1097 u'January', u'February', u'March', u'April', u'May', u'June',
1098 u'July', u'August', u'September', u'October', u'November', u'December']
1100 return ENGLISH_NAMES.index(name) + 1
1105 def fix_xml_ampersands(xml_str):
1106 """Replace all the '&' by '&' in XML"""
1108 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1113 def setproctitle(title):
1114 assert isinstance(title, compat_str)
1116 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1120 buf = ctypes.create_string_buffer(len(title) + 1)
1121 buf.value = title.encode('utf-8')
1123 libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
1124 except AttributeError:
1125 return # Strange libc, just skip this
1128 def remove_start(s, start):
1129 if s.startswith(start):
1130 return s[len(start):]
def url_basename(url):
    """Return the final path component of ``url``.

    The query string and fragment are not part of the parsed path, so
    they never leak into the result.
    """
    parsed_path = compat_urlparse.urlparse(url).path
    segments = parsed_path.strip(u'/').split(u'/')
    return segments[-1]
1139 class HEADRequest(compat_urllib_request.Request):
1140 def get_method(self):
def int_or_none(v, scale=1):
    """Convert ``v`` to an int floor-divided by ``scale``; None passes through."""
    if v is None:
        return None
    return int(v) // scale
1148 def parse_duration(s):
1153 r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
1156 res = int(m.group('secs'))
1158 res += int(m.group('mins')) * 60
1159 if m.group('hours'):
1160 res += int(m.group('hours')) * 60 * 60
def prepend_extension(filename, ext):
    """Insert ``ext`` before the real extension: ``a.mp4`` -> ``a.<ext>.mp4``."""
    stem, real_ext = os.path.splitext(filename)
    return u'%s.%s%s' % (stem, ext, real_ext)
1169 def check_executable(exe, args=[]):
1170 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1171 args can be a list of arguments for a short output (like -version) """
1173 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1179 class PagedList(object):
1180 def __init__(self, pagefunc, pagesize):
1181 self._pagefunc = pagefunc
1182 self._pagesize = pagesize
1185 # This is only useful for tests
1186 return len(self.getslice())
1188 def getslice(self, start=0, end=None):
1190 for pagenum in itertools.count(start // self._pagesize):
1191 firstid = pagenum * self._pagesize
1192 nextfirstid = pagenum * self._pagesize + self._pagesize
1193 if start >= nextfirstid:
1196 page_results = list(self._pagefunc(pagenum))
1199 start % self._pagesize
1200 if firstid <= start < nextfirstid
1204 ((end - 1) % self._pagesize) + 1
1205 if (end is not None and firstid <= end <= nextfirstid)
1208 if startv != 0 or endv is not None:
1209 page_results = page_results[startv:endv]
1210 res.extend(page_results)
1212 # A little optimization - if current page is not "full", ie. does
1213 # not contain page_size videos then we can assume that this page
1214 # is the last one - there are no more ids on further pages -
1215 # i.e. no need to query again.
1216 if len(page_results) + startv < self._pagesize:
1219 # If we got the whole page, but the next page is not interesting,
1220 # break out early as well
1221 if end == nextfirstid:
1226 def uppercase_escape(s):
1228 r'\\U([0-9a-fA-F]{8})',
1229 lambda m: compat_chr(int(m.group(1), base=16)), s)
1232 struct.pack(u'!I', 0)
1234 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
def struct_pack(spec, *args):
    """struct.pack wrapper that tolerates a text-type format spec.

    Python 2.6 (and some 2.7) struct implementations reject unicode
    format strings, so text specs are encoded to ASCII bytes before
    delegating to struct.pack.
    """
    raw_spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.pack(raw_spec, *args)
def struct_unpack(spec, *args):
    """struct.unpack wrapper that tolerates a text-type format spec.

    Mirrors struct_pack: encode a unicode spec to ASCII bytes for old
    struct implementations, then delegate to struct.unpack.
    """
    raw_spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.unpack(raw_spec, *args)
1245 struct_pack = struct.pack
1246 struct_unpack = struct.unpack