2 # -*- coding: utf-8 -*-
28 import urllib.request as compat_urllib_request
29 except ImportError: # Python 2
30 import urllib2 as compat_urllib_request
33 import urllib.error as compat_urllib_error
34 except ImportError: # Python 2
35 import urllib2 as compat_urllib_error
38 import urllib.parse as compat_urllib_parse
39 except ImportError: # Python 2
40 import urllib as compat_urllib_parse
43 from urllib.parse import urlparse as compat_urllib_parse_urlparse
44 except ImportError: # Python 2
45 from urlparse import urlparse as compat_urllib_parse_urlparse
48 import urllib.parse as compat_urlparse
49 except ImportError: # Python 2
50 import urlparse as compat_urlparse
53 import http.cookiejar as compat_cookiejar
54 except ImportError: # Python 2
55 import cookielib as compat_cookiejar
58 import html.entities as compat_html_entities
59 except ImportError: # Python 2
60 import htmlentitydefs as compat_html_entities
63 import html.parser as compat_html_parser
64 except ImportError: # Python 2
65 import HTMLParser as compat_html_parser
68 import http.client as compat_http_client
69 except ImportError: # Python 2
70 import httplib as compat_http_client
73 from urllib.error import HTTPError as compat_HTTPError
74 except ImportError: # Python 2
75 from urllib2 import HTTPError as compat_HTTPError
78 from urllib.request import urlretrieve as compat_urlretrieve
79 except ImportError: # Python 2
80 from urllib import urlretrieve as compat_urlretrieve
84 from subprocess import DEVNULL
85 compat_subprocess_get_DEVNULL = lambda: DEVNULL
87 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
90 from urllib.parse import parse_qs as compat_parse_qs
91 except ImportError: # Python 2
# NOTE(review): fragmentary excerpt of a Python-2/3 compat layer — many
# interior lines are missing and each line still carries its original line
# number. Kept byte-identical; do not assume this region runs as-is.
92 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
93 # Python 2's version is apparently totally broken
94 def _unquote(string, encoding='utf-8', errors='replace'):
97 res = string.split('%')
104 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
111 pct_sequence += item[:2].decode('hex')
114 # This segment was just a single percent-encoded character.
115 # May be part of a sequence of code units, so delay decoding.
116 # (Stored in pct_sequence).
120 # Encountered non-percent-encoded characters. Flush the current
122 string += pct_sequence.decode(encoding, errors) + rest
125 # Flush the final pct_sequence
126 string += pct_sequence.decode(encoding, errors)
# _parse_qsl: backported query-string splitter; the `unicode` reference below
# shows this branch is Python-2 only.
129 def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
130 encoding='utf-8', errors='replace'):
131 qs, _coerce_result = qs, unicode
132 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
134 for name_value in pairs:
135 if not name_value and not strict_parsing:
137 nv = name_value.split('=', 1)
140 raise ValueError("bad query field: %r" % (name_value,))
141 # Handle case of a control-name with no equal sign
142 if keep_blank_values:
146 if len(nv[1]) or keep_blank_values:
147 name = nv[0].replace('+', ' ')
148 name = _unquote(name, encoding=encoding, errors=errors)
149 name = _coerce_result(name)
150 value = nv[1].replace('+', ' ')
151 value = _unquote(value, encoding=encoding, errors=errors)
152 value = _coerce_result(value)
153 r.append((name, value))
# compat_parse_qs: builds a name -> [values] dict from _parse_qsl pairs.
156 def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
157 encoding='utf-8', errors='replace'):
159 pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
160 encoding=encoding, errors=errors)
161 for name, value in pairs:
162 if name in parsed_result:
163 parsed_result[name].append(value)
165 parsed_result[name] = [value]
# compat_str / compat_chr: Python-2 fallbacks (the Python-3 branches are
# among the missing lines).
169 compat_str = unicode # Python 2
174 compat_chr = unichr # Python 2
179 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
180 except ImportError: # Python 2.6
181 from xml.parsers.expat import ExpatError as compat_xml_parse_error
# compat_ord-style helper fragment: integers pass through unchanged.
184 if type(c) is int: return c
187 # This is not clearly defined otherwise
188 compiled_regex_type = type(re.compile(''))
# std_headers fragment: default HTTP headers sent with every request.
191 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
192 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
193 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
194 'Accept-Encoding': 'gzip, deflate',
195 'Accept-Language': 'en-us,en;q=0.5',
198 def preferredencoding():
199 """Get preferred encoding.
201 Returns the best encoding scheme for the system, based on
202 locale.getpreferredencoding() and some further tweaks.
205 pref = locale.getpreferredencoding()
# Python-2 print wrapper fragment: encodes before writing to stdout.
212 if sys.version_info < (3,0):
214 print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
217 assert type(s) == type(u'')
220 # In Python 2.x, json.dump expects a bytestream.
221 # In Python 3.x, it writes to a character stream
222 if sys.version_info < (3,0):
223 def write_json_file(obj, fn):
224 with open(fn, 'wb') as f:
227 def write_json_file(obj, fn):
228 with open(fn, 'w', encoding='utf-8') as f:
231 if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
    """Locate the first element matching xpath[@key=val], or None."""
    # Restrict attribute names/values to simple character classes so the
    # interpolated predicate stays a valid ElementTree XPath expression.
    assert re.match(r'^[a-zA-Z]+$', key)
    assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
    return node.find(u"%s[@%s='%s']" % (xpath, key, val))
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.
# Pre-2.7 find_xpath_attr fallback: linear scan over findall results.
239 def find_xpath_attr(node, xpath, key, val):
240 for f in node.findall(xpath):
241 if f.attrib.get(key) == val:
245 # On python2.6 the xml.etree.ElementTree.Element methods don't support
246 # the namespace parameter
247 def xpath_with_ns(path, ns_map):
248 components = [c.split(':') for c in path.split('/')]
252 replaced.append(c[0])
255 replaced.append('{%s}%s' % (ns_map[ns], tag))
256 return '/'.join(replaced)
258 def htmlentity_transform(matchobj):
259 """Transforms an HTML entity to a character.
261 This function receives a match object and is intended to be used with
262 the re.sub() function.
264 entity = matchobj.group(1)
266 # Known non-numeric HTML entity
267 if entity in compat_html_entities.name2codepoint:
268 return compat_chr(compat_html_entities.name2codepoint[entity])
270 mobj = re.match(u'(?u)#(x?\\d+)', entity)
272 numstr = mobj.group(1)
273 if numstr.startswith(u'x'):
275 numstr = u'0%s' % numstr
278 return compat_chr(int(numstr, base))
280 # Unknown entity in name, return its literal representation
281 return (u'&%s;' % entity)
283 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
284 class BaseHTMLParser(compat_html_parser.HTMLParser):
286 compat_html_parser.HTMLParser.__init__(self)
289 def loads(self, html):
# NOTE(review): fragmentary excerpt of the AttrParser class (interior lines
# missing, original line numbers embedded); kept byte-identical.
294 class AttrParser(BaseHTMLParser):
295 """Modified HTMLParser that isolates a tag with the specified attribute"""
296 def __init__(self, attribute, value):
297 self.attribute = attribute
302 self.watch_startpos = False
304 BaseHTMLParser.__init__(self)
# error(): best-effort recovery — skip one line of input, give up after
# more than 10 errors or once a matching tag was already started.
306 def error(self, message):
307 if self.error_count > 10 or self.started:
308 raise compat_html_parser.HTMLParseError(message, self.getpos())
309 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
310 self.error_count += 1
313 def handle_starttag(self, tag, attrs):
316 self.find_startpos(None)
317 if self.attribute in attrs and attrs[self.attribute] == self.value:
320 self.watch_startpos = True
322 if not tag in self.depth: self.depth[tag] = 0
325 def handle_endtag(self, tag):
327 if tag in self.depth: self.depth[tag] -= 1
328 if self.depth[self.result[0]] == 0:
330 self.result.append(self.getpos())
332 def find_startpos(self, x):
333 """Needed to put the start position of the result (self.result[1])
334 after the opening tag with the requested id"""
335 if self.watch_startpos:
336 self.watch_startpos = False
337 self.result.append(self.getpos())
338 handle_entityref = handle_charref = handle_data = handle_comment = \
339 handle_decl = handle_pi = unknown_decl = find_startpos
341 def get_result(self):
342 if self.result is None:
344 if len(self.result) != 3:
346 lines = self.html.split('\n')
347 lines = lines[self.result[1][0]-1:self.result[2][0]]
348 lines[0] = lines[0][self.result[1][1]:]
350 lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
351 lines[-1] = lines[-1][:self.result[2][1]]
352 return '\n'.join(lines).strip()
353 # Hack for https://github.com/rg3/youtube-dl/issues/662
354 if sys.version_info < (2, 7, 3):
355 AttrParser.parse_endtag = (lambda self, i:
356 i + len("</scr'+'ipt>")
357 if self.rawdata[i:].startswith("</scr'+'ipt>")
358 else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the inner content of the element whose id attribute equals *id*."""
    # Thin convenience wrapper over the generic attribute-based lookup.
    return get_element_by_attribute("id", id, html)
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.
364 def get_element_by_attribute(attribute, value, html):
365 """Return the content of the tag with the specified attribute in the passed HTML document"""
366 parser = AttrParser(attribute, value)
# Parse errors are tolerated here; the partial result is still returned.
369 except compat_html_parser.HTMLParseError:
371 return parser.get_result()
373 class MetaParser(BaseHTMLParser):
375 Modified HTMLParser that isolates a meta tag with the specified name
378 def __init__(self, name):
379 BaseHTMLParser.__init__(self)
384 def handle_starttag(self, tag, attrs):
388 if attrs.get('name') == self.name:
389 self.result = attrs.get('content')
391 def get_result(self):
394 def get_meta_content(name, html):
396 Return the content attribute from the meta tag with the given name attribute.
398 parser = MetaParser(name)
401 except compat_html_parser.HTMLParseError:
403 return parser.get_result()
406 def clean_html(html):
407 """Clean an HTML snippet into a readable string"""
409 html = html.replace('\n', ' ')
410 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
411 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
413 html = re.sub('<.*?>', '', html)
414 # Replace html entities
415 html = unescapeHTML(html)
419 def sanitize_open(filename, open_mode):
420 """Try to open the given filename, and slightly tweak it if this fails.
422 Attempts to open the given filename. If this fails, it tries to change
423 the filename slightly, step by step, until it's either able to open it
424 or it fails and raises a final exception, like the standard open()
427 It returns the tuple (stream, definitive_file_name).
431 if sys.platform == 'win32':
433 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
434 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
435 stream = open(encodeFilename(filename), open_mode)
436 return (stream, filename)
437 except (IOError, OSError) as err:
438 if err.errno in (errno.EACCES,):
441 # In case of error, try to remove win32 forbidden chars
442 alt_filename = os.path.join(
443 re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
444 for path_part in os.path.split(filename)
446 if alt_filename == filename:
449 # An exception here should be caught in the caller
450 stream = open(encodeFilename(filename), open_mode)
451 return (stream, alt_filename)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns None when *timestr* cannot be parsed.
    """
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.
462 def sanitize_filename(s, restricted=False, is_id=False):
463 """Sanitizes a string so it could be used as part of a filename.
464 If restricted is set, use a stricter subset of allowed characters.
465 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
467 def replace_insane(char):
468 if char == '?' or ord(char) < 32 or ord(char) == 127:
471 return '' if restricted else '\''
473 return '_-' if restricted else ' -'
474 elif char in '\\/|*<>':
476 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
478 if restricted and ord(char) > 127:
482 result = u''.join(map(replace_insane, s))
# Collapse runs of underscores introduced by the replacements above.
484 while '__' in result:
485 result = result.replace('__', '_')
486 result = result.strip('_')
487 # Common case of "Foreign band name - English song title"
488 if restricted and result.startswith('-_'):
494 def orderedSet(iterable):
495 """ Remove all duplicates from the input iterable """
# unescapeHTML fragment: def line is among the missing lines.
506 assert type(s) == type(u'')
508 result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
512 def encodeFilename(s, for_subprocess=False):
514 @param s The name of the file
517 assert type(s) == compat_str
519 # Python 3 has a Unicode API
520 if sys.version_info >= (3, 0):
523 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
524 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
525 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
526 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
527 if not for_subprocess:
530 # For subprocess calls, encode with locale encoding
531 # Refer to http://stackoverflow.com/a/9951851/35070
532 encoding = preferredencoding()
534 encoding = sys.getfilesystemencoding()
537 return s.encode(encoding, 'ignore')
540 def decodeOption(optval):
543 if isinstance(optval, bytes):
544 optval = optval.decode(preferredencoding())
546 assert isinstance(optval, compat_str)
def formatSeconds(secs):
    """Format a duration in whole seconds as 'H:MM:SS', 'M:SS' or plain 'S'."""
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.  The SSLv3 protocol constants
# below are long-deprecated — flagged for a future security pass.
558 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
559 if sys.version_info < (3, 2):
562 class HTTPSConnectionV3(httplib.HTTPSConnection):
563 def __init__(self, *args, **kwargs):
564 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
567 sock = socket.create_connection((self.host, self.port), self.timeout)
568 if getattr(self, '_tunnel_host', False):
572 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
574 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
576 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
577 def https_open(self, req):
578 return self.do_open(HTTPSConnectionV3, req)
579 return HTTPSHandlerV3(**kwargs)
581 context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
582 context.verify_mode = (ssl.CERT_NONE
583 if opts_no_check_certificate
584 else ssl.CERT_REQUIRED)
585 context.set_default_verify_paths()
# load_default_certs only exists on newer Pythons; older ones fall through.
587 context.load_default_certs()
588 except AttributeError:
590 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
592 class ExtractorError(Exception):
593 """Error during info extraction."""
594 def __init__(self, msg, tb=None, expected=False, cause=None):
595 """ tb, if given, is the original traceback (so that it can be printed out).
596 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
# Network/timeout errors are treated as "expected" (user-environment issues).
599 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
602 msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
603 super(ExtractorError, self).__init__(msg)
606 self.exc_info = sys.exc_info() # preserve original exception
609 def format_traceback(self):
610 if self.traceback is None:
612 return u''.join(traceback.format_tb(self.traceback))
615 class RegexNotFoundError(ExtractorError):
616 """Error when a regex didn't match"""
620 class DownloadError(Exception):
621 """Download Error exception.
623 This exception may be thrown by FileDownloader objects if they are not
624 configured to continue on errors. They will contain the appropriate
627 def __init__(self, msg, exc_info=None):
628 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
629 super(DownloadError, self).__init__(msg)
630 self.exc_info = exc_info
633 class SameFileError(Exception):
634 """Same File exception.
636 This exception will be thrown by FileDownloader objects if they detect
637 multiple files would have to be downloaded to the same file on disk.
642 class PostProcessingError(Exception):
643 """Post Processing exception.
645 This exception may be raised by PostProcessor's .run() method to
646 indicate an error in the postprocessing task.
648 def __init__(self, msg):
651 class MaxDownloadsReached(Exception):
652 """ --max-downloads limit has been reached. """
656 class UnavailableVideoError(Exception):
657 """Unavailable Format exception.
659 This exception will be thrown when a video is requested
660 in a format that is not available for that video.
665 class ContentTooShortError(Exception):
666 """Content Too Short exception.
668 This exception may be raised by FileDownloader objects when a file they
669 download is too small for what the server announced first, indicating
670 the connection was probably interrupted.
676 def __init__(self, downloaded, expected):
677 self.downloaded = downloaded
678 self.expected = expected
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.
680 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
681 """Handler for HTTP requests and responses.
683 This class, when installed with an OpenerDirector, automatically adds
684 the standard headers to every HTTP request and handles gzipped and
685 deflated responses from web servers. If compression is to be avoided in
686 a particular request, the original request in the program code only has
687 to include the HTTP header "Youtubedl-No-Compression", which will be
688 removed before making the real request.
690 Part of this code was copied from:
692 http://techknack.net/python-urllib2-handlers/
694 Andrew Rowls, the author of that code, agreed to release it to the
# deflate(): tries raw-deflate first, then zlib-wrapped deflate.
701 return zlib.decompress(data, -zlib.MAX_WBITS)
703 return zlib.decompress(data)
706 def addinfourl_wrapper(stream, headers, url, code):
707 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
708 return compat_urllib_request.addinfourl(stream, headers, url, code)
709 ret = compat_urllib_request.addinfourl(stream, headers, url)
713 def http_request(self, req):
714 for h,v in std_headers.items():
# Internal pseudo-headers are stripped before the request goes out.
718 if 'Youtubedl-no-compression' in req.headers:
719 if 'Accept-encoding' in req.headers:
720 del req.headers['Accept-encoding']
721 del req.headers['Youtubedl-no-compression']
722 if 'Youtubedl-user-agent' in req.headers:
723 if 'User-agent' in req.headers:
724 del req.headers['User-agent']
725 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
726 del req.headers['Youtubedl-user-agent']
729 def http_response(self, req, resp):
732 if resp.headers.get('Content-encoding', '') == 'gzip':
733 content = resp.read()
734 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
736 uncompressed = io.BytesIO(gz.read())
737 except IOError as original_ioerror:
738 # There may be junk add the end of the file
739 # See http://stackoverflow.com/q/4928560/35070 for details
740 for i in range(1, 1024):
742 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
743 uncompressed = io.BytesIO(gz.read())
748 raise original_ioerror
749 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
750 resp.msg = old_resp.msg
752 if resp.headers.get('Content-encoding', '') == 'deflate':
753 gz = io.BytesIO(self.deflate(resp.read()))
754 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
755 resp.msg = old_resp.msg
758 https_request = http_request
759 https_response = http_response
762 def unified_strdate(date_str):
763 """Return a string with the date in the format YYYYMMDD"""
766 date_str = date_str.replace(',', ' ')
767 # %z (UTC offset) is only supported in python>=3.2
768 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
769 format_expressions = [
779 '%Y-%m-%dT%H:%M:%SZ',
780 '%Y-%m-%dT%H:%M:%S.%fZ',
781 '%Y-%m-%dT%H:%M:%S.%f0Z',
783 '%Y-%m-%dT%H:%M:%S.%f',
786 for expression in format_expressions:
788 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
# Fall back to RFC 2822 parsing when none of the formats matched.
791 if upload_date is None:
792 timetuple = email.utils.parsedate_tz(date_str)
794 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess a file extension from *url*: the text after the last '.' of the
    path (query string stripped).  Falls back to *default_ext* when the
    candidate is not purely alphanumeric."""
    guess = url.partition(u'?')[0].rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    else:
        return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Build a subtitle file name: '<base>.<lang>.<format>' (original extension dropped)."""
    base = filename.rsplit('.', 1)[0]
    return u'.'.join((base, sub_lang, sub_format))
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.
807 def date_from_str(date_str):
809 Return a datetime object from a string in the format YYYYMMDD or
810 (now|today)[+-][0-9](day|week|month|year)(s)?"""
811 today = datetime.date.today()
812 if date_str == 'now'or date_str == 'today':
814 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
815 if match is not None:
816 sign = match.group('sign')
817 time = int(match.group('time'))
820 unit = match.group('unit')
# month/year are presumably converted to day counts in the missing lines
# before being fed to timedelta — confirm against upstream.
829 delta = datetime.timedelta(**{unit: time})
831 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
def hyphenate_date(date_str):
    """Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Strings that do not match 'YYYYMMDD' are returned unchanged.
    """
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is not None:
        return '-'.join(match.groups())
    else:
        return date_str
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.
842 class DateRange(object):
843 """Represents a time interval between two dates"""
844 def __init__(self, start=None, end=None):
845 """start and end must be strings in the format accepted by date"""
846 if start is not None:
847 self.start = date_from_str(start)
# Missing start/end default to the widest possible range.
849 self.start = datetime.datetime.min.date()
851 self.end = date_from_str(end)
853 self.end = datetime.datetime.max.date()
854 if self.start > self.end:
855 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
858 """Returns a range that only contains the given day"""
860 def __contains__(self, date):
861 """Check if the date is in the range"""
862 if not isinstance(date, datetime.date):
863 date = date_from_str(date)
864 return self.start <= date <= self.end
866 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
# platform_name fragment: the def line is among the missing lines.
870 """ Returns the platform name as a compat_str """
871 res = platform.platform()
872 if isinstance(res, bytes):
873 res = res.decode(preferredencoding())
875 assert isinstance(res, compat_str)
879 def write_string(s, out=None):
882 assert type(s) == compat_str
# Python 2 reports a misleading mode for sys.stderr, hence the version check.
884 if ('b' in getattr(out, 'mode', '') or
885 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
886 s = s.encode(preferredencoding(), 'ignore')
889 except UnicodeEncodeError:
890 # In Windows shells, this can fail even when the codec is just charmap!?
891 # See https://wiki.python.org/moin/PrintFails#Issue
892 if sys.platform == 'win32' and hasattr(out, 'encoding'):
893 s = s.encode(out.encoding, 'ignore').decode(out.encoding)
def bytes_to_intlist(bs):
    """Return *bs* (bytes on Python 3, str on Python 2) as a list of byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    else:  # Python 2: indexing a str yields 1-char strings
        return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Inverse of bytes_to_intlist: pack a list of byte values into a bytes object."""
    if not xs:
        return b''
    if isinstance(chr(0), bytes):  # Python 2: chr() already yields bytes
        return ''.join([chr(x) for x in xs])
    else:  # Python 3
        return bytes(xs)
def get_cachedir(params=None):
    """Return the youtube-dl cache directory.

    Honors an explicit params['cachedir'] override; otherwise returns
    $XDG_CACHE_HOME/youtube-dl, with $XDG_CACHE_HOME defaulting to ~/.cache.
    """
    if params is None:  # avoid the shared mutable-default-argument pitfall
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
# NOTE(review): fragmentary excerpt of the cross-platform file-locking layer
# (interior lines missing, original line numbers embedded); kept byte-identical.
925 # Cross-platform file locking
926 if sys.platform == 'win32':
927 import ctypes.wintypes
# OVERLAPPED struct mirrors the Win32 layout expected by LockFileEx.
930 class OVERLAPPED(ctypes.Structure):
932 ('Internal', ctypes.wintypes.LPVOID),
933 ('InternalHigh', ctypes.wintypes.LPVOID),
934 ('Offset', ctypes.wintypes.DWORD),
935 ('OffsetHigh', ctypes.wintypes.DWORD),
936 ('hEvent', ctypes.wintypes.HANDLE),
939 kernel32 = ctypes.windll.kernel32
940 LockFileEx = kernel32.LockFileEx
941 LockFileEx.argtypes = [
942 ctypes.wintypes.HANDLE, # hFile
943 ctypes.wintypes.DWORD, # dwFlags
944 ctypes.wintypes.DWORD, # dwReserved
945 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
946 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
947 ctypes.POINTER(OVERLAPPED) # Overlapped
949 LockFileEx.restype = ctypes.wintypes.BOOL
950 UnlockFileEx = kernel32.UnlockFileEx
951 UnlockFileEx.argtypes = [
952 ctypes.wintypes.HANDLE, # hFile
953 ctypes.wintypes.DWORD, # dwReserved
954 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
955 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
956 ctypes.POINTER(OVERLAPPED) # Overlapped
958 UnlockFileEx.restype = ctypes.wintypes.BOOL
959 whole_low = 0xffffffff
960 whole_high = 0x7fffffff
962 def _lock_file(f, exclusive):
963 overlapped = OVERLAPPED()
964 overlapped.Offset = 0
965 overlapped.OffsetHigh = 0
966 overlapped.hEvent = 0
967 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
968 handle = msvcrt.get_osfhandle(f.fileno())
969 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
970 whole_low, whole_high, f._lock_file_overlapped_p):
971 raise OSError('Locking file failed: %r' % ctypes.FormatError())
# _unlock_file fragment: the def line is among the missing lines.
974 assert f._lock_file_overlapped_p
975 handle = msvcrt.get_osfhandle(f.fileno())
976 if not UnlockFileEx(handle, 0,
977 whole_low, whole_high, f._lock_file_overlapped_p):
978 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
# POSIX branch: fcntl-based locking.
983 def _lock_file(f, exclusive):
984 fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
987 fcntl.lockf(f, fcntl.LOCK_UN)
990 class locked_file(object):
991 def __init__(self, filename, mode, encoding=None):
992 assert mode in ['r', 'a', 'w']
993 self.f = io.open(filename, mode, encoding=encoding)
997 exclusive = self.mode != 'r'
999 _lock_file(self.f, exclusive)
1005 def __exit__(self, etype, value, traceback):
1007 _unlock_file(self.f)
1014 def write(self, *args):
1015 return self.f.write(*args)
1017 def read(self, *args):
1018 return self.f.read(*args)
1021 def shell_quote(args):
1023 encoding = sys.getfilesystemencoding()
1024 if encoding is None:
1027 if isinstance(a, bytes):
1028 # We may get a filename encoded with 'encodeFilename'
1029 a = a.decode(encoding)
1030 quoted_args.append(pipes.quote(a))
1031 return u' '.join(quoted_args)
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but include the latest evaluated element
        (the first element so that Not pred(e)) """
    for e in seq:
        yield e
        # Unlike itertools.takewhile, the failing element was already yielded.
        if not pred(e):
            return
def smuggle_url(url, data):
    """Embed *data* (JSON-encoded) in the fragment of *url* for internal round-tripping."""
    payload = {u'__youtubedl_smuggle': json.dumps(data)}
    return u'%s#%s' % (url, compat_urllib_parse.urlencode(payload))
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url: extract (url, data) from a smuggled URL.

    Returns (smug_url, default) when no smuggled payload is present.
    """
    if not '#__youtubedl_smuggle' in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.  Presumably the missing lines
# handle bytes in (None, 0.0) before math.log is called — confirm upstream.
1060 def format_bytes(bytes):
1063 if type(bytes) is str:
1064 bytes = float(bytes)
1068 exponent = int(math.log(bytes, 1024.0))
1069 suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1070 converted = float(bytes) / float(1024 ** exponent)
1071 return u'%.2f%s' % (converted, suffix)
def str_to_int(int_str):
    """Parse an integer from a string, ignoring ',' and '.' group separators."""
    int_str = re.sub(r'[,\.]', u'', int_str)
    return int(int_str)
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.  The subprocess call is presumably
# 'stty size' — confirm against upstream; error fallbacks are missing here.
1079 def get_term_width():
1080 columns = os.environ.get('COLUMNS', None)
1085 sp = subprocess.Popen(
1087 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1088 out, err = sp.communicate()
1089 return int(out.split()[1])
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name,
    or None when *name* is not an English month name. """
    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    try:
        return ENGLISH_NAMES.index(name) + 1
    except ValueError:
        return None
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.
1107 def fix_xml_ampersands(xml_str):
1108 """Replace all the '&' by '&amp;' in XML"""
# Negative lookahead keeps already-valid entity references untouched.
1110 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1115 def setproctitle(title):
1116 assert isinstance(title, compat_str)
1118 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1122 buf = ctypes.create_string_buffer(len(title) + 1)
1123 buf.value = title.encode('utf-8')
# 15 is PR_SET_NAME on Linux (sets the visible process name).
1125 libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
1126 except AttributeError:
1127 return # Strange libc, just skip this
def remove_start(s, start):
    """Return *s* with the prefix *start* removed, or *s* unchanged."""
    if s.startswith(start):
        return s[len(start):]
    return s
def url_basename(url):
    """Return the last path segment of *url* (empty string for an empty path)."""
    parsed_path = compat_urlparse.urlparse(url).path
    segments = parsed_path.strip(u'/').split(u'/')
    return segments[-1]
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that issues HTTP HEAD, for probing a URL without
    downloading its body."""
    def get_method(self):
        return "HEAD"
def int_or_none(v, scale=1):
    """Coerce *v* to an int divided by *scale*; pass None through unchanged."""
    if v is None:
        return None
    return int(v) // scale
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.
1150 def parse_duration(s):
# Matches '1:02:03', '02:03', '3', and h/m/s-suffixed forms per the pattern.
1155 r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
1158 res = int(m.group('secs'))
1160 res += int(m.group('mins')) * 60
1161 if m.group('hours'):
1162 res += int(m.group('hours')) * 60 * 60
def prepend_extension(filename, ext):
    """Insert *ext* before the file's real extension: 'a.mp4' -> 'a.<ext>.mp4'."""
    base, real_ext = os.path.splitext(filename)
    return u'%s.%s%s' % (base, ext, real_ext)
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.
1171 def check_executable(exe, args=[]):
1172 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1173 args can be a list of arguments for a short output (like -version) """
1175 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1181 class PagedList(object):
1182 def __init__(self, pagefunc, pagesize):
1183 self._pagefunc = pagefunc
1184 self._pagesize = pagesize
1187 # This is only useful for tests
1188 return len(self.getslice())
# getslice: lazily fetch pages and trim the first/last page to the
# requested [start, end) window.
1190 def getslice(self, start=0, end=None):
1192 for pagenum in itertools.count(start // self._pagesize):
1193 firstid = pagenum * self._pagesize
1194 nextfirstid = pagenum * self._pagesize + self._pagesize
1195 if start >= nextfirstid:
1198 page_results = list(self._pagefunc(pagenum))
1201 start % self._pagesize
1202 if firstid <= start < nextfirstid
1206 ((end - 1) % self._pagesize) + 1
1207 if (end is not None and firstid <= end <= nextfirstid)
1210 if startv != 0 or endv is not None:
1211 page_results = page_results[startv:endv]
1212 res.extend(page_results)
1214 # A little optimization - if current page is not "full", ie. does
1215 # not contain page_size videos then we can assume that this page
1216 # is the last one - there are no more ids on further pages -
1217 # i.e. no need to query again.
1218 if len(page_results) + startv < self._pagesize:
1221 # If we got the whole page, but the next page is not interesting,
1222 # break out early as well
1223 if end == nextfirstid:
def uppercase_escape(s):
    """Replace literal '\\UXXXXXXXX' escape sequences (8 hex digits) in *s*
    with the corresponding characters."""
    return re.sub(
        r'\\U([0-9a-fA-F]{8})',
        lambda m: compat_chr(int(m.group(1), base=16)), s)
# NOTE(review): fragmentary excerpt (interior lines missing, original line
# numbers embedded); kept byte-identical.
# Feature probe: on Python 2.6-era struct, format specs must be bytes, so
# wrappers that encode the spec are installed; otherwise plain struct is used.
1234 struct.pack(u'!I', 0)
1236 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
1237 def struct_pack(spec, *args):
1238 if isinstance(spec, compat_str):
1239 spec = spec.encode('ascii')
1240 return struct.pack(spec, *args)
1242 def struct_unpack(spec, *args):
1243 if isinstance(spec, compat_str):
1244 spec = spec.encode('ascii')
1245 return struct.unpack(spec, *args)
1247 struct_pack = struct.pack
1248 struct_unpack = struct.unpack
1251 def read_batch_urls(batch_fd):
# fixup fragment: the inner def line is among the missing lines.
1253 if not isinstance(url, compat_str):
1254 url = url.decode('utf-8', 'replace')
1255 BOM_UTF8 = u'\xef\xbb\xbf'
1256 if url.startswith(BOM_UTF8):
1257 url = url[len(BOM_UTF8):]
# Lines starting with '#', ';' or ']' are treated as comments and dropped.
1259 if url.startswith(('#', ';', ']')):
1263 with contextlib.closing(batch_fd) as fd:
1264 return [url for url in map(fixup, fd) if url]