2 # -*- coding: utf-8 -*-
25 import xml.etree.ElementTree
29 import urllib.request as compat_urllib_request
30 except ImportError: # Python 2
31 import urllib2 as compat_urllib_request
34 import urllib.error as compat_urllib_error
35 except ImportError: # Python 2
36 import urllib2 as compat_urllib_error
39 import urllib.parse as compat_urllib_parse
40 except ImportError: # Python 2
41 import urllib as compat_urllib_parse
44 from urllib.parse import urlparse as compat_urllib_parse_urlparse
45 except ImportError: # Python 2
46 from urlparse import urlparse as compat_urllib_parse_urlparse
49 import urllib.parse as compat_urlparse
50 except ImportError: # Python 2
51 import urlparse as compat_urlparse
54 import http.cookiejar as compat_cookiejar
55 except ImportError: # Python 2
56 import cookielib as compat_cookiejar
59 import html.entities as compat_html_entities
60 except ImportError: # Python 2
61 import htmlentitydefs as compat_html_entities
64 import html.parser as compat_html_parser
65 except ImportError: # Python 2
66 import HTMLParser as compat_html_parser
69 import http.client as compat_http_client
70 except ImportError: # Python 2
71 import httplib as compat_http_client
74 from urllib.error import HTTPError as compat_HTTPError
75 except ImportError: # Python 2
76 from urllib2 import HTTPError as compat_HTTPError
79 from urllib.request import urlretrieve as compat_urlretrieve
80 except ImportError: # Python 2
81 from urllib import urlretrieve as compat_urlretrieve
try:
    from subprocess import DEVNULL
    # Factory so both branches expose the same callable interface.
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
except ImportError:  # Python < 3.3 has no subprocess.DEVNULL
    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _unquote(string, encoding='utf-8', errors='replace'):
        """Percent-decode *string* (Python 2 fallback helper)."""
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split a query string into a list of (name, value) pairs."""
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """parse_qs replacement: map each name to the list of its values."""
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
try:
    compat_str = unicode  # Python 2
except NameError:
    compat_str = str

try:
    compat_chr = unichr  # Python 2
except NameError:
    compat_chr = chr
try:
    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError:  # Python 2.6
    from xml.parsers.expat import ExpatError as compat_xml_parse_error
185 if type(c) is int: return c
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
# Default headers sent with every HTTP request (see YoutubeDLHandler).
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        # Verify the encoding is actually usable; fall back to UTF-8 if not.
        u'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref
if sys.version_info < (3, 0):
    def compat_print(s):
        # Python 2: encode to a byte string the terminal can display.
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
else:
    def compat_print(s):
        assert type(s) == type(u'')
        print(s)
# In Python 2.x, json.dump expects a bytestream.
# In Python 3.x, it writes to a character stream
if sys.version_info < (3, 0):
    def write_json_file(obj, fn):
        """Serialize *obj* as JSON into the file named *fn*."""
        with open(fn, 'wb') as f:
            json.dump(obj, f)
else:
    def write_json_file(obj, fn):
        """Serialize *obj* as JSON into the file named *fn* (UTF-8 text)."""
        with open(fn, 'w', encoding='utf-8') as f:
            json.dump(obj, f)
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val):
        # Python 2.6's ElementTree has no attribute-predicate support;
        # scan candidates manually.
        for f in node.findall(xpath):
            if f.attrib.get(key) == val:
                return f
        return None
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' components of *path* using the *ns_map* mapping."""
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            # No namespace prefix on this component.
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(u'(?u)#(x?\\d+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith(u'x'):
            base = 16
            numstr = u'0%s' % numstr
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
# Backport of a locatestarttagend bugfix to older HTMLParser versions.
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix

class BaseHTMLParser(compat_html_parser.HTMLParser):
    """HTMLParser that keeps the raw document around for later slicing."""
    def __init__(self):
        self.html = None  # raw document, set by loads()
        compat_html_parser.HTMLParser.__init__(self)

    def loads(self, html):
        """Feed the whole document *html* through the parser."""
        self.html = html
        self.feed(html)
        self.close()
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        self.result = None      # [tag, startpos, endpos] once found
        self.started = False
        self.depth = {}
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        # Tolerate broken HTML: skip the offending line and re-parse,
        # giving up after too many errors or once the target tag started.
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.loads(self.html)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    handle_entityref = handle_charref = handle_data = handle_comment = \
        handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the text between the matched start tag and its end tag."""
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        lines = self.html.split('\n')
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        lines[0] = lines[0][self.result[1][1]:]
        if len(lines) == 1:
            # Start and end on the same line: offsets are relative.
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()

# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute("id", id, html)
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    parser = AttrParser(attribute, value)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Broken HTML: return whatever was isolated before the failure.
        pass
    return parser.get_result()
class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that isolates a meta tag with the specified name
    attribute.
    """
    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.content = None
        self.result = None

    def handle_starttag(self, tag, attrs):
        if tag != 'meta':
            return
        attrs = dict(attrs)
        if attrs.get('name') == self.name:
            self.result = attrs.get('content')

    def get_result(self):
        return self.result
def get_meta_content(name, html):
    """
    Return the content attribute from the meta tag with the given name attribute.
    """
    parser = MetaParser(name)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Broken HTML: fall through with whatever was parsed so far.
        pass
    return parser.get_result()
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            # Permission problems cannot be fixed by renaming.
            raise

        # In case of error, try to remove win32 forbidden chars
        # FIX: the parts must be unpacked into os.path.join (a bare generator
        # argument would be returned unchanged and is not a valid path).
        alt_filename = os.path.join(*[
            re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
            for path_part in os.path.split(filename)])
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            # FIX: open the sanitized alt_filename, not the name that just failed.
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    result = u''.join(map(replace_insane, s))
    if not is_id:
        # Collapse runs of underscores introduced by the substitutions.
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if not result:
            result = '_'
    return result
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res
def unescapeHTML(s):
    """Replace HTML entities in *s* with the characters they denote."""
    assert type(s) == type(u'')

    result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
    return result
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass u'' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        if not for_subprocess:
            return s
        else:
            # For subprocess calls, encode with locale encoding
            # Refer to http://stackoverflow.com/a/9951851/35070
            encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
def decodeOption(optval):
    """Decode a command-line option value to text, if it arrived as bytes."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
def formatSeconds(secs):
    """Format a duration in seconds as H:MM:SS, M:SS or plain seconds."""
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
    """Build an HTTPS handler, preferring SSLv3 with a SSLv23 fallback.

    NOTE(review): ssl.PROTOCOL_SSLv3 no longer exists in modern Python
    builds; this reconstruction preserves the original (legacy) behavior.
    """
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if getattr(self, '_tunnel_host', False):
                    self.sock = sock
                    self._tunnel()
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
                except ssl.SSLError:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3(**kwargs)
    else:
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        try:
            context.load_default_certs()
        except AttributeError:
            # load_default_certs only exists in Python >= 3.4
            pass
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # Network-level failures are always "expected" (not youtube-dl bugs).
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause

    def format_traceback(self):
        if self.traceback is None:
            return None
        return u''.join(traceback.format_tb(self.traceback))
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        self.msg = msg
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
    pass
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # downloaded: number of bytes actually received
    # expected:   number of bytes the server announced
    def __init__(self, downloaded, expected):
        self.downloaded = downloaded
        self.expected = expected
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        # Try raw deflate first, then zlib-wrapped deflate.
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        # Older addinfourl lacked the code argument/getcode method.
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        # Force the standard headers onto every request.
        for h, v in std_headers.items():
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    https_request = http_request
    https_response = http_response
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    format_expressions = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d.%m.%Y',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Last resort: RFC 2822 style dates.
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess a file extension from *url*, falling back to *default_ext*."""
    guess = url.partition(u'?')[0].rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    else:
        return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle filename: <base>.<lang>.<format>."""
    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # timedelta has no month/year units; approximate them in days.
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is not None:
        return '-'.join(match.groups())
    else:
        # Not in YYYYMMDD form: return unchanged.
        return date_str
class DateRange(object):
    """Represents a time interval between two dates"""
    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res
def write_string(s, out=None):
    """Write the text *s* to *out* (default: sys.stderr) and flush."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        s = s.encode(preferredencoding(), 'ignore')
    try:
        out.write(s)
    except UnicodeEncodeError:
        # In Windows shells, this can fail even when the codec is just charmap!?
        # See https://wiki.python.org/moin/PrintFails#Issue
        if sys.platform == 'win32' and hasattr(out, 'encoding'):
            s = s.encode(out.encoding, 'ignore').decode(out.encoding)
            out.write(s)
        else:
            raise

    out.flush()
def bytes_to_intlist(bs):
    """Convert a bytes/str object into a list of byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Convert a list of byte values into a bytes object."""
    if not xs:
        return b''
    if isinstance(chr(0), bytes):  # Python 2
        return ''.join([chr(x) for x in xs])
    else:
        return bytes(xs)
def get_cachedir(params=None):
    """Return the cache directory, honoring params['cachedir'] and XDG_CACHE_HOME.

    FIX: avoid a mutable default argument ({}); None sentinel preserves the
    original call signature and behavior.
    """
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
927 # Cross-platform file locking
928 if sys.platform == 'win32':
929 import ctypes.wintypes
932 class OVERLAPPED(ctypes.Structure):
934 ('Internal', ctypes.wintypes.LPVOID),
935 ('InternalHigh', ctypes.wintypes.LPVOID),
936 ('Offset', ctypes.wintypes.DWORD),
937 ('OffsetHigh', ctypes.wintypes.DWORD),
938 ('hEvent', ctypes.wintypes.HANDLE),
941 kernel32 = ctypes.windll.kernel32
942 LockFileEx = kernel32.LockFileEx
943 LockFileEx.argtypes = [
944 ctypes.wintypes.HANDLE, # hFile
945 ctypes.wintypes.DWORD, # dwFlags
946 ctypes.wintypes.DWORD, # dwReserved
947 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
948 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
949 ctypes.POINTER(OVERLAPPED) # Overlapped
951 LockFileEx.restype = ctypes.wintypes.BOOL
952 UnlockFileEx = kernel32.UnlockFileEx
953 UnlockFileEx.argtypes = [
954 ctypes.wintypes.HANDLE, # hFile
955 ctypes.wintypes.DWORD, # dwReserved
956 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
957 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
958 ctypes.POINTER(OVERLAPPED) # Overlapped
960 UnlockFileEx.restype = ctypes.wintypes.BOOL
961 whole_low = 0xffffffff
962 whole_high = 0x7fffffff
964 def _lock_file(f, exclusive):
965 overlapped = OVERLAPPED()
966 overlapped.Offset = 0
967 overlapped.OffsetHigh = 0
968 overlapped.hEvent = 0
969 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
970 handle = msvcrt.get_osfhandle(f.fileno())
971 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
972 whole_low, whole_high, f._lock_file_overlapped_p):
973 raise OSError('Locking file failed: %r' % ctypes.FormatError())
976 assert f._lock_file_overlapped_p
977 handle = msvcrt.get_osfhandle(f.fileno())
978 if not UnlockFileEx(handle, 0,
979 whole_low, whole_high, f._lock_file_overlapped_p):
980 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
985 def _lock_file(f, exclusive):
986 fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
989 fcntl.lockf(f, fcntl.LOCK_UN)
class locked_file(object):
    """Context manager wrapping a file with an advisory lock."""
    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Shared lock for reading, exclusive for writing/appending.
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def shell_quote(args):
    """Return *args* joined into a single shell-escaped command line.

    bytes items (e.g. produced by encodeFilename) are decoded with the
    filesystem encoding first.
    """
    # FIX: pipes.quote was an undocumented alias of shlex.quote and the pipes
    # module was removed in Python 3.13; shlex.quote is the drop-in replacement.
    import shlex
    quoted_args = []
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        # Practically, this is not possible anyway
        encoding = 'utf-8'
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(shlex.quote(a))
    return u' '.join(quoted_args)
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but include the latest evaluated element
        (the first element so that Not pred(e)) """
    for e in seq:
        yield e
        if not pred(e):
            return
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # The JSON payload travels in the URL fragment, which servers never see.
    sdata = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return url + u'#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url: return (url, data) or (url, default)."""
    if not '#__youtubedl_smuggle' in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Format a byte count as a human-readable string (e.g. '1.00KiB')."""
    if bytes is None:
        return u'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffix)
def str_to_int(int_str):
    """Parse an int from a string, ignoring thousands separators (',' and '.')."""
    int_str = re.sub(r'[,\.]', u'', int_str)
    return int(int_str)
def get_term_width():
    """Return the terminal width in columns, or None if it cannot be determined."""
    columns = os.environ.get('COLUMNS', None)
    if columns:
        return int(columns)

    try:
        # Fall back to asking the tty via stty.
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        pass
    return None
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """
    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    try:
        return ENGLISH_NAMES.index(name) + 1
    except ValueError:
        return None
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        u'&amp;',
        xml_str)
def setproctitle(title):
    """Best-effort: set the process title via prctl(PR_SET_NAME) on Linux."""
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    # FIX: size the buffer from the encoded byte length, not the character
    # count — non-ASCII titles encode to more bytes than characters.
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes) + 1)
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Return *s* with the prefix *start* removed, if present."""
    if s.startswith(start):
        return s[len(start):]
    return s
def url_basename(url):
    """Return the last path component of *url* (ignoring query/fragment)."""
    path = compat_urlparse.urlparse(url).path
    return path.strip(u'/').split(u'/')[-1]
class HEADRequest(compat_urllib_request.Request):
    """A urllib Request that issues HEAD instead of GET."""
    def get_method(self):
        return "HEAD"
def int_or_none(v, scale=1):
    """Convert *v* to int (divided by *scale*), passing None through."""
    return v if v is None else (int(v) // scale)
def parse_duration(s):
    """Parse '[[H:]M:]S'-style durations (also '1h2m3s') into seconds, or None."""
    if s is None:
        return None

    m = re.match(
        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
    if not m:
        return None
    res = int(m.group('secs'))
    if m.group('mins'):
        res += int(m.group('mins')) * 60
        if m.group('hours'):
            res += int(m.group('hours')) * 60 * 60
    return res
def prepend_extension(filename, ext):
    """Insert *ext* before the real extension: 'a.mp4' -> 'a.temp.mp4'."""
    name, real_ext = os.path.splitext(filename)
    return u'{0}.{1}{2}'.format(name, ext, real_ext)
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # FIX: avoid a mutable default argument ([]); None sentinel keeps the
    # call signature backward-compatible.
    if args is None:
        args = []
    try:
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
class PagedList(object):
    """Lazily fetch a list that is served in fixed-size pages."""
    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc   # pagenum -> iterable of entries
        self._pagesize = pagesize

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getslice(self, start=0, end=None):
        """Return entries [start, end) by querying only the pages needed."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
def uppercase_escape(s):
    """Decode literal '\\UXXXXXXXX' escape sequences found in *s*."""
    return re.sub(
        r'\\U([0-9a-fA-F]{8})',
        lambda m: compat_chr(int(m.group(1), base=16)), s)
try:
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    struct_pack = struct.pack
    struct_unpack = struct.unpack
def read_batch_urls(batch_fd):
    """Read a batch file object and return its list of URLs.

    Blank lines and lines starting with '#', ';' or ']' are skipped;
    a UTF-8 BOM at the start of a line is stripped.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = u'\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """urlencode the arguments and return ASCII bytes suitable as POST data."""
    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
def parse_xml(s):
    """Parse the XML document in string *s*, ignoring any doctype declaration."""
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)