2 # -*- coding: utf-8 -*-
28 import xml.etree.ElementTree
32 import urllib.request as compat_urllib_request
33 except ImportError: # Python 2
34 import urllib2 as compat_urllib_request
37 import urllib.error as compat_urllib_error
38 except ImportError: # Python 2
39 import urllib2 as compat_urllib_error
42 import urllib.parse as compat_urllib_parse
43 except ImportError: # Python 2
44 import urllib as compat_urllib_parse
47 from urllib.parse import urlparse as compat_urllib_parse_urlparse
48 except ImportError: # Python 2
49 from urlparse import urlparse as compat_urllib_parse_urlparse
52 import urllib.parse as compat_urlparse
53 except ImportError: # Python 2
54 import urlparse as compat_urlparse
57 import http.cookiejar as compat_cookiejar
58 except ImportError: # Python 2
59 import cookielib as compat_cookiejar
62 import html.entities as compat_html_entities
63 except ImportError: # Python 2
64 import htmlentitydefs as compat_html_entities
67 import html.parser as compat_html_parser
68 except ImportError: # Python 2
69 import HTMLParser as compat_html_parser
72 import http.client as compat_http_client
73 except ImportError: # Python 2
74 import httplib as compat_http_client
77 from urllib.error import HTTPError as compat_HTTPError
78 except ImportError: # Python 2
79 from urllib2 import HTTPError as compat_HTTPError
82 from urllib.request import urlretrieve as compat_urlretrieve
83 except ImportError: # Python 2
84 from urllib import urlretrieve as compat_urlretrieve
88 from subprocess import DEVNULL
89 compat_subprocess_get_DEVNULL = lambda: DEVNULL
91 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
94 from urllib.parse import parse_qs as compat_parse_qs
95 except ImportError: # Python 2
96 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
97 # Python 2's version is apparently totally broken
98 def _unquote(string, encoding='utf-8', errors='replace'):
101 res = string.split('%')
108 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
115 pct_sequence += item[:2].decode('hex')
118 # This segment was just a single percent-encoded character.
119 # May be part of a sequence of code units, so delay decoding.
120 # (Stored in pct_sequence).
124 # Encountered non-percent-encoded characters. Flush the current
126 string += pct_sequence.decode(encoding, errors) + rest
129 # Flush the final pct_sequence
130 string += pct_sequence.decode(encoding, errors)
133 def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
134 encoding='utf-8', errors='replace'):
135 qs, _coerce_result = qs, unicode
136 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
138 for name_value in pairs:
139 if not name_value and not strict_parsing:
141 nv = name_value.split('=', 1)
144 raise ValueError("bad query field: %r" % (name_value,))
145 # Handle case of a control-name with no equal sign
146 if keep_blank_values:
150 if len(nv[1]) or keep_blank_values:
151 name = nv[0].replace('+', ' ')
152 name = _unquote(name, encoding=encoding, errors=errors)
153 name = _coerce_result(name)
154 value = nv[1].replace('+', ' ')
155 value = _unquote(value, encoding=encoding, errors=errors)
156 value = _coerce_result(value)
157 r.append((name, value))
160 def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
161 encoding='utf-8', errors='replace'):
163 pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
164 encoding=encoding, errors=errors)
165 for name, value in pairs:
166 if name in parsed_result:
167 parsed_result[name].append(value)
169 parsed_result[name] = [value]
173 compat_str = unicode # Python 2
178 compat_chr = unichr # Python 2
183 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
184 except ImportError: # Python 2.6
185 from xml.parsers.expat import ExpatError as compat_xml_parse_error
188 if type(c) is int: return c
191 # This is not clearly defined otherwise
192 compiled_regex_type = type(re.compile(''))
195 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
196 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
197 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
198 'Accept-Encoding': 'gzip, deflate',
199 'Accept-Language': 'en-us,en;q=0.5',
202 def preferredencoding():
203 """Get preferred encoding.
205 Returns the best encoding scheme for the system, based on
206 locale.getpreferredencoding() and some further tweaks.
209 pref = locale.getpreferredencoding()
216 if sys.version_info < (3,0):
218 print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
221 assert type(s) == type(u'')
224 # In Python 2.x, json.dump expects a bytestream.
225 # In Python 3.x, it writes to a character stream
226 if sys.version_info < (3,0):
227 def write_json_file(obj, fn):
228 with open(fn, 'wb') as f:
231 def write_json_file(obj, fn):
232 with open(fn, 'w', encoding='utf-8') as f:
235 if sys.version_info >= (2,7):
236 def find_xpath_attr(node, xpath, key, val):
237 """ Find the xpath xpath[@key=val] """
238 assert re.match(r'^[a-zA-Z]+$', key)
239 assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
240 expr = xpath + u"[@%s='%s']" % (key, val)
241 return node.find(expr)
243 def find_xpath_attr(node, xpath, key, val):
244 for f in node.findall(xpath):
245 if f.attrib.get(key) == val:
249 # On python2.6 the xml.etree.ElementTree.Element methods don't support
250 # the namespace parameter
251 def xpath_with_ns(path, ns_map):
252 components = [c.split(':') for c in path.split('/')]
256 replaced.append(c[0])
259 replaced.append('{%s}%s' % (ns_map[ns], tag))
260 return '/'.join(replaced)
262 def htmlentity_transform(matchobj):
263 """Transforms an HTML entity to a character.
265 This function receives a match object and is intended to be used with
266 the re.sub() function.
268 entity = matchobj.group(1)
270 # Known non-numeric HTML entity
271 if entity in compat_html_entities.name2codepoint:
272 return compat_chr(compat_html_entities.name2codepoint[entity])
274 mobj = re.match(u'(?u)#(x?\\d+)', entity)
276 numstr = mobj.group(1)
277 if numstr.startswith(u'x'):
279 numstr = u'0%s' % numstr
282 return compat_chr(int(numstr, base))
284 # Unknown entity in name, return its literal representation
285 return (u'&%s;' % entity)
287 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
288 class BaseHTMLParser(compat_html_parser.HTMLParser):
290 compat_html_parser.HTMLParser.__init__(self)
293 def loads(self, html):
298 class AttrParser(BaseHTMLParser):
299 """Modified HTMLParser that isolates a tag with the specified attribute"""
300 def __init__(self, attribute, value):
301 self.attribute = attribute
306 self.watch_startpos = False
308 BaseHTMLParser.__init__(self)
310 def error(self, message):
311 if self.error_count > 10 or self.started:
312 raise compat_html_parser.HTMLParseError(message, self.getpos())
313 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
314 self.error_count += 1
317 def handle_starttag(self, tag, attrs):
320 self.find_startpos(None)
321 if self.attribute in attrs and attrs[self.attribute] == self.value:
324 self.watch_startpos = True
326 if not tag in self.depth: self.depth[tag] = 0
329 def handle_endtag(self, tag):
331 if tag in self.depth: self.depth[tag] -= 1
332 if self.depth[self.result[0]] == 0:
334 self.result.append(self.getpos())
336 def find_startpos(self, x):
337 """Needed to put the start position of the result (self.result[1])
338 after the opening tag with the requested id"""
339 if self.watch_startpos:
340 self.watch_startpos = False
341 self.result.append(self.getpos())
342 handle_entityref = handle_charref = handle_data = handle_comment = \
343 handle_decl = handle_pi = unknown_decl = find_startpos
345 def get_result(self):
346 if self.result is None:
348 if len(self.result) != 3:
350 lines = self.html.split('\n')
351 lines = lines[self.result[1][0]-1:self.result[2][0]]
352 lines[0] = lines[0][self.result[1][1]:]
354 lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
355 lines[-1] = lines[-1][:self.result[2][1]]
356 return '\n'.join(lines).strip()
357 # Hack for https://github.com/rg3/youtube-dl/issues/662
358 if sys.version_info < (2, 7, 3):
359 AttrParser.parse_endtag = (lambda self, i:
360 i + len("</scr'+'ipt>")
361 if self.rawdata[i:].startswith("</scr'+'ipt>")
362 else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag whose "id" attribute equals the given ID.

    Thin convenience wrapper: the lookup itself is delegated to
    get_element_by_attribute() with the attribute name fixed to "id".
    """
    return get_element_by_attribute(attribute="id", value=id, html=html)
368 def get_element_by_attribute(attribute, value, html):
369 """Return the content of the tag with the specified attribute in the passed HTML document"""
370 parser = AttrParser(attribute, value)
373 except compat_html_parser.HTMLParseError:
375 return parser.get_result()
377 class MetaParser(BaseHTMLParser):
379 Modified HTMLParser that isolates a meta tag with the specified name
382 def __init__(self, name):
383 BaseHTMLParser.__init__(self)
388 def handle_starttag(self, tag, attrs):
392 if attrs.get('name') == self.name:
393 self.result = attrs.get('content')
395 def get_result(self):
398 def get_meta_content(name, html):
400 Return the content attribute from the meta tag with the given name attribute.
402 parser = MetaParser(name)
405 except compat_html_parser.HTMLParseError:
407 return parser.get_result()
410 def clean_html(html):
411 """Clean an HTML snippet into a readable string"""
413 html = html.replace('\n', ' ')
414 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
415 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
417 html = re.sub('<.*?>', '', html)
418 # Replace html entities
419 html = unescapeHTML(html)
423 def sanitize_open(filename, open_mode):
424 """Try to open the given filename, and slightly tweak it if this fails.
426 Attempts to open the given filename. If this fails, it tries to change
427 the filename slightly, step by step, until it's either able to open it
428 or it fails and raises a final exception, like the standard open()
431 It returns the tuple (stream, definitive_file_name).
435 if sys.platform == 'win32':
437 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
438 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
439 stream = open(encodeFilename(filename), open_mode)
440 return (stream, filename)
441 except (IOError, OSError) as err:
442 if err.errno in (errno.EACCES,):
445 # In case of error, try to remove win32 forbidden chars
446 alt_filename = os.path.join(
447 re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
448 for path_part in os.path.split(filename)
450 if alt_filename == filename:
453 # An exception here should be caught in the caller
454 stream = open(encodeFilename(filename), open_mode)
455 return (stream, alt_filename)
458 def timeconvert(timestr):
459 """Convert RFC 2822 defined time string into system timestamp"""
461 timetuple = email.utils.parsedate_tz(timestr)
462 if timetuple is not None:
463 timestamp = email.utils.mktime_tz(timetuple)
466 def sanitize_filename(s, restricted=False, is_id=False):
467 """Sanitizes a string so it could be used as part of a filename.
468 If restricted is set, use a stricter subset of allowed characters.
469 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
471 def replace_insane(char):
472 if char == '?' or ord(char) < 32 or ord(char) == 127:
475 return '' if restricted else '\''
477 return '_-' if restricted else ' -'
478 elif char in '\\/|*<>':
480 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
482 if restricted and ord(char) > 127:
486 result = u''.join(map(replace_insane, s))
488 while '__' in result:
489 result = result.replace('__', '_')
490 result = result.strip('_')
491 # Common case of "Foreign band name - English song title"
492 if restricted and result.startswith('-_'):
498 def orderedSet(iterable):
499 """ Remove all duplicates from the input iterable """
510 assert type(s) == compat_str
512 result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
516 def encodeFilename(s, for_subprocess=False):
518 @param s The name of the file
521 assert type(s) == compat_str
523 # Python 3 has a Unicode API
524 if sys.version_info >= (3, 0):
527 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
528 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
529 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
530 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
531 if not for_subprocess:
534 # For subprocess calls, encode with locale encoding
535 # Refer to http://stackoverflow.com/a/9951851/35070
536 encoding = preferredencoding()
538 encoding = sys.getfilesystemencoding()
541 return s.encode(encoding, 'ignore')
543 def decodeOption(optval):
546 if isinstance(optval, bytes):
547 optval = optval.decode(preferredencoding())
549 assert isinstance(optval, compat_str)
552 def formatSeconds(secs):
554 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
556 return '%d:%02d' % (secs // 60, secs % 60)
561 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
562 if sys.version_info < (3, 2):
565 class HTTPSConnectionV3(httplib.HTTPSConnection):
566 def __init__(self, *args, **kwargs):
567 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
570 sock = socket.create_connection((self.host, self.port), self.timeout)
571 if getattr(self, '_tunnel_host', False):
575 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
577 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
579 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
580 def https_open(self, req):
581 return self.do_open(HTTPSConnectionV3, req)
582 return HTTPSHandlerV3(**kwargs)
584 context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
585 context.verify_mode = (ssl.CERT_NONE
586 if opts_no_check_certificate
587 else ssl.CERT_REQUIRED)
588 context.set_default_verify_paths()
590 context.load_default_certs()
591 except AttributeError:
593 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
595 class ExtractorError(Exception):
596 """Error during info extraction."""
597 def __init__(self, msg, tb=None, expected=False, cause=None):
598 """ tb, if given, is the original traceback (so that it can be printed out).
599 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
602 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
605 msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
606 super(ExtractorError, self).__init__(msg)
609 self.exc_info = sys.exc_info() # preserve original exception
612 def format_traceback(self):
613 if self.traceback is None:
615 return u''.join(traceback.format_tb(self.traceback))
618 class RegexNotFoundError(ExtractorError):
619 """Error when a regex didn't match"""
623 class DownloadError(Exception):
624 """Download Error exception.
626 This exception may be thrown by FileDownloader objects if they are not
627 configured to continue on errors. They will contain the appropriate
630 def __init__(self, msg, exc_info=None):
631 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
632 super(DownloadError, self).__init__(msg)
633 self.exc_info = exc_info
636 class SameFileError(Exception):
637 """Same File exception.
639 This exception will be thrown by FileDownloader objects if they detect
640 multiple files would have to be downloaded to the same file on disk.
645 class PostProcessingError(Exception):
646 """Post Processing exception.
648 This exception may be raised by PostProcessor's .run() method to
649 indicate an error in the postprocessing task.
651 def __init__(self, msg):
654 class MaxDownloadsReached(Exception):
655 """ --max-downloads limit has been reached. """
659 class UnavailableVideoError(Exception):
660 """Unavailable Format exception.
662 This exception will be thrown when a video is requested
663 in a format that is not available for that video.
668 class ContentTooShortError(Exception):
669 """Content Too Short exception.
671 This exception may be raised by FileDownloader objects when a file they
672 download is too small for what the server announced first, indicating
673 the connection was probably interrupted.
679 def __init__(self, downloaded, expected):
680 self.downloaded = downloaded
681 self.expected = expected
683 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
684 """Handler for HTTP requests and responses.
686 This class, when installed with an OpenerDirector, automatically adds
687 the standard headers to every HTTP request and handles gzipped and
688 deflated responses from web servers. If compression is to be avoided in
689 a particular request, the original request in the program code only has
690 to include the HTTP header "Youtubedl-No-Compression", which will be
691 removed before making the real request.
693 Part of this code was copied from:
695 http://techknack.net/python-urllib2-handlers/
697 Andrew Rowls, the author of that code, agreed to release it to the
704 return zlib.decompress(data, -zlib.MAX_WBITS)
706 return zlib.decompress(data)
709 def addinfourl_wrapper(stream, headers, url, code):
710 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
711 return compat_urllib_request.addinfourl(stream, headers, url, code)
712 ret = compat_urllib_request.addinfourl(stream, headers, url)
716 def http_request(self, req):
717 for h,v in std_headers.items():
721 if 'Youtubedl-no-compression' in req.headers:
722 if 'Accept-encoding' in req.headers:
723 del req.headers['Accept-encoding']
724 del req.headers['Youtubedl-no-compression']
725 if 'Youtubedl-user-agent' in req.headers:
726 if 'User-agent' in req.headers:
727 del req.headers['User-agent']
728 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
729 del req.headers['Youtubedl-user-agent']
732 def http_response(self, req, resp):
735 if resp.headers.get('Content-encoding', '') == 'gzip':
736 content = resp.read()
737 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
739 uncompressed = io.BytesIO(gz.read())
740 except IOError as original_ioerror:
741 # There may be junk add the end of the file
742 # See http://stackoverflow.com/q/4928560/35070 for details
743 for i in range(1, 1024):
745 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
746 uncompressed = io.BytesIO(gz.read())
751 raise original_ioerror
752 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
753 resp.msg = old_resp.msg
755 if resp.headers.get('Content-encoding', '') == 'deflate':
756 gz = io.BytesIO(self.deflate(resp.read()))
757 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
758 resp.msg = old_resp.msg
761 https_request = http_request
762 https_response = http_response
765 def parse_iso8601(date_str):
766 """ Return a UNIX timestamp from the given date """
772 r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
775 timezone = datetime.timedelta()
777 date_str = date_str[:-len(m.group(0))]
778 if not m.group('sign'):
779 timezone = datetime.timedelta()
781 sign = 1 if m.group('sign') == '+' else -1
782 timezone = datetime.timedelta(
783 hours=sign * int(m.group('hours')),
784 minutes=sign * int(m.group('minutes')))
786 dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone
787 return calendar.timegm(dt.timetuple())
790 def unified_strdate(date_str):
791 """Return a string with the date in the format YYYYMMDD"""
798 date_str = date_str.replace(',', ' ')
799 # %z (UTC offset) is only supported in python>=3.2
800 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
801 format_expressions = [
813 '%Y-%m-%dT%H:%M:%SZ',
814 '%Y-%m-%dT%H:%M:%S.%fZ',
815 '%Y-%m-%dT%H:%M:%S.%f0Z',
817 '%Y-%m-%dT%H:%M:%S.%f',
820 for expression in format_expressions:
822 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
825 if upload_date is None:
826 timetuple = email.utils.parsedate_tz(date_str)
828 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
831 def determine_ext(url, default_ext=u'unknown_video'):
832 guess = url.partition(u'?')[0].rpartition(u'.')[2]
833 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle file name that belongs to a media file name.

    Everything after the last '.' of filename is replaced by
    '<sub_lang>.<sub_format>'. When filename contains no '.', the suffix is
    simply appended to the whole name.
    """
    stem, dot, _ = filename.rpartition('.')
    if not dot:
        # No dot at all: rpartition leaves the text in the last slot,
        # so fall back to the complete name.
        stem = filename
    return u'.'.join((stem, sub_lang, sub_format))
841 def date_from_str(date_str):
843 Return a datetime object from a string in the format YYYYMMDD or
844 (now|today)[+-][0-9](day|week|month|year)(s)?"""
845 today = datetime.date.today()
846 if date_str == 'now'or date_str == 'today':
848 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
849 if match is not None:
850 sign = match.group('sign')
851 time = int(match.group('time'))
854 unit = match.group('unit')
863 delta = datetime.timedelta(**{unit: time})
865 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
867 def hyphenate_date(date_str):
869 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
870 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
871 if match is not None:
872 return '-'.join(match.groups())
876 class DateRange(object):
877 """Represents a time interval between two dates"""
878 def __init__(self, start=None, end=None):
879 """start and end must be strings in the format accepted by date"""
880 if start is not None:
881 self.start = date_from_str(start)
883 self.start = datetime.datetime.min.date()
885 self.end = date_from_str(end)
887 self.end = datetime.datetime.max.date()
888 if self.start > self.end:
889 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
892 """Returns a range that only contains the given day"""
894 def __contains__(self, date):
895 """Check if the date is in the range"""
896 if not isinstance(date, datetime.date):
897 date = date_from_str(date)
898 return self.start <= date <= self.end
900 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
904 """ Returns the platform name as a compat_str """
905 res = platform.platform()
906 if isinstance(res, bytes):
907 res = res.decode(preferredencoding())
909 assert isinstance(res, compat_str)
913 def _windows_write_string(s, out):
914 """ Returns True if the string was written using special methods,
915 False if it has yet to be written out."""
916 # Adapted from http://stackoverflow.com/a/3259271/35070
919 import ctypes.wintypes
927 return sum((2 if ord(c) > 0xffff else 1) for c in s)
929 fileno = out.fileno()
930 if fileno not in WIN_OUTPUT_IDS:
933 GetStdHandle = ctypes.WINFUNCTYPE(
934 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
935 ("GetStdHandle", ctypes.windll.kernel32))
936 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
938 WriteConsoleW = ctypes.WINFUNCTYPE(
939 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
940 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
941 ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
942 written = ctypes.wintypes.DWORD(0)
944 GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
945 FILE_TYPE_CHAR = 0x0002
946 FILE_TYPE_REMOTE = 0x8000
947 GetConsoleMode = ctypes.WINFUNCTYPE(
948 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
949 ctypes.POINTER(ctypes.wintypes.DWORD))(
950 ("GetConsoleMode", ctypes.windll.kernel32))
951 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
953 def not_a_console(handle):
954 if handle == INVALID_HANDLE_VALUE or handle is None:
956 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
957 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
962 remaining = ucs2_len(s)
965 h, s, min(remaining, 1024), ctypes.byref(written), None)
967 raise OSError('Failed to write string')
968 remaining -= written.value
972 def write_string(s, out=None, encoding=None):
975 assert type(s) == compat_str
977 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
978 if _windows_write_string(s, out):
981 if ('b' in getattr(out, 'mode', '') or
982 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
983 byt = s.encode(encoding or preferredencoding(), 'ignore')
985 elif hasattr(out, 'buffer'):
986 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
987 byt = s.encode(enc, 'ignore')
988 out.buffer.write(byt)
994 def bytes_to_intlist(bs):
997 if isinstance(bs[0], int): # Python 3
1000 return [ord(c) for c in bs]
1003 def intlist_to_bytes(xs):
1006 if isinstance(chr(0), bytes): # Python 2
1007 return ''.join([chr(x) for x in xs])
def get_cachedir(params=None):
    """Return the directory to use for youtube-dl's cache.

    params is an optional options mapping. If it contains a 'cachedir' entry,
    that value is returned unchanged. Otherwise the result is the
    'youtube-dl' subdirectory of $XDG_CACHE_HOME, or of ~/.cache when that
    environment variable is not set.
    """
    if params is None:
        # Use a fresh dict per call instead of a shared mutable default.
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
1018 # Cross-platform file locking
1019 if sys.platform == 'win32':
1020 import ctypes.wintypes
1023 class OVERLAPPED(ctypes.Structure):
1025 ('Internal', ctypes.wintypes.LPVOID),
1026 ('InternalHigh', ctypes.wintypes.LPVOID),
1027 ('Offset', ctypes.wintypes.DWORD),
1028 ('OffsetHigh', ctypes.wintypes.DWORD),
1029 ('hEvent', ctypes.wintypes.HANDLE),
1032 kernel32 = ctypes.windll.kernel32
1033 LockFileEx = kernel32.LockFileEx
1034 LockFileEx.argtypes = [
1035 ctypes.wintypes.HANDLE, # hFile
1036 ctypes.wintypes.DWORD, # dwFlags
1037 ctypes.wintypes.DWORD, # dwReserved
1038 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1039 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1040 ctypes.POINTER(OVERLAPPED) # Overlapped
1042 LockFileEx.restype = ctypes.wintypes.BOOL
1043 UnlockFileEx = kernel32.UnlockFileEx
1044 UnlockFileEx.argtypes = [
1045 ctypes.wintypes.HANDLE, # hFile
1046 ctypes.wintypes.DWORD, # dwReserved
1047 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1048 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1049 ctypes.POINTER(OVERLAPPED) # Overlapped
1051 UnlockFileEx.restype = ctypes.wintypes.BOOL
1052 whole_low = 0xffffffff
1053 whole_high = 0x7fffffff
1055 def _lock_file(f, exclusive):
1056 overlapped = OVERLAPPED()
1057 overlapped.Offset = 0
1058 overlapped.OffsetHigh = 0
1059 overlapped.hEvent = 0
1060 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
1061 handle = msvcrt.get_osfhandle(f.fileno())
1062 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
1063 whole_low, whole_high, f._lock_file_overlapped_p):
1064 raise OSError('Locking file failed: %r' % ctypes.FormatError())
1066 def _unlock_file(f):
1067 assert f._lock_file_overlapped_p
1068 handle = msvcrt.get_osfhandle(f.fileno())
1069 if not UnlockFileEx(handle, 0,
1070 whole_low, whole_high, f._lock_file_overlapped_p):
1071 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
1076 def _lock_file(f, exclusive):
1077 fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
1079 def _unlock_file(f):
1080 fcntl.lockf(f, fcntl.LOCK_UN)
1083 class locked_file(object):
1084 def __init__(self, filename, mode, encoding=None):
1085 assert mode in ['r', 'a', 'w']
1086 self.f = io.open(filename, mode, encoding=encoding)
1089 def __enter__(self):
1090 exclusive = self.mode != 'r'
1092 _lock_file(self.f, exclusive)
1098 def __exit__(self, etype, value, traceback):
1100 _unlock_file(self.f)
1107 def write(self, *args):
1108 return self.f.write(*args)
1110 def read(self, *args):
1111 return self.f.read(*args)
1114 def shell_quote(args):
1116 encoding = sys.getfilesystemencoding()
1117 if encoding is None:
1120 if isinstance(a, bytes):
1121 # We may get a filename encoded with 'encodeFilename'
1122 a = a.decode(encoding)
1123 quoted_args.append(pipes.quote(a))
1124 return u' '.join(quoted_args)
1127 def takewhile_inclusive(pred, seq):
1128 """ Like itertools.takewhile, but include the latest evaluated element
1129 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """Attach extra data to a URL for internal use.

    data is serialized with json.dumps, URL-encoded under the
    '__youtubedl_smuggle' key and appended to url after a '#'.
    """
    payload = {u'__youtubedl_smuggle': json.dumps(data)}
    fragment = compat_urllib_parse.urlencode(payload)
    return url + u'#' + fragment
1144 def unsmuggle_url(smug_url, default=None):
1145 if not '#__youtubedl_smuggle' in smug_url:
1146 return smug_url, default
1147 url, _, sdata = smug_url.rpartition(u'#')
1148 jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
1149 data = json.loads(jsond)
1153 def format_bytes(bytes):
1156 if type(bytes) is str:
1157 bytes = float(bytes)
1161 exponent = int(math.log(bytes, 1024.0))
1162 suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1163 converted = float(bytes) / float(1024 ** exponent)
1164 return u'%.2f%s' % (converted, suffix)
1167 def str_to_int(int_str):
1168 int_str = re.sub(r'[,\.]', u'', int_str)
1172 def get_term_width():
1173 columns = os.environ.get('COLUMNS', None)
1178 sp = subprocess.Popen(
1180 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1181 out, err = sp.communicate()
1182 return int(out.split()[1])
1188 def month_by_name(name):
1189 """ Return the number of a month by (locale-independently) English name """
1192 u'January', u'February', u'March', u'April', u'May', u'June',
1193 u'July', u'August', u'September', u'October', u'November', u'December']
1195 return ENGLISH_NAMES.index(name) + 1
1200 def fix_xml_ampersands(xml_str):
1201 """Replace all the '&' by '&' in XML"""
1203 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1208 def setproctitle(title):
1209 assert isinstance(title, compat_str)
1211 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1214 title_bytes = title.encode('utf-8')
1215 buf = ctypes.create_string_buffer(len(title_bytes))
1216 buf.value = title_bytes
1218 libc.prctl(15, buf, 0, 0, 0)
1219 except AttributeError:
1220 return # Strange libc, just skip this
1223 def remove_start(s, start):
1224 if s.startswith(start):
1225 return s[len(start):]
def url_basename(url):
    """Return the last path component of url.

    Only the path part of the URL is considered (query and fragment are
    ignored), and leading/trailing slashes are dropped first, so
    'http://host/a/b/' yields 'b'. An empty path yields ''.
    """
    trimmed = compat_urlparse.urlparse(url).path.strip(u'/')
    # Text after the last remaining slash, or the whole string if none is left.
    return trimmed.rpartition(u'/')[2]
1234 class HEADRequest(compat_urllib_request.Request):
1235 def get_method(self):
def int_or_none(v, scale=1, default=None):
    """Convert v to an int floor-divided by scale.

    Returns default when v is None; any other value is passed to int(), so
    unconvertible input raises whatever int() raises.
    """
    if v is None:
        return default
    return int(v) // scale
def float_or_none(v, scale=1, default=None):
    """Convert v to a float divided by scale.

    Returns default when v is None; any other value is passed to float(), so
    unconvertible input raises whatever float() raises.
    """
    if v is None:
        return default
    return float(v) / scale
1247 def parse_duration(s):
1252 r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
1255 res = int(m.group('secs'))
1257 res += int(m.group('mins')) * 60
1258 if m.group('hours'):
1259 res += int(m.group('hours')) * 60 * 60
def prepend_extension(filename, ext):
    """Insert ext in front of the existing extension of filename.

    'video.mp4' with ext 'temp' becomes 'video.temp.mp4'. A name without an
    extension (as determined by os.path.splitext) just gets '.ext' appended.
    """
    parts = os.path.splitext(filename)
    return u'{0}.{1}{2}'.format(parts[0], ext, parts[1])
1268 def check_executable(exe, args=[]):
1269 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1270 args can be a list of arguments for a short output (like -version) """
1272 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1278 class PagedList(object):
1279 def __init__(self, pagefunc, pagesize):
1280 self._pagefunc = pagefunc
1281 self._pagesize = pagesize
1284 # This is only useful for tests
1285 return len(self.getslice())
1287 def getslice(self, start=0, end=None):
1289 for pagenum in itertools.count(start // self._pagesize):
1290 firstid = pagenum * self._pagesize
1291 nextfirstid = pagenum * self._pagesize + self._pagesize
1292 if start >= nextfirstid:
1295 page_results = list(self._pagefunc(pagenum))
1298 start % self._pagesize
1299 if firstid <= start < nextfirstid
1303 ((end - 1) % self._pagesize) + 1
1304 if (end is not None and firstid <= end <= nextfirstid)
1307 if startv != 0 or endv is not None:
1308 page_results = page_results[startv:endv]
1309 res.extend(page_results)
1311 # A little optimization - if current page is not "full", ie. does
1312 # not contain page_size videos then we can assume that this page
1313 # is the last one - there are no more ids on further pages -
1314 # i.e. no need to query again.
1315 if len(page_results) + startv < self._pagesize:
1318 # If we got the whole page, but the next page is not interesting,
1319 # break out early as well
1320 if end == nextfirstid:
1325 def uppercase_escape(s):
1326 unicode_escape = codecs.getdecoder('unicode_escape')
1328 r'\\U[0-9a-fA-F]{8}',
1329 lambda m: unicode_escape(m.group(0))[0],
1333 struct.pack(u'!I', 0)
1335 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
1336 def struct_pack(spec, *args):
1337 if isinstance(spec, compat_str):
1338 spec = spec.encode('ascii')
1339 return struct.pack(spec, *args)
1341 def struct_unpack(spec, *args):
1342 if isinstance(spec, compat_str):
1343 spec = spec.encode('ascii')
1344 return struct.unpack(spec, *args)
1346 struct_pack = struct.pack
1347 struct_unpack = struct.unpack
1350 def read_batch_urls(batch_fd):
1352 if not isinstance(url, compat_str):
1353 url = url.decode('utf-8', 'replace')
1354 BOM_UTF8 = u'\xef\xbb\xbf'
1355 if url.startswith(BOM_UTF8):
1356 url = url[len(BOM_UTF8):]
1358 if url.startswith(('#', ';', ']')):
1362 with contextlib.closing(batch_fd) as fd:
1363 return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode form data and return it as ASCII bytes.

    All arguments are forwarded unchanged to the urlencode() function of
    compat_urllib_parse; the resulting text is then encoded as ASCII so it
    can be used as a request body.
    """
    encoded_text = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded_text.encode('ascii')
1371 class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
1372 def doctype(self, name, pubid, system):
1373 pass # Ignore doctypes
1375 parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
1376 kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
1377 return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
1380 if sys.version_info < (3, 0) and sys.platform == 'win32':
1381 def compat_getpass(prompt, *args, **kwargs):
1382 if isinstance(prompt, compat_str):
1383 prompt = prompt.encode(preferredencoding())
1384 return getpass.getpass(prompt, *args, **kwargs)
1386 compat_getpass = getpass.getpass
1398 def strip_jsonp(code):
1399 return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)