2 # -*- coding: utf-8 -*-
28 import xml.etree.ElementTree
32 import urllib.request as compat_urllib_request
33 except ImportError: # Python 2
34 import urllib2 as compat_urllib_request
37 import urllib.error as compat_urllib_error
38 except ImportError: # Python 2
39 import urllib2 as compat_urllib_error
42 import urllib.parse as compat_urllib_parse
43 except ImportError: # Python 2
44 import urllib as compat_urllib_parse
47 from urllib.parse import urlparse as compat_urllib_parse_urlparse
48 except ImportError: # Python 2
49 from urlparse import urlparse as compat_urllib_parse_urlparse
52 import urllib.parse as compat_urlparse
53 except ImportError: # Python 2
54 import urlparse as compat_urlparse
57 import http.cookiejar as compat_cookiejar
58 except ImportError: # Python 2
59 import cookielib as compat_cookiejar
62 import html.entities as compat_html_entities
63 except ImportError: # Python 2
64 import htmlentitydefs as compat_html_entities
67 import html.parser as compat_html_parser
68 except ImportError: # Python 2
69 import HTMLParser as compat_html_parser
72 import http.client as compat_http_client
73 except ImportError: # Python 2
74 import httplib as compat_http_client
77 from urllib.error import HTTPError as compat_HTTPError
78 except ImportError: # Python 2
79 from urllib2 import HTTPError as compat_HTTPError
82 from urllib.request import urlretrieve as compat_urlretrieve
83 except ImportError: # Python 2
84 from urllib import urlretrieve as compat_urlretrieve
88 from subprocess import DEVNULL
89 compat_subprocess_get_DEVNULL = lambda: DEVNULL
91 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
94 from urllib.parse import parse_qs as compat_parse_qs
95 except ImportError: # Python 2
96 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
97 # Python 2's version is apparently totally broken
98 def _unquote(string, encoding='utf-8', errors='replace'):
101 res = string.split('%')
108 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
115 pct_sequence += item[:2].decode('hex')
118 # This segment was just a single percent-encoded character.
119 # May be part of a sequence of code units, so delay decoding.
120 # (Stored in pct_sequence).
124 # Encountered non-percent-encoded characters. Flush the current
126 string += pct_sequence.decode(encoding, errors) + rest
129 # Flush the final pct_sequence
130 string += pct_sequence.decode(encoding, errors)
133 def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
134 encoding='utf-8', errors='replace'):
135 qs, _coerce_result = qs, unicode
136 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
138 for name_value in pairs:
139 if not name_value and not strict_parsing:
141 nv = name_value.split('=', 1)
144 raise ValueError("bad query field: %r" % (name_value,))
145 # Handle case of a control-name with no equal sign
146 if keep_blank_values:
150 if len(nv[1]) or keep_blank_values:
151 name = nv[0].replace('+', ' ')
152 name = _unquote(name, encoding=encoding, errors=errors)
153 name = _coerce_result(name)
154 value = nv[1].replace('+', ' ')
155 value = _unquote(value, encoding=encoding, errors=errors)
156 value = _coerce_result(value)
157 r.append((name, value))
160 def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
161 encoding='utf-8', errors='replace'):
163 pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
164 encoding=encoding, errors=errors)
165 for name, value in pairs:
166 if name in parsed_result:
167 parsed_result[name].append(value)
169 parsed_result[name] = [value]
173 compat_str = unicode # Python 2
178 compat_chr = unichr # Python 2
183 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
184 except ImportError: # Python 2.6
185 from xml.parsers.expat import ExpatError as compat_xml_parse_error
188 if type(c) is int: return c
191 # This is not clearly defined otherwise
192 compiled_regex_type = type(re.compile(''))
195 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
196 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
197 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
198 'Accept-Encoding': 'gzip, deflate',
199 'Accept-Language': 'en-us,en;q=0.5',
202 def preferredencoding():
203 """Get preferred encoding.
205 Returns the best encoding scheme for the system, based on
206 locale.getpreferredencoding() and some further tweaks.
209 pref = locale.getpreferredencoding()
216 if sys.version_info < (3,0):
218 print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
221 assert type(s) == type(u'')
224 # In Python 2.x, json.dump expects a bytestream.
225 # In Python 3.x, it writes to a character stream
226 if sys.version_info < (3,0):
227 def write_json_file(obj, fn):
228 with open(fn, 'wb') as f:
231 def write_json_file(obj, fn):
232 with open(fn, 'w', encoding='utf-8') as f:
235 if sys.version_info >= (2,7):
236 def find_xpath_attr(node, xpath, key, val):
237 """ Find the xpath xpath[@key=val] """
238 assert re.match(r'^[a-zA-Z]+$', key)
239 assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
240 expr = xpath + u"[@%s='%s']" % (key, val)
241 return node.find(expr)
243 def find_xpath_attr(node, xpath, key, val):
244 for f in node.findall(xpath):
245 if f.attrib.get(key) == val:
249 # On python2.6 the xml.etree.ElementTree.Element methods don't support
250 # the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand ``prefix:tag`` steps of a path into ``{uri}tag`` form.

    path is a '/'-separated path whose steps may carry a namespace prefix;
    ns_map maps every prefix used in path to its namespace URI.  Steps
    without a prefix are copied unchanged.

    Raises KeyError if a prefix is missing from ns_map, and ValueError if a
    step contains more than one ':'.
    """
    replaced = []
    for component in path.split('/'):
        parts = component.split(':')
        if len(parts) == 1:
            # No namespace prefix on this step
            replaced.append(parts[0])
        else:
            ns, tag = parts
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function.  Group 1 of the match must hold the entity text
    without the surrounding '&' and ';'.

    Named entities and decimal/hexadecimal character references are
    decoded; anything else is returned as its literal '&...;' text.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric character reference.  The whole entity has to match, and hex
    # references may contain the digits a-f.
    mobj = re.match(u'(?u)#(?:[xX]([0-9a-fA-F]+)|([0-9]+))$', entity)
    if mobj is not None:
        hexstr, decstr = mobj.groups()
        try:
            if hexstr is not None:
                return compat_chr(int(hexstr, 16))
            return compat_chr(int(decstr, 10))
        except (ValueError, OverflowError):
            # Code point outside the range supported by compat_chr;
            # fall through and keep the literal text.
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
287 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
288 class BaseHTMLParser(compat_html_parser.HTMLParser):
290 compat_html_parser.HTMLParser.__init__(self)
293 def loads(self, html):
298 class AttrParser(BaseHTMLParser):
299 """Modified HTMLParser that isolates a tag with the specified attribute"""
300 def __init__(self, attribute, value):
301 self.attribute = attribute
306 self.watch_startpos = False
308 BaseHTMLParser.__init__(self)
310 def error(self, message):
311 if self.error_count > 10 or self.started:
312 raise compat_html_parser.HTMLParseError(message, self.getpos())
313 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
314 self.error_count += 1
317 def handle_starttag(self, tag, attrs):
320 self.find_startpos(None)
321 if self.attribute in attrs and attrs[self.attribute] == self.value:
324 self.watch_startpos = True
326 if not tag in self.depth: self.depth[tag] = 0
329 def handle_endtag(self, tag):
331 if tag in self.depth: self.depth[tag] -= 1
332 if self.depth[self.result[0]] == 0:
334 self.result.append(self.getpos())
336 def find_startpos(self, x):
337 """Needed to put the start position of the result (self.result[1])
338 after the opening tag with the requested id"""
339 if self.watch_startpos:
340 self.watch_startpos = False
341 self.result.append(self.getpos())
342 handle_entityref = handle_charref = handle_data = handle_comment = \
343 handle_decl = handle_pi = unknown_decl = find_startpos
345 def get_result(self):
346 if self.result is None:
348 if len(self.result) != 3:
350 lines = self.html.split('\n')
351 lines = lines[self.result[1][0]-1:self.result[2][0]]
352 lines[0] = lines[0][self.result[1][1]:]
354 lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
355 lines[-1] = lines[-1][:self.result[2][1]]
356 return '\n'.join(lines).strip()
357 # Hack for https://github.com/rg3/youtube-dl/issues/662
358 if sys.version_info < (2, 7, 3):
359 AttrParser.parse_endtag = (lambda self, i:
360 i + len("</scr'+'ipt>")
361 if self.rawdata[i:].startswith("</scr'+'ipt>")
362 else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document.

    Thin wrapper that delegates to get_element_by_attribute with the
    attribute name "id".
    """
    return get_element_by_attribute("id", id, html)
368 def get_element_by_attribute(attribute, value, html):
369 """Return the content of the tag with the specified attribute in the passed HTML document"""
370 parser = AttrParser(attribute, value)
373 except compat_html_parser.HTMLParseError:
375 return parser.get_result()
377 class MetaParser(BaseHTMLParser):
379 Modified HTMLParser that isolates a meta tag with the specified name
382 def __init__(self, name):
383 BaseHTMLParser.__init__(self)
388 def handle_starttag(self, tag, attrs):
392 if attrs.get('name') == self.name:
393 self.result = attrs.get('content')
395 def get_result(self):
398 def get_meta_content(name, html):
400 Return the content attribute from the meta tag with the given name attribute.
402 parser = MetaParser(name)
405 except compat_html_parser.HTMLParseError:
407 return parser.get_result()
410 def clean_html(html):
411 """Clean an HTML snippet into a readable string"""
413 html = html.replace('\n', ' ')
414 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
415 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
417 html = re.sub('<.*?>', '', html)
418 # Replace html entities
419 html = unescapeHTML(html)
423 def sanitize_open(filename, open_mode):
424 """Try to open the given filename, and slightly tweak it if this fails.
426 Attempts to open the given filename. If this fails, it tries to change
427 the filename slightly, step by step, until it's either able to open it
428 or it fails and raises a final exception, like the standard open()
431 It returns the tuple (stream, definitive_file_name).
435 if sys.platform == 'win32':
437 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
438 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
439 stream = open(encodeFilename(filename), open_mode)
440 return (stream, filename)
441 except (IOError, OSError) as err:
442 if err.errno in (errno.EACCES,):
445 # In case of error, try to remove win32 forbidden chars
446 alt_filename = os.path.join(
447 re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
448 for path_part in os.path.split(filename)
450 if alt_filename == filename:
453 # An exception here should be caught in the caller
454 stream = open(encodeFilename(filename), open_mode)
455 return (stream, alt_filename)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns the timestamp computed by email.utils.mktime_tz, or None when
    timestr cannot be parsed by email.utils.parsedate_tz.
    """
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is None:
        return None
    return email.utils.mktime_tz(timetuple)
466 def sanitize_filename(s, restricted=False, is_id=False):
467 """Sanitizes a string so it could be used as part of a filename.
468 If restricted is set, use a stricter subset of allowed characters.
469 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
471 def replace_insane(char):
472 if char == '?' or ord(char) < 32 or ord(char) == 127:
475 return '' if restricted else '\''
477 return '_-' if restricted else ' -'
478 elif char in '\\/|*<>':
480 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
482 if restricted and ord(char) > 127:
486 result = u''.join(map(replace_insane, s))
488 while '__' in result:
489 result = result.replace('__', '_')
490 result = result.strip('_')
491 # Common case of "Foreign band name - English song title"
492 if restricted and result.startswith('-_'):
498 def orderedSet(iterable):
499 """ Remove all duplicates from the input iterable """
510 assert type(s) == compat_str
512 result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
516 def encodeFilename(s, for_subprocess=False):
518 @param s The name of the file
521 assert type(s) == compat_str
523 # Python 3 has a Unicode API
524 if sys.version_info >= (3, 0):
527 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
528 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
529 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
530 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
531 if not for_subprocess:
534 # For subprocess calls, encode with locale encoding
535 # Refer to http://stackoverflow.com/a/9951851/35070
536 encoding = preferredencoding()
538 encoding = sys.getfilesystemencoding()
541 return s.encode(encoding, 'ignore')
543 def decodeOption(optval):
546 if isinstance(optval, bytes):
547 optval = optval.decode(preferredencoding())
549 assert isinstance(optval, compat_str)
552 def formatSeconds(secs):
554 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
556 return '%d:%02d' % (secs // 60, secs % 60)
561 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
562 if sys.version_info < (3, 2):
565 class HTTPSConnectionV3(httplib.HTTPSConnection):
566 def __init__(self, *args, **kwargs):
567 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
570 sock = socket.create_connection((self.host, self.port), self.timeout)
571 if getattr(self, '_tunnel_host', False):
575 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
577 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
579 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
580 def https_open(self, req):
581 return self.do_open(HTTPSConnectionV3, req)
582 return HTTPSHandlerV3(**kwargs)
584 context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
585 context.verify_mode = (ssl.CERT_NONE
586 if opts_no_check_certificate
587 else ssl.CERT_REQUIRED)
588 context.set_default_verify_paths()
590 context.load_default_certs()
591 except AttributeError:
593 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
595 class ExtractorError(Exception):
596 """Error during info extraction."""
597 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
598 """ tb, if given, is the original traceback (so that it can be printed out).
599 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
602 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
604 if video_id is not None:
605 msg = video_id + ': ' + msg
607 msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
608 super(ExtractorError, self).__init__(msg)
611 self.exc_info = sys.exc_info() # preserve original exception
613 self.video_id = video_id
615 def format_traceback(self):
616 if self.traceback is None:
618 return u''.join(traceback.format_tb(self.traceback))
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so that callers can inspect or re-report the root cause
        self.exc_info = exc_info
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
648 class PostProcessingError(Exception):
649 """Post Processing exception.
651 This exception may be raised by PostProcessor's .run() method to
652 indicate an error in the postprocessing task.
654 def __init__(self, msg):
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
    pass
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # NOTE(review): both values are presumably sizes in bytes - confirm
        # against the code that raises this exception.
        self.downloaded = downloaded
        self.expected = expected
686 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
687 """Handler for HTTP requests and responses.
689 This class, when installed with an OpenerDirector, automatically adds
690 the standard headers to every HTTP request and handles gzipped and
691 deflated responses from web servers. If compression is to be avoided in
692 a particular request, the original request in the program code only has
693 to include the HTTP header "Youtubedl-No-Compression", which will be
694 removed before making the real request.
696 Part of this code was copied from:
698 http://techknack.net/python-urllib2-handlers/
700 Andrew Rowls, the author of that code, agreed to release it to the
707 return zlib.decompress(data, -zlib.MAX_WBITS)
709 return zlib.decompress(data)
712 def addinfourl_wrapper(stream, headers, url, code):
713 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
714 return compat_urllib_request.addinfourl(stream, headers, url, code)
715 ret = compat_urllib_request.addinfourl(stream, headers, url)
719 def http_request(self, req):
720 for h,v in std_headers.items():
724 if 'Youtubedl-no-compression' in req.headers:
725 if 'Accept-encoding' in req.headers:
726 del req.headers['Accept-encoding']
727 del req.headers['Youtubedl-no-compression']
728 if 'Youtubedl-user-agent' in req.headers:
729 if 'User-agent' in req.headers:
730 del req.headers['User-agent']
731 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
732 del req.headers['Youtubedl-user-agent']
735 def http_response(self, req, resp):
738 if resp.headers.get('Content-encoding', '') == 'gzip':
739 content = resp.read()
740 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
742 uncompressed = io.BytesIO(gz.read())
743 except IOError as original_ioerror:
744 # There may be junk at the end of the file
745 # See http://stackoverflow.com/q/4928560/35070 for details
746 for i in range(1, 1024):
748 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
749 uncompressed = io.BytesIO(gz.read())
754 raise original_ioerror
755 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
756 resp.msg = old_resp.msg
758 if resp.headers.get('Content-encoding', '') == 'deflate':
759 gz = io.BytesIO(self.deflate(resp.read()))
760 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
761 resp.msg = old_resp.msg
764 https_request = http_request
765 https_response = http_response
768 def parse_iso8601(date_str):
769 """ Return a UNIX timestamp from the given date """
775 r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
778 timezone = datetime.timedelta()
780 date_str = date_str[:-len(m.group(0))]
781 if not m.group('sign'):
782 timezone = datetime.timedelta()
784 sign = 1 if m.group('sign') == '+' else -1
785 timezone = datetime.timedelta(
786 hours=sign * int(m.group('hours')),
787 minutes=sign * int(m.group('minutes')))
789 dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone
790 return calendar.timegm(dt.timetuple())
793 def unified_strdate(date_str):
794 """Return a string with the date in the format YYYYMMDD"""
801 date_str = date_str.replace(',', ' ')
802 # %z (UTC offset) is only supported in python>=3.2
803 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
804 format_expressions = [
816 '%Y-%m-%dT%H:%M:%SZ',
817 '%Y-%m-%dT%H:%M:%S.%fZ',
818 '%Y-%m-%dT%H:%M:%S.%f0Z',
820 '%Y-%m-%dT%H:%M:%S.%f',
823 for expression in format_expressions:
825 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
828 if upload_date is None:
829 timetuple = email.utils.parsedate_tz(date_str)
831 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from a URL.

    The query string (everything from the first '?') is dropped and the
    text after the last '.' is taken as the candidate.  It is returned only
    if it consists solely of ASCII letters and digits; otherwise
    default_ext is returned.
    """
    guess = url.partition(u'?')[0].rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Build a subtitle file name from a media file name.

    Everything after the last '.' in filename is replaced by
    '<sub_lang>.<sub_format>'; if filename contains no '.', the suffix is
    simply appended.
    """
    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
844 def date_from_str(date_str):
846 Return a datetime object from a string in the format YYYYMMDD or
847 (now|today)[+-][0-9](day|week|month|year)(s)?"""
848 today = datetime.date.today()
849 if date_str == 'now'or date_str == 'today':
851 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
852 if match is not None:
853 sign = match.group('sign')
854 time = int(match.group('time'))
857 unit = match.group('unit')
866 delta = datetime.timedelta(**{unit: time})
868 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Strings that are not exactly eight digits are returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
879 class DateRange(object):
880 """Represents a time interval between two dates"""
881 def __init__(self, start=None, end=None):
882 """start and end must be strings in the format accepted by date"""
883 if start is not None:
884 self.start = date_from_str(start)
886 self.start = datetime.datetime.min.date()
888 self.end = date_from_str(end)
890 self.end = datetime.datetime.max.date()
891 if self.start > self.end:
892 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
895 """Returns a range that only contains the given day"""
897 def __contains__(self, date):
898 """Check if the date is in the range"""
899 if not isinstance(date, datetime.date):
900 date = date_from_str(date)
901 return self.start <= date <= self.end
903 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
907 """ Returns the platform name as a compat_str """
908 res = platform.platform()
909 if isinstance(res, bytes):
910 res = res.decode(preferredencoding())
912 assert isinstance(res, compat_str)
916 def _windows_write_string(s, out):
917 """ Returns True if the string was written using special methods,
918 False if it has yet to be written out."""
919 # Adapted from http://stackoverflow.com/a/3259271/35070
922 import ctypes.wintypes
929 fileno = out.fileno()
930 if fileno not in WIN_OUTPUT_IDS:
933 GetStdHandle = ctypes.WINFUNCTYPE(
934 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
935 ("GetStdHandle", ctypes.windll.kernel32))
936 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
938 WriteConsoleW = ctypes.WINFUNCTYPE(
939 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
940 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
941 ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
942 written = ctypes.wintypes.DWORD(0)
944 GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
945 FILE_TYPE_CHAR = 0x0002
946 FILE_TYPE_REMOTE = 0x8000
947 GetConsoleMode = ctypes.WINFUNCTYPE(
948 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
949 ctypes.POINTER(ctypes.wintypes.DWORD))(
950 ("GetConsoleMode", ctypes.windll.kernel32))
951 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
953 def not_a_console(handle):
954 if handle == INVALID_HANDLE_VALUE or handle is None:
956 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
957 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
962 def next_nonbmp_pos(s):
964 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
965 except StopIteration:
969 count = min(next_nonbmp_pos(s), 1024)
972 h, s, count if count else 2, ctypes.byref(written), None)
974 raise OSError('Failed to write string')
975 if not count: # We just wrote a non-BMP character
976 assert written.value == 2
979 assert written.value > 0
980 s = s[written.value:]
984 def write_string(s, out=None, encoding=None):
987 assert type(s) == compat_str
989 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
990 if _windows_write_string(s, out):
993 if ('b' in getattr(out, 'mode', '') or
994 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
995 byt = s.encode(encoding or preferredencoding(), 'ignore')
997 elif hasattr(out, 'buffer'):
998 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
999 byt = s.encode(enc, 'ignore')
1000 out.buffer.write(byt)
1006 def bytes_to_intlist(bs):
1009 if isinstance(bs[0], int): # Python 3
1012 return [ord(c) for c in bs]
1015 def intlist_to_bytes(xs):
1018 if isinstance(chr(0), bytes): # Python 2
1019 return ''.join([chr(x) for x in xs])
def get_cachedir(params=None):
    """Return the cache directory to use.

    params is an optional options dict; its 'cachedir' entry, when present,
    is returned as-is.  Otherwise the result is the 'youtube-dl' directory
    below $XDG_CACHE_HOME, falling back to ~/.cache when that variable is
    not set.
    """
    # Avoid a shared mutable default argument
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
1030 # Cross-platform file locking
1031 if sys.platform == 'win32':
1032 import ctypes.wintypes
1035 class OVERLAPPED(ctypes.Structure):
1037 ('Internal', ctypes.wintypes.LPVOID),
1038 ('InternalHigh', ctypes.wintypes.LPVOID),
1039 ('Offset', ctypes.wintypes.DWORD),
1040 ('OffsetHigh', ctypes.wintypes.DWORD),
1041 ('hEvent', ctypes.wintypes.HANDLE),
1044 kernel32 = ctypes.windll.kernel32
1045 LockFileEx = kernel32.LockFileEx
1046 LockFileEx.argtypes = [
1047 ctypes.wintypes.HANDLE, # hFile
1048 ctypes.wintypes.DWORD, # dwFlags
1049 ctypes.wintypes.DWORD, # dwReserved
1050 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1051 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1052 ctypes.POINTER(OVERLAPPED) # Overlapped
1054 LockFileEx.restype = ctypes.wintypes.BOOL
1055 UnlockFileEx = kernel32.UnlockFileEx
1056 UnlockFileEx.argtypes = [
1057 ctypes.wintypes.HANDLE, # hFile
1058 ctypes.wintypes.DWORD, # dwReserved
1059 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1060 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1061 ctypes.POINTER(OVERLAPPED) # Overlapped
1063 UnlockFileEx.restype = ctypes.wintypes.BOOL
1064 whole_low = 0xffffffff
1065 whole_high = 0x7fffffff
1067 def _lock_file(f, exclusive):
1068 overlapped = OVERLAPPED()
1069 overlapped.Offset = 0
1070 overlapped.OffsetHigh = 0
1071 overlapped.hEvent = 0
1072 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
1073 handle = msvcrt.get_osfhandle(f.fileno())
1074 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
1075 whole_low, whole_high, f._lock_file_overlapped_p):
1076 raise OSError('Locking file failed: %r' % ctypes.FormatError())
1078 def _unlock_file(f):
1079 assert f._lock_file_overlapped_p
1080 handle = msvcrt.get_osfhandle(f.fileno())
1081 if not UnlockFileEx(handle, 0,
1082 whole_low, whole_high, f._lock_file_overlapped_p):
1083 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
1088 def _lock_file(f, exclusive):
1089 fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
1091 def _unlock_file(f):
1092 fcntl.lockf(f, fcntl.LOCK_UN)
1095 class locked_file(object):
1096 def __init__(self, filename, mode, encoding=None):
1097 assert mode in ['r', 'a', 'w']
1098 self.f = io.open(filename, mode, encoding=encoding)
1101 def __enter__(self):
1102 exclusive = self.mode != 'r'
1104 _lock_file(self.f, exclusive)
1110 def __exit__(self, etype, value, traceback):
1112 _unlock_file(self.f)
1119 def write(self, *args):
1120 return self.f.write(*args)
1122 def read(self, *args):
1123 return self.f.read(*args)
1126 def shell_quote(args):
1128 encoding = sys.getfilesystemencoding()
1129 if encoding is None:
1132 if isinstance(a, bytes):
1133 # We may get a filename encoded with 'encodeFilename'
1134 a = a.decode(encoding)
1135 quoted_args.append(pipes.quote(a))
1136 return u' '.join(quoted_args)
1139 def takewhile_inclusive(pred, seq):
1140 """ Like itertools.takewhile, but include the latest evaluated element
1141 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is serialized with json.dumps, URL-encoded under the key
    '__youtubedl_smuggle' and appended to url after a '#'.
    """
    sdata = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return url + u'#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Split a URL carrying smuggled data into (url, data).

    If smug_url contains no '#__youtubedl_smuggle' marker it is returned
    unchanged together with default.  Otherwise the part after the last '#'
    is parsed as a query string and its '__youtubedl_smuggle' value is
    decoded with json.loads.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
1165 def format_bytes(bytes):
1168 if type(bytes) is str:
1169 bytes = float(bytes)
1173 exponent = int(math.log(bytes, 1024.0))
1174 suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1175 converted = float(bytes) / float(1024 ** exponent)
1176 return u'%.2f%s' % (converted, suffix)
def str_to_int(int_str):
    """Convert a number written with ',' or '.' group separators to an int.

    All ',' and '.' characters are removed before conversion, so '1,234'
    and '1.234' both yield 1234.  None is passed through unchanged.
    Raises ValueError if the remaining text is not an integer.
    """
    if int_str is None:
        return None
    int_str = re.sub(r'[,\.]', u'', int_str)
    return int(int_str)
1184 def get_term_width():
1185 columns = os.environ.get('COLUMNS', None)
1190 sp = subprocess.Popen(
1192 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1193 out, err = sp.communicate()
1194 return int(out.split()[1])
1200 def month_by_name(name):
1201 """ Return the number of a month by (locale-independently) English name """
1204 u'January', u'February', u'March', u'April', u'May', u'June',
1205 u'July', u'August', u'September', u'October', u'November', u'December']
1207 return ENGLISH_NAMES.index(name) + 1
1212 def fix_xml_ampersands(xml_str):
1213 """Replace all the '&' by '&amp;' in XML"""
1215 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1220 def setproctitle(title):
1221 assert isinstance(title, compat_str)
1223 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1226 title_bytes = title.encode('utf-8')
1227 buf = ctypes.create_string_buffer(len(title_bytes))
1228 buf.value = title_bytes
1230 libc.prctl(15, buf, 0, 0, 0)
1231 except AttributeError:
1232 return # Strange libc, just skip this
def remove_start(s, start):
    """Return s without the leading start, or s unchanged if it does not begin with start."""
    if s.startswith(start):
        return s[len(start):]
    return s
def url_basename(url):
    """Return the last segment of the path component of url.

    Leading and trailing '/' are stripped from the path first, so a
    trailing slash is ignored; an empty path yields ''.
    """
    path = compat_urlparse.urlparse(url).path
    return path.strip(u'/').split(u'/')[-1]
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that is sent with the HTTP HEAD method."""

    def get_method(self):
        return "HEAD"
def int_or_none(v, scale=1, default=None, get_attr=None):
    """Convert v to an int divided (floor) by scale, or return default.

    If get_attr is given and v is not None, the attribute of that name is
    read from v first (a missing attribute counts as None).  default is
    returned whenever the resulting value is None.
    """
    # Only look up an attribute when one was actually requested
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    return default if v is None else (int(v) // scale)
def float_or_none(v, scale=1, default=None):
    """Return float(v) divided by scale, or default when v is None."""
    return default if v is None else (float(v) / scale)
1262 def parse_duration(s):
1267 r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
1270 res = int(m.group('secs'))
1272 res += int(m.group('mins')) * 60
1273 if m.group('hours'):
1274 res += int(m.group('hours')) * 60 * 60
def prepend_extension(filename, ext):
    """Insert ext in front of the existing extension of filename.

    'abc.ext' with ext 'temp' becomes 'abc.temp.ext'; a filename without an
    extension simply gets '.<ext>' appended.
    """
    name, real_ext = os.path.splitext(filename)
    return u'{0}.{1}{2}'.format(name, ext, real_ext)
1283 def check_executable(exe, args=[]):
1284 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1285 args can be a list of arguments for a short output (like -version) """
1287 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1293 class PagedList(object):
1294 def __init__(self, pagefunc, pagesize):
1295 self._pagefunc = pagefunc
1296 self._pagesize = pagesize
1299 # This is only useful for tests
1300 return len(self.getslice())
1302 def getslice(self, start=0, end=None):
1304 for pagenum in itertools.count(start // self._pagesize):
1305 firstid = pagenum * self._pagesize
1306 nextfirstid = pagenum * self._pagesize + self._pagesize
1307 if start >= nextfirstid:
1310 page_results = list(self._pagefunc(pagenum))
1313 start % self._pagesize
1314 if firstid <= start < nextfirstid
1318 ((end - 1) % self._pagesize) + 1
1319 if (end is not None and firstid <= end <= nextfirstid)
1322 if startv != 0 or endv is not None:
1323 page_results = page_results[startv:endv]
1324 res.extend(page_results)
1326 # A little optimization - if current page is not "full", ie. does
1327 # not contain page_size videos then we can assume that this page
1328 # is the last one - there are no more ids on further pages -
1329 # i.e. no need to query again.
1330 if len(page_results) + startv < self._pagesize:
1333 # If we got the whole page, but the next page is not interesting,
1334 # break out early as well
1335 if end == nextfirstid:
1340 def uppercase_escape(s):
1341 unicode_escape = codecs.getdecoder('unicode_escape')
1343 r'\\U[0-9a-fA-F]{8}',
1344 lambda m: unicode_escape(m.group(0))[0],
1348 struct.pack(u'!I', 0)
1350 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
1351 def struct_pack(spec, *args):
1352 if isinstance(spec, compat_str):
1353 spec = spec.encode('ascii')
1354 return struct.pack(spec, *args)
1356 def struct_unpack(spec, *args):
1357 if isinstance(spec, compat_str):
1358 spec = spec.encode('ascii')
1359 return struct.unpack(spec, *args)
1361 struct_pack = struct.pack
1362 struct_unpack = struct.unpack
1365 def read_batch_urls(batch_fd):
1367 if not isinstance(url, compat_str):
1368 url = url.decode('utf-8', 'replace')
1369 BOM_UTF8 = u'\xef\xbb\xbf'
1370 if url.startswith(BOM_UTF8):
1371 url = url[len(BOM_UTF8):]
1373 if url.startswith(('#', ';', ']')):
1377 with contextlib.closing(batch_fd) as fd:
1378 return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode form data and return it as ASCII bytes.

    All arguments are forwarded unchanged to compat_urllib_parse.urlencode.
    """
    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
1386 class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
1387 def doctype(self, name, pubid, system):
1388 pass # Ignore doctypes
1390 parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
1391 kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
1392 return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
1395 if sys.version_info < (3, 0) and sys.platform == 'win32':
1396 def compat_getpass(prompt, *args, **kwargs):
1397 if isinstance(prompt, compat_str):
1398 prompt = prompt.encode(preferredencoding())
1399 return getpass.getpass(prompt, *args, **kwargs)
1401 compat_getpass = getpass.getpass
def strip_jsonp(code):
    """Strip a JSONP wrapper of the form ``callback(...);`` from code.

    The callback name may consist only of ASCII letters and underscores.
    Input that does not match this shape is returned unchanged.
    """
    return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
1417 def qualities(quality_ids):
1418 """ Get a numeric quality value out of a list of possible values """
1421 return quality_ids.index(qid)