2 # -*- coding: utf-8 -*-
26 import xml.etree.ElementTree
30 import urllib.request as compat_urllib_request
31 except ImportError: # Python 2
32 import urllib2 as compat_urllib_request
35 import urllib.error as compat_urllib_error
36 except ImportError: # Python 2
37 import urllib2 as compat_urllib_error
40 import urllib.parse as compat_urllib_parse
41 except ImportError: # Python 2
42 import urllib as compat_urllib_parse
45 from urllib.parse import urlparse as compat_urllib_parse_urlparse
46 except ImportError: # Python 2
47 from urlparse import urlparse as compat_urllib_parse_urlparse
50 import urllib.parse as compat_urlparse
51 except ImportError: # Python 2
52 import urlparse as compat_urlparse
55 import http.cookiejar as compat_cookiejar
56 except ImportError: # Python 2
57 import cookielib as compat_cookiejar
60 import html.entities as compat_html_entities
61 except ImportError: # Python 2
62 import htmlentitydefs as compat_html_entities
65 import html.parser as compat_html_parser
66 except ImportError: # Python 2
67 import HTMLParser as compat_html_parser
70 import http.client as compat_http_client
71 except ImportError: # Python 2
72 import httplib as compat_http_client
75 from urllib.error import HTTPError as compat_HTTPError
76 except ImportError: # Python 2
77 from urllib2 import HTTPError as compat_HTTPError
80 from urllib.request import urlretrieve as compat_urlretrieve
81 except ImportError: # Python 2
82 from urllib import urlretrieve as compat_urlretrieve
86 from subprocess import DEVNULL
87 compat_subprocess_get_DEVNULL = lambda: DEVNULL
89 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
92 from urllib.parse import parse_qs as compat_parse_qs
93 except ImportError: # Python 2
94 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
95 # Python 2's version is apparently totally broken
96 def _unquote(string, encoding='utf-8', errors='replace'):
99 res = string.split('%')
106 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
113 pct_sequence += item[:2].decode('hex')
116 # This segment was just a single percent-encoded character.
117 # May be part of a sequence of code units, so delay decoding.
118 # (Stored in pct_sequence).
122 # Encountered non-percent-encoded characters. Flush the current
124 string += pct_sequence.decode(encoding, errors) + rest
127 # Flush the final pct_sequence
128 string += pct_sequence.decode(encoding, errors)
131 def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
132 encoding='utf-8', errors='replace'):
133 qs, _coerce_result = qs, unicode
134 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
136 for name_value in pairs:
137 if not name_value and not strict_parsing:
139 nv = name_value.split('=', 1)
142 raise ValueError("bad query field: %r" % (name_value,))
143 # Handle case of a control-name with no equal sign
144 if keep_blank_values:
148 if len(nv[1]) or keep_blank_values:
149 name = nv[0].replace('+', ' ')
150 name = _unquote(name, encoding=encoding, errors=errors)
151 name = _coerce_result(name)
152 value = nv[1].replace('+', ' ')
153 value = _unquote(value, encoding=encoding, errors=errors)
154 value = _coerce_result(value)
155 r.append((name, value))
158 def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
159 encoding='utf-8', errors='replace'):
161 pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
162 encoding=encoding, errors=errors)
163 for name, value in pairs:
164 if name in parsed_result:
165 parsed_result[name].append(value)
167 parsed_result[name] = [value]
171 compat_str = unicode # Python 2
176 compat_chr = unichr # Python 2
181 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
182 except ImportError: # Python 2.6
183 from xml.parsers.expat import ExpatError as compat_xml_parse_error
186 if type(c) is int: return c
189 # This is not clearly defined otherwise
190 compiled_regex_type = type(re.compile(''))
193 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
194 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
195 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
196 'Accept-Encoding': 'gzip, deflate',
197 'Accept-Language': 'en-us,en;q=0.5',
200 def preferredencoding():
201 """Get preferred encoding.
203 Returns the best encoding scheme for the system, based on
204 locale.getpreferredencoding() and some further tweaks.
207 pref = locale.getpreferredencoding()
214 if sys.version_info < (3,0):
216 print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
219 assert type(s) == type(u'')
222 # In Python 2.x, json.dump expects a bytestream.
223 # In Python 3.x, it writes to a character stream
224 if sys.version_info < (3,0):
225 def write_json_file(obj, fn):
226 with open(fn, 'wb') as f:
229 def write_json_file(obj, fn):
230 with open(fn, 'w', encoding='utf-8') as f:
233 if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
    """ Find the xpath xpath[@key=val] """
    # Only simple attribute names and values are accepted; anything
    # fancier could break the XPath expression assembled below.
    assert re.match(r'^[a-zA-Z]+$', key)
    assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
    return node.find(u"%s[@%s='%s']" % (xpath, key, val))
241 def find_xpath_attr(node, xpath, key, val):
242 for f in node.findall(xpath):
243 if f.attrib.get(key) == val:
247 # On python2.6 the xml.etree.ElementTree.Element methods don't support
248 # the namespace parameter
249 def xpath_with_ns(path, ns_map):
250 components = [c.split(':') for c in path.split('/')]
254 replaced.append(c[0])
257 replaced.append('{%s}%s' % (ns_map[ns], tag))
258 return '/'.join(replaced)
260 def htmlentity_transform(matchobj):
261 """Transforms an HTML entity to a character.
263 This function receives a match object and is intended to be used with
264 the re.sub() function.
266 entity = matchobj.group(1)
268 # Known non-numeric HTML entity
269 if entity in compat_html_entities.name2codepoint:
270 return compat_chr(compat_html_entities.name2codepoint[entity])
272 mobj = re.match(u'(?u)#(x?\\d+)', entity)
274 numstr = mobj.group(1)
275 if numstr.startswith(u'x'):
277 numstr = u'0%s' % numstr
280 return compat_chr(int(numstr, base))
282 # Unknown entity in name, return its literal representation
283 return (u'&%s;' % entity)
285 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
286 class BaseHTMLParser(compat_html_parser.HTMLParser):
288 compat_html_parser.HTMLParser.__init__(self)
291 def loads(self, html):
296 class AttrParser(BaseHTMLParser):
297 """Modified HTMLParser that isolates a tag with the specified attribute"""
298 def __init__(self, attribute, value):
299 self.attribute = attribute
304 self.watch_startpos = False
306 BaseHTMLParser.__init__(self)
308 def error(self, message):
309 if self.error_count > 10 or self.started:
310 raise compat_html_parser.HTMLParseError(message, self.getpos())
311 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
312 self.error_count += 1
315 def handle_starttag(self, tag, attrs):
318 self.find_startpos(None)
319 if self.attribute in attrs and attrs[self.attribute] == self.value:
322 self.watch_startpos = True
324 if not tag in self.depth: self.depth[tag] = 0
327 def handle_endtag(self, tag):
329 if tag in self.depth: self.depth[tag] -= 1
330 if self.depth[self.result[0]] == 0:
332 self.result.append(self.getpos())
334 def find_startpos(self, x):
335 """Needed to put the start position of the result (self.result[1])
336 after the opening tag with the requested id"""
337 if self.watch_startpos:
338 self.watch_startpos = False
339 self.result.append(self.getpos())
340 handle_entityref = handle_charref = handle_data = handle_comment = \
341 handle_decl = handle_pi = unknown_decl = find_startpos
343 def get_result(self):
344 if self.result is None:
346 if len(self.result) != 3:
348 lines = self.html.split('\n')
349 lines = lines[self.result[1][0]-1:self.result[2][0]]
350 lines[0] = lines[0][self.result[1][1]:]
352 lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
353 lines[-1] = lines[-1][:self.result[2][1]]
354 return '\n'.join(lines).strip()
355 # Hack for https://github.com/rg3/youtube-dl/issues/662
356 if sys.version_info < (2, 7, 3):
357 AttrParser.parse_endtag = (lambda self, i:
358 i + len("</scr'+'ipt>")
359 if self.rawdata[i:].startswith("</scr'+'ipt>")
360 else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the element with the given ID attribute in the passed HTML document."""
    # Thin convenience wrapper over the generic attribute-based lookup.
    return get_element_by_attribute("id", id, html)
366 def get_element_by_attribute(attribute, value, html):
367 """Return the content of the tag with the specified attribute in the passed HTML document"""
368 parser = AttrParser(attribute, value)
371 except compat_html_parser.HTMLParseError:
373 return parser.get_result()
375 class MetaParser(BaseHTMLParser):
377 Modified HTMLParser that isolates a meta tag with the specified name
380 def __init__(self, name):
381 BaseHTMLParser.__init__(self)
386 def handle_starttag(self, tag, attrs):
390 if attrs.get('name') == self.name:
391 self.result = attrs.get('content')
393 def get_result(self):
396 def get_meta_content(name, html):
398 Return the content attribute from the meta tag with the given name attribute.
400 parser = MetaParser(name)
403 except compat_html_parser.HTMLParseError:
405 return parser.get_result()
408 def clean_html(html):
409 """Clean an HTML snippet into a readable string"""
411 html = html.replace('\n', ' ')
412 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
413 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
415 html = re.sub('<.*?>', '', html)
416 # Replace html entities
417 html = unescapeHTML(html)
421 def sanitize_open(filename, open_mode):
422 """Try to open the given filename, and slightly tweak it if this fails.
424 Attempts to open the given filename. If this fails, it tries to change
425 the filename slightly, step by step, until it's either able to open it
426 or it fails and raises a final exception, like the standard open()
429 It returns the tuple (stream, definitive_file_name).
433 if sys.platform == 'win32':
435 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
436 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
437 stream = open(encodeFilename(filename), open_mode)
438 return (stream, filename)
439 except (IOError, OSError) as err:
440 if err.errno in (errno.EACCES,):
443 # In case of error, try to remove win32 forbidden chars
444 alt_filename = os.path.join(
445 re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
446 for path_part in os.path.split(filename)
448 if alt_filename == filename:
451 # An exception here should be caught in the caller
452 stream = open(encodeFilename(filename), open_mode)
453 return (stream, alt_filename)
456 def timeconvert(timestr):
457 """Convert RFC 2822 defined time string into system timestamp"""
459 timetuple = email.utils.parsedate_tz(timestr)
460 if timetuple is not None:
461 timestamp = email.utils.mktime_tz(timetuple)
464 def sanitize_filename(s, restricted=False, is_id=False):
465 """Sanitizes a string so it could be used as part of a filename.
466 If restricted is set, use a stricter subset of allowed characters.
467 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
469 def replace_insane(char):
470 if char == '?' or ord(char) < 32 or ord(char) == 127:
473 return '' if restricted else '\''
475 return '_-' if restricted else ' -'
476 elif char in '\\/|*<>':
478 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
480 if restricted and ord(char) > 127:
484 result = u''.join(map(replace_insane, s))
486 while '__' in result:
487 result = result.replace('__', '_')
488 result = result.strip('_')
489 # Common case of "Foreign band name - English song title"
490 if restricted and result.startswith('-_'):
496 def orderedSet(iterable):
497 """ Remove all duplicates from the input iterable """
508 assert type(s) == type(u'')
510 result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
514 def encodeFilename(s, for_subprocess=False):
516 @param s The name of the file
519 assert type(s) == compat_str
521 # Python 3 has a Unicode API
522 if sys.version_info >= (3, 0):
525 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
526 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
527 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
528 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
529 if not for_subprocess:
532 # For subprocess calls, encode with locale encoding
533 # Refer to http://stackoverflow.com/a/9951851/35070
534 encoding = preferredencoding()
536 encoding = sys.getfilesystemencoding()
539 return s.encode(encoding, 'ignore')
542 def decodeOption(optval):
545 if isinstance(optval, bytes):
546 optval = optval.decode(preferredencoding())
548 assert isinstance(optval, compat_str)
551 def formatSeconds(secs):
553 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
555 return '%d:%02d' % (secs // 60, secs % 60)
560 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
561 if sys.version_info < (3, 2):
564 class HTTPSConnectionV3(httplib.HTTPSConnection):
565 def __init__(self, *args, **kwargs):
566 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
569 sock = socket.create_connection((self.host, self.port), self.timeout)
570 if getattr(self, '_tunnel_host', False):
574 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
576 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
578 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
579 def https_open(self, req):
580 return self.do_open(HTTPSConnectionV3, req)
581 return HTTPSHandlerV3(**kwargs)
583 context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
584 context.verify_mode = (ssl.CERT_NONE
585 if opts_no_check_certificate
586 else ssl.CERT_REQUIRED)
587 context.set_default_verify_paths()
589 context.load_default_certs()
590 except AttributeError:
592 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
594 class ExtractorError(Exception):
595 """Error during info extraction."""
596 def __init__(self, msg, tb=None, expected=False, cause=None):
597 """ tb, if given, is the original traceback (so that it can be printed out).
598 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
601 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
604 msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
605 super(ExtractorError, self).__init__(msg)
608 self.exc_info = sys.exc_info() # preserve original exception
611 def format_traceback(self):
612 if self.traceback is None:
614 return u''.join(traceback.format_tb(self.traceback))
617 class RegexNotFoundError(ExtractorError):
618 """Error when a regex didn't match"""
622 class DownloadError(Exception):
623 """Download Error exception.
625 This exception may be thrown by FileDownloader objects if they are not
626 configured to continue on errors. They will contain the appropriate
629 def __init__(self, msg, exc_info=None):
630 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
631 super(DownloadError, self).__init__(msg)
632 self.exc_info = exc_info
635 class SameFileError(Exception):
636 """Same File exception.
638 This exception will be thrown by FileDownloader objects if they detect
639 multiple files would have to be downloaded to the same file on disk.
644 class PostProcessingError(Exception):
645 """Post Processing exception.
647 This exception may be raised by PostProcessor's .run() method to
648 indicate an error in the postprocessing task.
650 def __init__(self, msg):
653 class MaxDownloadsReached(Exception):
654 """ --max-downloads limit has been reached. """
658 class UnavailableVideoError(Exception):
659 """Unavailable Format exception.
661 This exception will be thrown when a video is requested
662 in a format that is not available for that video.
667 class ContentTooShortError(Exception):
668 """Content Too Short exception.
670 This exception may be raised by FileDownloader objects when a file they
671 download is too small for what the server announced first, indicating
672 the connection was probably interrupted.
678 def __init__(self, downloaded, expected):
679 self.downloaded = downloaded
680 self.expected = expected
682 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
683 """Handler for HTTP requests and responses.
685 This class, when installed with an OpenerDirector, automatically adds
686 the standard headers to every HTTP request and handles gzipped and
687 deflated responses from web servers. If compression is to be avoided in
688 a particular request, the original request in the program code only has
689 to include the HTTP header "Youtubedl-No-Compression", which will be
690 removed before making the real request.
692 Part of this code was copied from:
694 http://techknack.net/python-urllib2-handlers/
696 Andrew Rowls, the author of that code, agreed to release it to the
703 return zlib.decompress(data, -zlib.MAX_WBITS)
705 return zlib.decompress(data)
708 def addinfourl_wrapper(stream, headers, url, code):
709 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
710 return compat_urllib_request.addinfourl(stream, headers, url, code)
711 ret = compat_urllib_request.addinfourl(stream, headers, url)
715 def http_request(self, req):
716 for h,v in std_headers.items():
720 if 'Youtubedl-no-compression' in req.headers:
721 if 'Accept-encoding' in req.headers:
722 del req.headers['Accept-encoding']
723 del req.headers['Youtubedl-no-compression']
724 if 'Youtubedl-user-agent' in req.headers:
725 if 'User-agent' in req.headers:
726 del req.headers['User-agent']
727 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
728 del req.headers['Youtubedl-user-agent']
731 def http_response(self, req, resp):
734 if resp.headers.get('Content-encoding', '') == 'gzip':
735 content = resp.read()
736 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
738 uncompressed = io.BytesIO(gz.read())
739 except IOError as original_ioerror:
740 # There may be junk add the end of the file
741 # See http://stackoverflow.com/q/4928560/35070 for details
742 for i in range(1, 1024):
744 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
745 uncompressed = io.BytesIO(gz.read())
750 raise original_ioerror
751 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
752 resp.msg = old_resp.msg
754 if resp.headers.get('Content-encoding', '') == 'deflate':
755 gz = io.BytesIO(self.deflate(resp.read()))
756 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
757 resp.msg = old_resp.msg
760 https_request = http_request
761 https_response = http_response
764 def unified_strdate(date_str):
765 """Return a string with the date in the format YYYYMMDD"""
768 date_str = date_str.replace(',', ' ')
769 # %z (UTC offset) is only supported in python>=3.2
770 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
771 format_expressions = [
783 '%Y-%m-%dT%H:%M:%SZ',
784 '%Y-%m-%dT%H:%M:%S.%fZ',
785 '%Y-%m-%dT%H:%M:%S.%f0Z',
787 '%Y-%m-%dT%H:%M:%S.%f',
790 for expression in format_expressions:
792 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
795 if upload_date is None:
796 timetuple = email.utils.parsedate_tz(date_str)
798 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
801 def determine_ext(url, default_ext=u'unknown_video'):
802 guess = url.partition(u'?')[0].rpartition(u'.')[2]
803 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle file name from the media file name:
    <base>.<language>.<format> (the media extension is replaced)."""
    base = filename.rsplit('.', 1)[0]
    return u'.'.join((base, sub_lang, sub_format))
811 def date_from_str(date_str):
813 Return a datetime object from a string in the format YYYYMMDD or
814 (now|today)[+-][0-9](day|week|month|year)(s)?"""
815 today = datetime.date.today()
816 if date_str == 'now'or date_str == 'today':
818 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
819 if match is not None:
820 sign = match.group('sign')
821 time = int(match.group('time'))
824 unit = match.group('unit')
833 delta = datetime.timedelta(**{unit: time})
835 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
837 def hyphenate_date(date_str):
839 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
840 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
841 if match is not None:
842 return '-'.join(match.groups())
846 class DateRange(object):
847 """Represents a time interval between two dates"""
848 def __init__(self, start=None, end=None):
849 """start and end must be strings in the format accepted by date"""
850 if start is not None:
851 self.start = date_from_str(start)
853 self.start = datetime.datetime.min.date()
855 self.end = date_from_str(end)
857 self.end = datetime.datetime.max.date()
858 if self.start > self.end:
859 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
862 """Returns a range that only contains the given day"""
864 def __contains__(self, date):
865 """Check if the date is in the range"""
866 if not isinstance(date, datetime.date):
867 date = date_from_str(date)
868 return self.start <= date <= self.end
870 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
874 """ Returns the platform name as a compat_str """
875 res = platform.platform()
876 if isinstance(res, bytes):
877 res = res.decode(preferredencoding())
879 assert isinstance(res, compat_str)
883 def write_string(s, out=None):
886 assert type(s) == compat_str
888 if ('b' in getattr(out, 'mode', '') or
889 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
890 s = s.encode(preferredencoding(), 'ignore')
893 except UnicodeEncodeError:
894 # In Windows shells, this can fail even when the codec is just charmap!?
895 # See https://wiki.python.org/moin/PrintFails#Issue
896 if sys.platform == 'win32' and hasattr(out, 'encoding'):
897 s = s.encode(out.encoding, 'ignore').decode(out.encoding)
905 def bytes_to_intlist(bs):
908 if isinstance(bs[0], int): # Python 3
911 return [ord(c) for c in bs]
914 def intlist_to_bytes(xs):
917 if isinstance(chr(0), bytes): # Python 2
918 return ''.join([chr(x) for x in xs])
def get_cachedir(params=None):
    """Return the directory youtube-dl should use for its cache.

    Honours the 'cachedir' key of *params* when present; otherwise falls
    back to $XDG_CACHE_HOME/youtube-dl, with $XDG_CACHE_HOME defaulting
    to ~/.cache per the XDG base-directory convention.
    """
    # A `params={}` default would be a shared mutable default argument;
    # use None as the sentinel instead (behaviour is unchanged).
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
929 # Cross-platform file locking
930 if sys.platform == 'win32':
931 import ctypes.wintypes
934 class OVERLAPPED(ctypes.Structure):
936 ('Internal', ctypes.wintypes.LPVOID),
937 ('InternalHigh', ctypes.wintypes.LPVOID),
938 ('Offset', ctypes.wintypes.DWORD),
939 ('OffsetHigh', ctypes.wintypes.DWORD),
940 ('hEvent', ctypes.wintypes.HANDLE),
943 kernel32 = ctypes.windll.kernel32
944 LockFileEx = kernel32.LockFileEx
945 LockFileEx.argtypes = [
946 ctypes.wintypes.HANDLE, # hFile
947 ctypes.wintypes.DWORD, # dwFlags
948 ctypes.wintypes.DWORD, # dwReserved
949 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
950 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
951 ctypes.POINTER(OVERLAPPED) # Overlapped
953 LockFileEx.restype = ctypes.wintypes.BOOL
954 UnlockFileEx = kernel32.UnlockFileEx
955 UnlockFileEx.argtypes = [
956 ctypes.wintypes.HANDLE, # hFile
957 ctypes.wintypes.DWORD, # dwReserved
958 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
959 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
960 ctypes.POINTER(OVERLAPPED) # Overlapped
962 UnlockFileEx.restype = ctypes.wintypes.BOOL
963 whole_low = 0xffffffff
964 whole_high = 0x7fffffff
966 def _lock_file(f, exclusive):
967 overlapped = OVERLAPPED()
968 overlapped.Offset = 0
969 overlapped.OffsetHigh = 0
970 overlapped.hEvent = 0
971 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
972 handle = msvcrt.get_osfhandle(f.fileno())
973 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
974 whole_low, whole_high, f._lock_file_overlapped_p):
975 raise OSError('Locking file failed: %r' % ctypes.FormatError())
978 assert f._lock_file_overlapped_p
979 handle = msvcrt.get_osfhandle(f.fileno())
980 if not UnlockFileEx(handle, 0,
981 whole_low, whole_high, f._lock_file_overlapped_p):
982 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
987 def _lock_file(f, exclusive):
988 fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
991 fcntl.lockf(f, fcntl.LOCK_UN)
994 class locked_file(object):
995 def __init__(self, filename, mode, encoding=None):
996 assert mode in ['r', 'a', 'w']
997 self.f = io.open(filename, mode, encoding=encoding)
1000 def __enter__(self):
1001 exclusive = self.mode != 'r'
1003 _lock_file(self.f, exclusive)
1009 def __exit__(self, etype, value, traceback):
1011 _unlock_file(self.f)
1018 def write(self, *args):
1019 return self.f.write(*args)
1021 def read(self, *args):
1022 return self.f.read(*args)
1025 def shell_quote(args):
1027 encoding = sys.getfilesystemencoding()
1028 if encoding is None:
1031 if isinstance(a, bytes):
1032 # We may get a filename encoded with 'encodeFilename'
1033 a = a.decode(encoding)
1034 quoted_args.append(pipes.quote(a))
1035 return u' '.join(quoted_args)
1038 def takewhile_inclusive(pred, seq):
1039 """ Like itertools.takewhile, but include the latest evaluated element
1040 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # The payload rides in the fragment, which servers never see;
    # unsmuggle_url() elsewhere in this module extracts it again.
    payload = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return u'#'.join((url, payload))
1055 def unsmuggle_url(smug_url, default=None):
1056 if not '#__youtubedl_smuggle' in smug_url:
1057 return smug_url, default
1058 url, _, sdata = smug_url.rpartition(u'#')
1059 jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
1060 data = json.loads(jsond)
1064 def format_bytes(bytes):
1067 if type(bytes) is str:
1068 bytes = float(bytes)
1072 exponent = int(math.log(bytes, 1024.0))
1073 suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1074 converted = float(bytes) / float(1024 ** exponent)
1075 return u'%.2f%s' % (converted, suffix)
1078 def str_to_int(int_str):
1079 int_str = re.sub(r'[,\.]', u'', int_str)
1083 def get_term_width():
1084 columns = os.environ.get('COLUMNS', None)
1089 sp = subprocess.Popen(
1091 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1092 out, err = sp.communicate()
1093 return int(out.split()[1])
1099 def month_by_name(name):
1100 """ Return the number of a month by (locale-independently) English name """
1103 u'January', u'February', u'March', u'April', u'May', u'June',
1104 u'July', u'August', u'September', u'October', u'November', u'December']
1106 return ENGLISH_NAMES.index(name) + 1
1111 def fix_xml_ampersands(xml_str):
1112 """Replace all the '&' by '&' in XML"""
1114 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1119 def setproctitle(title):
1120 assert isinstance(title, compat_str)
1122 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1126 buf = ctypes.create_string_buffer(len(title) + 1)
1127 buf.value = title.encode('utf-8')
1129 libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
1130 except AttributeError:
1131 return # Strange libc, just skip this
1134 def remove_start(s, start):
1135 if s.startswith(start):
1136 return s[len(start):]
def url_basename(url):
    """Return the last path component of *url* (empty string for a bare host
    or a trailing-slash-only path)."""
    parsed_path = compat_urlparse.urlparse(url).path
    components = parsed_path.strip(u'/').split(u'/')
    return components[-1]
1145 class HEADRequest(compat_urllib_request.Request):
1146 def get_method(self):
def int_or_none(v, scale=1):
    """Convert *v* to an int divided (floor) by *scale*; None passes through."""
    if v is None:
        return None
    return int(v) // scale
1154 def parse_duration(s):
1159 r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
1162 res = int(m.group('secs'))
1164 res += int(m.group('mins')) * 60
1165 if m.group('hours'):
1166 res += int(m.group('hours')) * 60 * 60
def prepend_extension(filename, ext):
    """Insert *ext* before the file's real extension: 'a.mp4' -> 'a.<ext>.mp4'."""
    root, real_ext = os.path.splitext(filename)
    return u'%s.%s%s' % (root, ext, real_ext)
1175 def check_executable(exe, args=[]):
1176 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1177 args can be a list of arguments for a short output (like -version) """
1179 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1185 class PagedList(object):
1186 def __init__(self, pagefunc, pagesize):
1187 self._pagefunc = pagefunc
1188 self._pagesize = pagesize
1191 # This is only useful for tests
1192 return len(self.getslice())
1194 def getslice(self, start=0, end=None):
1196 for pagenum in itertools.count(start // self._pagesize):
1197 firstid = pagenum * self._pagesize
1198 nextfirstid = pagenum * self._pagesize + self._pagesize
1199 if start >= nextfirstid:
1202 page_results = list(self._pagefunc(pagenum))
1205 start % self._pagesize
1206 if firstid <= start < nextfirstid
1210 ((end - 1) % self._pagesize) + 1
1211 if (end is not None and firstid <= end <= nextfirstid)
1214 if startv != 0 or endv is not None:
1215 page_results = page_results[startv:endv]
1216 res.extend(page_results)
1218 # A little optimization - if current page is not "full", ie. does
1219 # not contain page_size videos then we can assume that this page
1220 # is the last one - there are no more ids on further pages -
1221 # i.e. no need to query again.
1222 if len(page_results) + startv < self._pagesize:
1225 # If we got the whole page, but the next page is not interesting,
1226 # break out early as well
1227 if end == nextfirstid:
1232 def uppercase_escape(s):
1234 r'\\U([0-9a-fA-F]{8})',
1235 lambda m: compat_chr(int(m.group(1), base=16)), s)
1238 struct.pack(u'!I', 0)
1240 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
1241 def struct_pack(spec, *args):
1242 if isinstance(spec, compat_str):
1243 spec = spec.encode('ascii')
1244 return struct.pack(spec, *args)
1246 def struct_unpack(spec, *args):
1247 if isinstance(spec, compat_str):
1248 spec = spec.encode('ascii')
1249 return struct.unpack(spec, *args)
1251 struct_pack = struct.pack
1252 struct_unpack = struct.unpack
1255 def read_batch_urls(batch_fd):
1257 if not isinstance(url, compat_str):
1258 url = url.decode('utf-8', 'replace')
1259 BOM_UTF8 = u'\xef\xbb\xbf'
1260 if url.startswith(BOM_UTF8):
1261 url = url[len(BOM_UTF8):]
1263 if url.startswith(('#', ';', ']')):
1267 with contextlib.closing(batch_fd) as fd:
1268 return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ascii bytes, as urllib requires."""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1276 class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
1277 def doctype(self, name, pubid, system):
1278 pass # Ignore doctypes
1280 parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
1281 kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
1282 return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
1285 if sys.version_info < (3, 0) and sys.platform == 'win32':
def compat_getpass(prompt, *args, **kwargs):
    """getpass.getpass() replacement for Windows/Python 2, where the prompt
    must be a byte string: encode unicode prompts with the locale encoding."""
    encoded_prompt = (prompt.encode(preferredencoding())
                      if isinstance(prompt, compat_str) else prompt)
    return getpass.getpass(encoded_prompt, *args, **kwargs)
1291 compat_getpass = getpass.getpass