2 # -*- coding: utf-8 -*-
26 import urllib.request as compat_urllib_request
27 except ImportError: # Python 2
28 import urllib2 as compat_urllib_request
31 import urllib.error as compat_urllib_error
32 except ImportError: # Python 2
33 import urllib2 as compat_urllib_error
36 import urllib.parse as compat_urllib_parse
37 except ImportError: # Python 2
38 import urllib as compat_urllib_parse
41 from urllib.parse import urlparse as compat_urllib_parse_urlparse
42 except ImportError: # Python 2
43 from urlparse import urlparse as compat_urllib_parse_urlparse
46 import urllib.parse as compat_urlparse
47 except ImportError: # Python 2
48 import urlparse as compat_urlparse
51 import http.cookiejar as compat_cookiejar
52 except ImportError: # Python 2
53 import cookielib as compat_cookiejar
56 import html.entities as compat_html_entities
57 except ImportError: # Python 2
58 import htmlentitydefs as compat_html_entities
61 import html.parser as compat_html_parser
62 except ImportError: # Python 2
63 import HTMLParser as compat_html_parser
66 import http.client as compat_http_client
67 except ImportError: # Python 2
68 import httplib as compat_http_client
71 from urllib.error import HTTPError as compat_HTTPError
72 except ImportError: # Python 2
73 from urllib2 import HTTPError as compat_HTTPError
76 from urllib.request import urlretrieve as compat_urlretrieve
77 except ImportError: # Python 2
78 from urllib import urlretrieve as compat_urlretrieve
82 from subprocess import DEVNULL
83 compat_subprocess_get_DEVNULL = lambda: DEVNULL
85 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
88 from urllib.parse import parse_qs as compat_parse_qs
89 except ImportError: # Python 2
90 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
91 # Python 2's version is apparently totally broken
92 def _unquote(string, encoding='utf-8', errors='replace'):
95 res = string.split('%')
102 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
109 pct_sequence += item[:2].decode('hex')
112 # This segment was just a single percent-encoded character.
113 # May be part of a sequence of code units, so delay decoding.
114 # (Stored in pct_sequence).
118 # Encountered non-percent-encoded characters. Flush the current
120 string += pct_sequence.decode(encoding, errors) + rest
123 # Flush the final pct_sequence
124 string += pct_sequence.decode(encoding, errors)
127 def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
128 encoding='utf-8', errors='replace'):
129 qs, _coerce_result = qs, unicode
130 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
132 for name_value in pairs:
133 if not name_value and not strict_parsing:
135 nv = name_value.split('=', 1)
138 raise ValueError("bad query field: %r" % (name_value,))
139 # Handle case of a control-name with no equal sign
140 if keep_blank_values:
144 if len(nv[1]) or keep_blank_values:
145 name = nv[0].replace('+', ' ')
146 name = _unquote(name, encoding=encoding, errors=errors)
147 name = _coerce_result(name)
148 value = nv[1].replace('+', ' ')
149 value = _unquote(value, encoding=encoding, errors=errors)
150 value = _coerce_result(value)
151 r.append((name, value))
154 def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
155 encoding='utf-8', errors='replace'):
157 pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
158 encoding=encoding, errors=errors)
159 for name, value in pairs:
160 if name in parsed_result:
161 parsed_result[name].append(value)
163 parsed_result[name] = [value]
167 compat_str = unicode # Python 2
172 compat_chr = unichr # Python 2
177 if type(c) is int: return c
180 # This is not clearly defined otherwise
181 compiled_regex_type = type(re.compile(''))
184 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
185 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
186 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
187 'Accept-Encoding': 'gzip, deflate',
188 'Accept-Language': 'en-us,en;q=0.5',
191 def preferredencoding():
192 """Get preferred encoding.
194 Returns the best encoding scheme for the system, based on
195 locale.getpreferredencoding() and some further tweaks.
198 pref = locale.getpreferredencoding()
205 if sys.version_info < (3,0):
207 print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
210 assert type(s) == type(u'')
213 # In Python 2.x, json.dump expects a bytestream.
214 # In Python 3.x, it writes to a character stream
215 if sys.version_info < (3,0):
216 def write_json_file(obj, fn):
217 with open(fn, 'wb') as f:
220 def write_json_file(obj, fn):
221 with open(fn, 'w', encoding='utf-8') as f:
224 if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
    """ Find the xpath xpath[@key=val] """
    # Restrict key/val so building the predicate by interpolation stays safe.
    assert re.match(r'^[a-zA-Z]+$', key)
    assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
    expr = u"%s[@%s='%s']" % (xpath, key, val)
    return node.find(expr)
232 def find_xpath_attr(node, xpath, key, val):
233 for f in node.findall(xpath):
234 if f.attrib.get(key) == val:
238 # On python2.6 the xml.etree.ElementTree.Element methods don't support
239 # the namespace parameter
240 def xpath_with_ns(path, ns_map):
241 components = [c.split(':') for c in path.split('/')]
245 replaced.append(c[0])
248 replaced.append('{%s}%s' % (ns_map[ns], tag))
249 return '/'.join(replaced)
251 def htmlentity_transform(matchobj):
252 """Transforms an HTML entity to a character.
254 This function receives a match object and is intended to be used with
255 the re.sub() function.
257 entity = matchobj.group(1)
259 # Known non-numeric HTML entity
260 if entity in compat_html_entities.name2codepoint:
261 return compat_chr(compat_html_entities.name2codepoint[entity])
263 mobj = re.match(u'(?u)#(x?\\d+)', entity)
265 numstr = mobj.group(1)
266 if numstr.startswith(u'x'):
268 numstr = u'0%s' % numstr
271 return compat_chr(int(numstr, base))
273 # Unknown entity in name, return its literal representation
274 return (u'&%s;' % entity)
276 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
277 class BaseHTMLParser(compat_html_parser.HTMLParser):
279 compat_html_parser.HTMLParser.__init__(self)
282 def loads(self, html):
287 class AttrParser(BaseHTMLParser):
288 """Modified HTMLParser that isolates a tag with the specified attribute"""
289 def __init__(self, attribute, value):
290 self.attribute = attribute
295 self.watch_startpos = False
297 BaseHTMLParser.__init__(self)
299 def error(self, message):
300 if self.error_count > 10 or self.started:
301 raise compat_html_parser.HTMLParseError(message, self.getpos())
302 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
303 self.error_count += 1
306 def handle_starttag(self, tag, attrs):
309 self.find_startpos(None)
310 if self.attribute in attrs and attrs[self.attribute] == self.value:
313 self.watch_startpos = True
315 if not tag in self.depth: self.depth[tag] = 0
318 def handle_endtag(self, tag):
320 if tag in self.depth: self.depth[tag] -= 1
321 if self.depth[self.result[0]] == 0:
323 self.result.append(self.getpos())
325 def find_startpos(self, x):
326 """Needed to put the start position of the result (self.result[1])
327 after the opening tag with the requested id"""
328 if self.watch_startpos:
329 self.watch_startpos = False
330 self.result.append(self.getpos())
331 handle_entityref = handle_charref = handle_data = handle_comment = \
332 handle_decl = handle_pi = unknown_decl = find_startpos
334 def get_result(self):
335 if self.result is None:
337 if len(self.result) != 3:
339 lines = self.html.split('\n')
340 lines = lines[self.result[1][0]-1:self.result[2][0]]
341 lines[0] = lines[0][self.result[1][1]:]
343 lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
344 lines[-1] = lines[-1][:self.result[2][1]]
345 return '\n'.join(lines).strip()
346 # Hack for https://github.com/rg3/youtube-dl/issues/662
347 if sys.version_info < (2, 7, 3):
348 AttrParser.parse_endtag = (lambda self, i:
349 i + len("</scr'+'ipt>")
350 if self.rawdata[i:].startswith("</scr'+'ipt>")
351 else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Convenience wrapper around the generic attribute search.
    attribute = "id"
    return get_element_by_attribute(attribute, id, html)
357 def get_element_by_attribute(attribute, value, html):
358 """Return the content of the tag with the specified attribute in the passed HTML document"""
359 parser = AttrParser(attribute, value)
362 except compat_html_parser.HTMLParseError:
364 return parser.get_result()
366 class MetaParser(BaseHTMLParser):
368 Modified HTMLParser that isolates a meta tag with the specified name
371 def __init__(self, name):
372 BaseHTMLParser.__init__(self)
377 def handle_starttag(self, tag, attrs):
381 if attrs.get('name') == self.name:
382 self.result = attrs.get('content')
384 def get_result(self):
387 def get_meta_content(name, html):
389 Return the content attribute from the meta tag with the given name attribute.
391 parser = MetaParser(name)
394 except compat_html_parser.HTMLParseError:
396 return parser.get_result()
399 def clean_html(html):
400 """Clean an HTML snippet into a readable string"""
402 html = html.replace('\n', ' ')
403 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
404 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
406 html = re.sub('<.*?>', '', html)
407 # Replace html entities
408 html = unescapeHTML(html)
412 def sanitize_open(filename, open_mode):
413 """Try to open the given filename, and slightly tweak it if this fails.
415 Attempts to open the given filename. If this fails, it tries to change
416 the filename slightly, step by step, until it's either able to open it
417 or it fails and raises a final exception, like the standard open()
420 It returns the tuple (stream, definitive_file_name).
424 if sys.platform == 'win32':
426 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
427 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
428 stream = open(encodeFilename(filename), open_mode)
429 return (stream, filename)
430 except (IOError, OSError) as err:
431 if err.errno in (errno.EACCES,):
434 # In case of error, try to remove win32 forbidden chars
435 alt_filename = os.path.join(
436 re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
437 for path_part in os.path.split(filename)
439 if alt_filename == filename:
442 # An exception here should be caught in the caller
443 stream = open(encodeFilename(filename), open_mode)
444 return (stream, alt_filename)
447 def timeconvert(timestr):
448 """Convert RFC 2822 defined time string into system timestamp"""
450 timetuple = email.utils.parsedate_tz(timestr)
451 if timetuple is not None:
452 timestamp = email.utils.mktime_tz(timetuple)
455 def sanitize_filename(s, restricted=False, is_id=False):
456 """Sanitizes a string so it could be used as part of a filename.
457 If restricted is set, use a stricter subset of allowed characters.
458 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
460 def replace_insane(char):
461 if char == '?' or ord(char) < 32 or ord(char) == 127:
464 return '' if restricted else '\''
466 return '_-' if restricted else ' -'
467 elif char in '\\/|*<>':
469 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
471 if restricted and ord(char) > 127:
475 result = u''.join(map(replace_insane, s))
477 while '__' in result:
478 result = result.replace('__', '_')
479 result = result.strip('_')
480 # Common case of "Foreign band name - English song title"
481 if restricted and result.startswith('-_'):
487 def orderedSet(iterable):
488 """ Remove all duplicates from the input iterable """
499 assert type(s) == type(u'')
501 result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
505 def encodeFilename(s, for_subprocess=False):
507 @param s The name of the file
510 assert type(s) == compat_str
512 # Python 3 has a Unicode API
513 if sys.version_info >= (3, 0):
516 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
517 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
518 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
519 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
520 if not for_subprocess:
523 # For subprocess calls, encode with locale encoding
524 # Refer to http://stackoverflow.com/a/9951851/35070
525 encoding = preferredencoding()
527 encoding = sys.getfilesystemencoding()
530 return s.encode(encoding, 'ignore')
533 def decodeOption(optval):
536 if isinstance(optval, bytes):
537 optval = optval.decode(preferredencoding())
539 assert isinstance(optval, compat_str)
542 def formatSeconds(secs):
544 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
546 return '%d:%02d' % (secs // 60, secs % 60)
551 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
552 if sys.version_info < (3, 2):
555 class HTTPSConnectionV3(httplib.HTTPSConnection):
556 def __init__(self, *args, **kwargs):
557 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
560 sock = socket.create_connection((self.host, self.port), self.timeout)
561 if getattr(self, '_tunnel_host', False):
565 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
567 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
569 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
570 def https_open(self, req):
571 return self.do_open(HTTPSConnectionV3, req)
572 return HTTPSHandlerV3(**kwargs)
574 context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
575 context.verify_mode = (ssl.CERT_NONE
576 if opts_no_check_certificate
577 else ssl.CERT_REQUIRED)
578 context.set_default_verify_paths()
580 context.load_default_certs()
581 except AttributeError:
583 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
585 class ExtractorError(Exception):
586 """Error during info extraction."""
587 def __init__(self, msg, tb=None, expected=False, cause=None):
588 """ tb, if given, is the original traceback (so that it can be printed out).
589 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
592 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
595 msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
596 super(ExtractorError, self).__init__(msg)
599 self.exc_info = sys.exc_info() # preserve original exception
602 def format_traceback(self):
603 if self.traceback is None:
605 return u''.join(traceback.format_tb(self.traceback))
608 class RegexNotFoundError(ExtractorError):
609 """Error when a regex didn't match"""
613 class DownloadError(Exception):
614 """Download Error exception.
616 This exception may be thrown by FileDownloader objects if they are not
617 configured to continue on errors. They will contain the appropriate
def __init__(self, msg, exc_info=None):
    """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
    # Keep the original exc_info so callers can inspect the root cause.
    self.exc_info = exc_info
    super(DownloadError, self).__init__(msg)
626 class SameFileError(Exception):
627 """Same File exception.
629 This exception will be thrown by FileDownloader objects if they detect
630 multiple files would have to be downloaded to the same file on disk.
635 class PostProcessingError(Exception):
636 """Post Processing exception.
638 This exception may be raised by PostProcessor's .run() method to
639 indicate an error in the postprocessing task.
641 def __init__(self, msg):
644 class MaxDownloadsReached(Exception):
645 """ --max-downloads limit has been reached. """
649 class UnavailableVideoError(Exception):
650 """Unavailable Format exception.
652 This exception will be thrown when a video is requested
653 in a format that is not available for that video.
658 class ContentTooShortError(Exception):
659 """Content Too Short exception.
661 This exception may be raised by FileDownloader objects when a file they
662 download is too small for what the server announced first, indicating
663 the connection was probably interrupted.
def __init__(self, downloaded, expected):
    # Record both byte counts so the caller can report the size mismatch.
    self.expected = expected
    self.downloaded = downloaded
673 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
674 """Handler for HTTP requests and responses.
676 This class, when installed with an OpenerDirector, automatically adds
677 the standard headers to every HTTP request and handles gzipped and
678 deflated responses from web servers. If compression is to be avoided in
679 a particular request, the original request in the program code only has
680 to include the HTTP header "Youtubedl-No-Compression", which will be
681 removed before making the real request.
683 Part of this code was copied from:
685 http://techknack.net/python-urllib2-handlers/
687 Andrew Rowls, the author of that code, agreed to release it to the
694 return zlib.decompress(data, -zlib.MAX_WBITS)
696 return zlib.decompress(data)
699 def addinfourl_wrapper(stream, headers, url, code):
700 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
701 return compat_urllib_request.addinfourl(stream, headers, url, code)
702 ret = compat_urllib_request.addinfourl(stream, headers, url)
706 def http_request(self, req):
707 for h,v in std_headers.items():
711 if 'Youtubedl-no-compression' in req.headers:
712 if 'Accept-encoding' in req.headers:
713 del req.headers['Accept-encoding']
714 del req.headers['Youtubedl-no-compression']
715 if 'Youtubedl-user-agent' in req.headers:
716 if 'User-agent' in req.headers:
717 del req.headers['User-agent']
718 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
719 del req.headers['Youtubedl-user-agent']
722 def http_response(self, req, resp):
725 if resp.headers.get('Content-encoding', '') == 'gzip':
726 content = resp.read()
727 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
729 uncompressed = io.BytesIO(gz.read())
730 except IOError as original_ioerror:
731 # There may be junk add the end of the file
732 # See http://stackoverflow.com/q/4928560/35070 for details
733 for i in range(1, 1024):
735 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
736 uncompressed = io.BytesIO(gz.read())
741 raise original_ioerror
742 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
743 resp.msg = old_resp.msg
745 if resp.headers.get('Content-encoding', '') == 'deflate':
746 gz = io.BytesIO(self.deflate(resp.read()))
747 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
748 resp.msg = old_resp.msg
751 https_request = http_request
752 https_response = http_response
755 def unified_strdate(date_str):
756 """Return a string with the date in the format YYYYMMDD"""
759 date_str = date_str.replace(',', ' ')
760 # %z (UTC offset) is only supported in python>=3.2
761 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
762 format_expressions = [
771 '%Y-%m-%dT%H:%M:%SZ',
772 '%Y-%m-%dT%H:%M:%S.%fZ',
773 '%Y-%m-%dT%H:%M:%S.%f0Z',
777 for expression in format_expressions:
779 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
782 if upload_date is None:
783 timetuple = email.utils.parsedate_tz(date_str)
785 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
788 def determine_ext(url, default_ext=u'unknown_video'):
789 guess = url.partition(u'?')[0].rpartition(u'.')[2]
790 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle filename: <base>.<language>.<format>."""
    base = filename.rsplit('.', 1)[0]
    return u'.'.join((base, sub_lang, sub_format))
798 def date_from_str(date_str):
800 Return a datetime object from a string in the format YYYYMMDD or
801 (now|today)[+-][0-9](day|week|month|year)(s)?"""
802 today = datetime.date.today()
803 if date_str == 'now'or date_str == 'today':
805 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
806 if match is not None:
807 sign = match.group('sign')
808 time = int(match.group('time'))
811 unit = match.group('unit')
820 delta = datetime.timedelta(**{unit: time})
822 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
824 def hyphenate_date(date_str):
826 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
827 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
828 if match is not None:
829 return '-'.join(match.groups())
833 class DateRange(object):
834 """Represents a time interval between two dates"""
835 def __init__(self, start=None, end=None):
836 """start and end must be strings in the format accepted by date"""
837 if start is not None:
838 self.start = date_from_str(start)
840 self.start = datetime.datetime.min.date()
842 self.end = date_from_str(end)
844 self.end = datetime.datetime.max.date()
845 if self.start > self.end:
846 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
849 """Returns a range that only contains the given day"""
def __contains__(self, date):
    """Check if the date is in the range"""
    # Strings are parsed via date_from_str; datetime.date passes straight through.
    when = date if isinstance(date, datetime.date) else date_from_str(date)
    return self.start <= when <= self.end
857 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
861 """ Returns the platform name as a compat_str """
862 res = platform.platform()
863 if isinstance(res, bytes):
864 res = res.decode(preferredencoding())
866 assert isinstance(res, compat_str)
870 def write_string(s, out=None):
873 assert type(s) == compat_str
875 if ('b' in getattr(out, 'mode', '') or
876 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
877 s = s.encode(preferredencoding(), 'ignore')
880 except UnicodeEncodeError:
881 # In Windows shells, this can fail even when the codec is just charmap!?
882 # See https://wiki.python.org/moin/PrintFails#Issue
883 if sys.platform == 'win32' and hasattr(out, 'encoding'):
884 s = s.encode(out.encoding, 'ignore').decode(out.encoding)
892 def bytes_to_intlist(bs):
895 if isinstance(bs[0], int): # Python 3
898 return [ord(c) for c in bs]
901 def intlist_to_bytes(xs):
904 if isinstance(chr(0), bytes): # Python 2
905 return ''.join([chr(x) for x in xs])
def get_cachedir(params=None):
    """Return the youtube-dl cache directory.

    An explicit 'cachedir' entry in params wins; otherwise fall back to
    $XDG_CACHE_HOME/youtube-dl (or ~/.cache/youtube-dl).
    """
    # A mutable default argument ({}) is a classic Python pitfall: the same
    # dict would be shared across all calls.  Use a None sentinel instead.
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
916 # Cross-platform file locking
917 if sys.platform == 'win32':
918 import ctypes.wintypes
921 class OVERLAPPED(ctypes.Structure):
923 ('Internal', ctypes.wintypes.LPVOID),
924 ('InternalHigh', ctypes.wintypes.LPVOID),
925 ('Offset', ctypes.wintypes.DWORD),
926 ('OffsetHigh', ctypes.wintypes.DWORD),
927 ('hEvent', ctypes.wintypes.HANDLE),
930 kernel32 = ctypes.windll.kernel32
931 LockFileEx = kernel32.LockFileEx
932 LockFileEx.argtypes = [
933 ctypes.wintypes.HANDLE, # hFile
934 ctypes.wintypes.DWORD, # dwFlags
935 ctypes.wintypes.DWORD, # dwReserved
936 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
937 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
938 ctypes.POINTER(OVERLAPPED) # Overlapped
940 LockFileEx.restype = ctypes.wintypes.BOOL
941 UnlockFileEx = kernel32.UnlockFileEx
942 UnlockFileEx.argtypes = [
943 ctypes.wintypes.HANDLE, # hFile
944 ctypes.wintypes.DWORD, # dwReserved
945 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
946 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
947 ctypes.POINTER(OVERLAPPED) # Overlapped
949 UnlockFileEx.restype = ctypes.wintypes.BOOL
950 whole_low = 0xffffffff
951 whole_high = 0x7fffffff
def _lock_file(f, exclusive):
    # LockFileEx requires an OVERLAPPED struct; lock from offset 0.
    overlapped = OVERLAPPED()
    overlapped.Offset = 0
    overlapped.OffsetHigh = 0
    overlapped.hEvent = 0
    # Keep the pointer alive on the file object for the matching unlock call.
    f._lock_file_overlapped_p = ctypes.pointer(overlapped)
    handle = msvcrt.get_osfhandle(f.fileno())
    flags = 0x2 if exclusive else 0x0
    if not LockFileEx(handle, flags, 0,
                      whole_low, whole_high, f._lock_file_overlapped_p):
        raise OSError('Locking file failed: %r' % ctypes.FormatError())
965 assert f._lock_file_overlapped_p
966 handle = msvcrt.get_osfhandle(f.fileno())
967 if not UnlockFileEx(handle, 0,
968 whole_low, whole_high, f._lock_file_overlapped_p):
969 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
def _lock_file(f, exclusive):
    # LOCK_EX = exclusive writer lock, LOCK_SH = shared reader lock.
    mode = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
    fcntl.lockf(f, mode)
978 fcntl.lockf(f, fcntl.LOCK_UN)
981 class locked_file(object):
982 def __init__(self, filename, mode, encoding=None):
983 assert mode in ['r', 'a', 'w']
984 self.f = io.open(filename, mode, encoding=encoding)
988 exclusive = self.mode != 'r'
990 _lock_file(self.f, exclusive)
996 def __exit__(self, etype, value, traceback):
def write(self, *args):
    # Delegate to the wrapped file object; the lock is held by the
    # surrounding with-statement (__enter__/__exit__), not per write.
    return self.f.write(*args)
def read(self, *args):
    # Delegate to the wrapped file object; the lock is held by the
    # surrounding with-statement (__enter__/__exit__), not per read.
    return self.f.read(*args)
1012 def shell_quote(args):
1014 encoding = sys.getfilesystemencoding()
1015 if encoding is None:
1018 if isinstance(a, bytes):
1019 # We may get a filename encoded with 'encodeFilename'
1020 a = a.decode(encoding)
1021 quoted_args.append(pipes.quote(a))
1022 return u' '.join(quoted_args)
1025 def takewhile_inclusive(pred, seq):
1026 """ Like itertools.takewhile, but include the latest evaluated element
1027 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Serialize the payload and hide it behind the fragment separator, where
    # servers ignore it but unsmuggle_url can recover it.
    payload = json.dumps(data)
    sdata = compat_urllib_parse.urlencode({u'__youtubedl_smuggle': payload})
    return u'#'.join((url, sdata))
1042 def unsmuggle_url(smug_url, default=None):
1043 if not '#__youtubedl_smuggle' in smug_url:
1044 return smug_url, default
1045 url, _, sdata = smug_url.rpartition(u'#')
1046 jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
1047 data = json.loads(jsond)
1051 def format_bytes(bytes):
1054 if type(bytes) is str:
1055 bytes = float(bytes)
1059 exponent = int(math.log(bytes, 1024.0))
1060 suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1061 converted = float(bytes) / float(1024 ** exponent)
1062 return u'%.2f%s' % (converted, suffix)
1065 def str_to_int(int_str):
1066 int_str = re.sub(r'[,\.]', u'', int_str)
1070 def get_term_width():
1071 columns = os.environ.get('COLUMNS', None)
1076 sp = subprocess.Popen(
1078 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1079 out, err = sp.communicate()
1080 return int(out.split()[1])
1086 def month_by_name(name):
1087 """ Return the number of a month by (locale-independently) English name """
1090 u'January', u'February', u'March', u'April', u'May', u'June',
1091 u'July', u'August', u'September', u'October', u'November', u'December']
1093 return ENGLISH_NAMES.index(name) + 1
1098 def fix_xml_ampersands(xml_str):
1099 """Replace all the '&' by '&' in XML"""
1101 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1106 def setproctitle(title):
1107 assert isinstance(title, compat_str)
1109 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1113 buf = ctypes.create_string_buffer(len(title) + 1)
1114 buf.value = title.encode('utf-8')
1116 libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
1117 except AttributeError:
1118 return # Strange libc, just skip this
1121 def remove_start(s, start):
1122 if s.startswith(start):
1123 return s[len(start):]
def url_basename(url):
    """Return the last path component of url ('' when the path is empty)."""
    parsed_path = compat_urlparse.urlparse(url).path
    segments = parsed_path.strip(u'/').split(u'/')
    return segments[-1]
1132 class HEADRequest(compat_urllib_request.Request):
1133 def get_method(self):
def int_or_none(v, scale=1):
    """Coerce v to an int floor-divided by scale; None passes through unchanged."""
    if v is None:
        return None
    return int(v) // scale
1141 def parse_duration(s):
1146 r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s)
1149 res = int(m.group('secs'))
1151 res += int(m.group('mins')) * 60
1152 if m.group('hours'):
1153 res += int(m.group('hours')) * 60 * 60
def prepend_extension(filename, ext):
    """Insert ext before the real extension: 'a.mp4' -> 'a.<ext>.mp4'."""
    stem, suffix = os.path.splitext(filename)
    return u'%s.%s%s' % (stem, ext, suffix)
1162 def check_executable(exe, args=[]):
1163 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1164 args can be a list of arguments for a short output (like -version) """
1166 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1172 class PagedList(object):
1173 def __init__(self, pagefunc, pagesize):
1174 self._pagefunc = pagefunc
1175 self._pagesize = pagesize
1178 # This is only useful for tests
1179 return len(self.getslice())
1181 def getslice(self, start=0, end=None):
1183 for pagenum in itertools.count(start // self._pagesize):
1184 firstid = pagenum * self._pagesize
1185 nextfirstid = pagenum * self._pagesize + self._pagesize
1186 if start >= nextfirstid:
1189 page_results = list(self._pagefunc(pagenum))
1192 start % self._pagesize
1193 if firstid <= start < nextfirstid
1197 ((end - 1) % self._pagesize) + 1
1198 if (end is not None and firstid <= end <= nextfirstid)
1201 if startv != 0 or endv is not None:
1202 page_results = page_results[startv:endv]
1203 res.extend(page_results)
1205 # A little optimization - if current page is not "full", ie. does
1206 # not contain page_size videos then we can assume that this page
1207 # is the last one - there are no more ids on further pages -
1208 # i.e. no need to query again.
1209 if len(page_results) + startv < self._pagesize:
1212 # If we got the whole page, but the next page is not interesting,
1213 # break out early as well
1214 if end == nextfirstid:
1219 def uppercase_escape(s):
1221 r'\\U([0-9a-fA-F]{8})',
1222 lambda m: compat_chr(int(m.group(1), base=16)), s)