2 # -*- coding: utf-8 -*-
26 import urllib.request as compat_urllib_request
27 except ImportError: # Python 2
28 import urllib2 as compat_urllib_request
31 import urllib.error as compat_urllib_error
32 except ImportError: # Python 2
33 import urllib2 as compat_urllib_error
36 import urllib.parse as compat_urllib_parse
37 except ImportError: # Python 2
38 import urllib as compat_urllib_parse
41 from urllib.parse import urlparse as compat_urllib_parse_urlparse
42 except ImportError: # Python 2
43 from urlparse import urlparse as compat_urllib_parse_urlparse
46 import urllib.parse as compat_urlparse
47 except ImportError: # Python 2
48 import urlparse as compat_urlparse
51 import http.cookiejar as compat_cookiejar
52 except ImportError: # Python 2
53 import cookielib as compat_cookiejar
56 import html.entities as compat_html_entities
57 except ImportError: # Python 2
58 import htmlentitydefs as compat_html_entities
61 import html.parser as compat_html_parser
62 except ImportError: # Python 2
63 import HTMLParser as compat_html_parser
66 import http.client as compat_http_client
67 except ImportError: # Python 2
68 import httplib as compat_http_client
71 from urllib.error import HTTPError as compat_HTTPError
72 except ImportError: # Python 2
73 from urllib2 import HTTPError as compat_HTTPError
76 from urllib.request import urlretrieve as compat_urlretrieve
77 except ImportError: # Python 2
78 from urllib import urlretrieve as compat_urlretrieve
82 from subprocess import DEVNULL
83 compat_subprocess_get_DEVNULL = lambda: DEVNULL
85 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
88 from urllib.parse import parse_qs as compat_parse_qs
89 except ImportError: # Python 2
90 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
91 # Python 2's version is apparently totally broken
92 def _unquote(string, encoding='utf-8', errors='replace'):
95 res = string.split('%')
102 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
109 pct_sequence += item[:2].decode('hex')
112 # This segment was just a single percent-encoded character.
113 # May be part of a sequence of code units, so delay decoding.
114 # (Stored in pct_sequence).
118 # Encountered non-percent-encoded characters. Flush the current
120 string += pct_sequence.decode(encoding, errors) + rest
123 # Flush the final pct_sequence
124 string += pct_sequence.decode(encoding, errors)
127 def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
128 encoding='utf-8', errors='replace'):
129 qs, _coerce_result = qs, unicode
130 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
132 for name_value in pairs:
133 if not name_value and not strict_parsing:
135 nv = name_value.split('=', 1)
138 raise ValueError("bad query field: %r" % (name_value,))
139 # Handle case of a control-name with no equal sign
140 if keep_blank_values:
144 if len(nv[1]) or keep_blank_values:
145 name = nv[0].replace('+', ' ')
146 name = _unquote(name, encoding=encoding, errors=errors)
147 name = _coerce_result(name)
148 value = nv[1].replace('+', ' ')
149 value = _unquote(value, encoding=encoding, errors=errors)
150 value = _coerce_result(value)
151 r.append((name, value))
154 def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
155 encoding='utf-8', errors='replace'):
157 pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
158 encoding=encoding, errors=errors)
159 for name, value in pairs:
160 if name in parsed_result:
161 parsed_result[name].append(value)
163 parsed_result[name] = [value]
167 compat_str = unicode # Python 2
172 compat_chr = unichr # Python 2
177 if type(c) is int: return c
180 # This is not clearly defined otherwise
181 compiled_regex_type = type(re.compile(''))
184 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
185 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
186 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
187 'Accept-Encoding': 'gzip, deflate',
188 'Accept-Language': 'en-us,en;q=0.5',
191 def preferredencoding():
192 """Get preferred encoding.
194 Returns the best encoding scheme for the system, based on
195 locale.getpreferredencoding() and some further tweaks.
198 pref = locale.getpreferredencoding()
205 if sys.version_info < (3,0):
207 print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
210 assert type(s) == type(u'')
213 # In Python 2.x, json.dump expects a bytestream.
214 # In Python 3.x, it writes to a character stream
215 if sys.version_info < (3,0):
216 def write_json_file(obj, fn):
217 with open(fn, 'wb') as f:
220 def write_json_file(obj, fn):
221 with open(fn, 'w', encoding='utf-8') as f:
224 if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
    """Return the first element matching *xpath* whose attribute *key* equals *val*."""
    # The XPath predicate below is assembled by hand, so only plain
    # attribute names and simple values are accepted.
    assert re.match(r'^[a-zA-Z]+$', key)
    assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
    predicate = u"[@%s='%s']" % (key, val)
    return node.find(xpath + predicate)
232 def find_xpath_attr(node, xpath, key, val):
233 for f in node.findall(xpath):
234 if f.attrib.get(key) == val:
238 # On python2.6 the xml.etree.ElementTree.Element methods don't support
239 # the namespace parameter
240 def xpath_with_ns(path, ns_map):
241 components = [c.split(':') for c in path.split('/')]
245 replaced.append(c[0])
248 replaced.append('{%s}%s' % (ns_map[ns], tag))
249 return '/'.join(replaced)
251 def htmlentity_transform(matchobj):
252 """Transforms an HTML entity to a character.
254 This function receives a match object and is intended to be used with
255 the re.sub() function.
257 entity = matchobj.group(1)
259 # Known non-numeric HTML entity
260 if entity in compat_html_entities.name2codepoint:
261 return compat_chr(compat_html_entities.name2codepoint[entity])
263 mobj = re.match(u'(?u)#(x?\\d+)', entity)
265 numstr = mobj.group(1)
266 if numstr.startswith(u'x'):
268 numstr = u'0%s' % numstr
271 return compat_chr(int(numstr, base))
273 # Unknown entity in name, return its literal representation
274 return (u'&%s;' % entity)
276 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
277 class BaseHTMLParser(compat_html_parser.HTMLParser):
279 compat_html_parser.HTMLParser.__init__(self)
282 def loads(self, html):
287 class AttrParser(BaseHTMLParser):
288 """Modified HTMLParser that isolates a tag with the specified attribute"""
289 def __init__(self, attribute, value):
290 self.attribute = attribute
295 self.watch_startpos = False
297 BaseHTMLParser.__init__(self)
299 def error(self, message):
300 if self.error_count > 10 or self.started:
301 raise compat_html_parser.HTMLParseError(message, self.getpos())
302 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
303 self.error_count += 1
306 def handle_starttag(self, tag, attrs):
309 self.find_startpos(None)
310 if self.attribute in attrs and attrs[self.attribute] == self.value:
313 self.watch_startpos = True
315 if not tag in self.depth: self.depth[tag] = 0
318 def handle_endtag(self, tag):
320 if tag in self.depth: self.depth[tag] -= 1
321 if self.depth[self.result[0]] == 0:
323 self.result.append(self.getpos())
325 def find_startpos(self, x):
326 """Needed to put the start position of the result (self.result[1])
327 after the opening tag with the requested id"""
328 if self.watch_startpos:
329 self.watch_startpos = False
330 self.result.append(self.getpos())
331 handle_entityref = handle_charref = handle_data = handle_comment = \
332 handle_decl = handle_pi = unknown_decl = find_startpos
334 def get_result(self):
335 if self.result is None:
337 if len(self.result) != 3:
339 lines = self.html.split('\n')
340 lines = lines[self.result[1][0]-1:self.result[2][0]]
341 lines[0] = lines[0][self.result[1][1]:]
343 lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
344 lines[-1] = lines[-1][:self.result[2][1]]
345 return '\n'.join(lines).strip()
346 # Hack for https://github.com/rg3/youtube-dl/issues/662
347 if sys.version_info < (2, 7, 3):
348 AttrParser.parse_endtag = (lambda self, i:
349 i + len("</scr'+'ipt>")
350 if self.rawdata[i:].startswith("</scr'+'ipt>")
351 else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag carrying the given ID in the HTML document."""
    # An ID lookup is just an attribute lookup specialised to the "id" attribute.
    return get_element_by_attribute('id', id, html)
357 def get_element_by_attribute(attribute, value, html):
358 """Return the content of the tag with the specified attribute in the passed HTML document"""
359 parser = AttrParser(attribute, value)
362 except compat_html_parser.HTMLParseError:
364 return parser.get_result()
366 class MetaParser(BaseHTMLParser):
368 Modified HTMLParser that isolates a meta tag with the specified name
371 def __init__(self, name):
372 BaseHTMLParser.__init__(self)
377 def handle_starttag(self, tag, attrs):
381 if attrs.get('name') == self.name:
382 self.result = attrs.get('content')
384 def get_result(self):
387 def get_meta_content(name, html):
389 Return the content attribute from the meta tag with the given name attribute.
391 parser = MetaParser(name)
394 except compat_html_parser.HTMLParseError:
396 return parser.get_result()
399 def clean_html(html):
400 """Clean an HTML snippet into a readable string"""
402 html = html.replace('\n', ' ')
403 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
404 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
406 html = re.sub('<.*?>', '', html)
407 # Replace html entities
408 html = unescapeHTML(html)
412 def sanitize_open(filename, open_mode):
413 """Try to open the given filename, and slightly tweak it if this fails.
415 Attempts to open the given filename. If this fails, it tries to change
416 the filename slightly, step by step, until it's either able to open it
417 or it fails and raises a final exception, like the standard open()
420 It returns the tuple (stream, definitive_file_name).
424 if sys.platform == 'win32':
426 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
427 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
428 stream = open(encodeFilename(filename), open_mode)
429 return (stream, filename)
430 except (IOError, OSError) as err:
431 if err.errno in (errno.EACCES,):
434 # In case of error, try to remove win32 forbidden chars
435 alt_filename = os.path.join(
436 re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
437 for path_part in os.path.split(filename)
439 if alt_filename == filename:
442 # An exception here should be caught in the caller
443 stream = open(encodeFilename(filename), open_mode)
444 return (stream, alt_filename)
447 def timeconvert(timestr):
448 """Convert RFC 2822 defined time string into system timestamp"""
450 timetuple = email.utils.parsedate_tz(timestr)
451 if timetuple is not None:
452 timestamp = email.utils.mktime_tz(timetuple)
455 def sanitize_filename(s, restricted=False, is_id=False):
456 """Sanitizes a string so it could be used as part of a filename.
457 If restricted is set, use a stricter subset of allowed characters.
458 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
460 def replace_insane(char):
461 if char == '?' or ord(char) < 32 or ord(char) == 127:
464 return '' if restricted else '\''
466 return '_-' if restricted else ' -'
467 elif char in '\\/|*<>':
469 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
471 if restricted and ord(char) > 127:
475 result = u''.join(map(replace_insane, s))
477 while '__' in result:
478 result = result.replace('__', '_')
479 result = result.strip('_')
480 # Common case of "Foreign band name - English song title"
481 if restricted and result.startswith('-_'):
487 def orderedSet(iterable):
488 """ Remove all duplicates from the input iterable """
499 assert type(s) == type(u'')
501 result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
505 def encodeFilename(s, for_subprocess=False):
507 @param s The name of the file
510 assert type(s) == compat_str
512 # Python 3 has a Unicode API
513 if sys.version_info >= (3, 0):
516 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
517 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
518 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
519 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
520 if not for_subprocess:
523 # For subprocess calls, encode with locale encoding
524 # Refer to http://stackoverflow.com/a/9951851/35070
525 encoding = preferredencoding()
527 encoding = sys.getfilesystemencoding()
530 return s.encode(encoding, 'ignore')
533 def decodeOption(optval):
536 if isinstance(optval, bytes):
537 optval = optval.decode(preferredencoding())
539 assert isinstance(optval, compat_str)
542 def formatSeconds(secs):
544 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
546 return '%d:%02d' % (secs // 60, secs % 60)
551 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
552 if sys.version_info < (3, 2):
555 class HTTPSConnectionV3(httplib.HTTPSConnection):
556 def __init__(self, *args, **kwargs):
557 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
560 sock = socket.create_connection((self.host, self.port), self.timeout)
561 if getattr(self, '_tunnel_host', False):
565 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
567 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
569 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
570 def https_open(self, req):
571 return self.do_open(HTTPSConnectionV3, req)
572 return HTTPSHandlerV3(**kwargs)
574 context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
575 context.verify_mode = (ssl.CERT_NONE
576 if opts_no_check_certificate
577 else ssl.CERT_REQUIRED)
578 context.set_default_verify_paths()
580 context.load_default_certs()
581 except AttributeError:
583 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
585 class ExtractorError(Exception):
586 """Error during info extraction."""
587 def __init__(self, msg, tb=None, expected=False, cause=None):
588 """ tb, if given, is the original traceback (so that it can be printed out).
589 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
592 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
595 msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
596 super(ExtractorError, self).__init__(msg)
599 self.exc_info = sys.exc_info() # preserve original exception
602 def format_traceback(self):
603 if self.traceback is None:
605 return u''.join(traceback.format_tb(self.traceback))
608 class RegexNotFoundError(ExtractorError):
609 """Error when a regex didn't match"""
613 class DownloadError(Exception):
614 """Download Error exception.
616 This exception may be thrown by FileDownloader objects if they are not
617 configured to continue on errors. They will contain the appropriate
620 def __init__(self, msg, exc_info=None):
621 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
622 super(DownloadError, self).__init__(msg)
623 self.exc_info = exc_info
626 class SameFileError(Exception):
627 """Same File exception.
629 This exception will be thrown by FileDownloader objects if they detect
630 multiple files would have to be downloaded to the same file on disk.
635 class PostProcessingError(Exception):
636 """Post Processing exception.
638 This exception may be raised by PostProcessor's .run() method to
639 indicate an error in the postprocessing task.
641 def __init__(self, msg):
644 class MaxDownloadsReached(Exception):
645 """ --max-downloads limit has been reached. """
649 class UnavailableVideoError(Exception):
650 """Unavailable Format exception.
652 This exception will be thrown when a video is requested
653 in a format that is not available for that video.
658 class ContentTooShortError(Exception):
659 """Content Too Short exception.
661 This exception may be raised by FileDownloader objects when a file they
662 download is too small for what the server announced first, indicating
663 the connection was probably interrupted.
def __init__(self, downloaded, expected):
    """Record the byte counts: what was actually received vs. what the server announced."""
    self.expected = expected
    self.downloaded = downloaded
673 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
674 """Handler for HTTP requests and responses.
676 This class, when installed with an OpenerDirector, automatically adds
677 the standard headers to every HTTP request and handles gzipped and
678 deflated responses from web servers. If compression is to be avoided in
679 a particular request, the original request in the program code only has
680 to include the HTTP header "Youtubedl-No-Compression", which will be
681 removed before making the real request.
683 Part of this code was copied from:
685 http://techknack.net/python-urllib2-handlers/
687 Andrew Rowls, the author of that code, agreed to release it to the
694 return zlib.decompress(data, -zlib.MAX_WBITS)
696 return zlib.decompress(data)
699 def addinfourl_wrapper(stream, headers, url, code):
700 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
701 return compat_urllib_request.addinfourl(stream, headers, url, code)
702 ret = compat_urllib_request.addinfourl(stream, headers, url)
706 def http_request(self, req):
707 for h,v in std_headers.items():
711 if 'Youtubedl-no-compression' in req.headers:
712 if 'Accept-encoding' in req.headers:
713 del req.headers['Accept-encoding']
714 del req.headers['Youtubedl-no-compression']
715 if 'Youtubedl-user-agent' in req.headers:
716 if 'User-agent' in req.headers:
717 del req.headers['User-agent']
718 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
719 del req.headers['Youtubedl-user-agent']
722 def http_response(self, req, resp):
725 if resp.headers.get('Content-encoding', '') == 'gzip':
726 content = resp.read()
727 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
729 uncompressed = io.BytesIO(gz.read())
730 except IOError as original_ioerror:
731 # There may be junk add the end of the file
732 # See http://stackoverflow.com/q/4928560/35070 for details
733 for i in range(1, 1024):
735 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
736 uncompressed = io.BytesIO(gz.read())
741 raise original_ioerror
742 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
743 resp.msg = old_resp.msg
745 if resp.headers.get('Content-encoding', '') == 'deflate':
746 gz = io.BytesIO(self.deflate(resp.read()))
747 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
748 resp.msg = old_resp.msg
751 https_request = http_request
752 https_response = http_response
754 def unified_strdate(date_str):
755 """Return a string with the date in the format YYYYMMDD"""
758 date_str = date_str.replace(',',' ')
759 # %z (UTC offset) is only supported in python>=3.2
760 date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
761 format_expressions = [
770 '%Y-%m-%dT%H:%M:%SZ',
771 '%Y-%m-%dT%H:%M:%S.%fZ',
772 '%Y-%m-%dT%H:%M:%S.%f0Z',
775 for expression in format_expressions:
777 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
780 if upload_date is None:
781 timetuple = email.utils.parsedate_tz(date_str)
783 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
786 def determine_ext(url, default_ext=u'unknown_video'):
787 guess = url.partition(u'?')[0].rpartition(u'.')[2]
788 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Derive a subtitle file name of the form <base>.<lang>.<format>."""
    base = filename.rsplit('.', 1)[0]
    return u'.'.join([base, sub_lang, sub_format])
796 def date_from_str(date_str):
798 Return a datetime object from a string in the format YYYYMMDD or
799 (now|today)[+-][0-9](day|week|month|year)(s)?"""
800 today = datetime.date.today()
801 if date_str == 'now'or date_str == 'today':
803 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
804 if match is not None:
805 sign = match.group('sign')
806 time = int(match.group('time'))
809 unit = match.group('unit')
818 delta = datetime.timedelta(**{unit: time})
820 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
822 def hyphenate_date(date_str):
824 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
825 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
826 if match is not None:
827 return '-'.join(match.groups())
831 class DateRange(object):
832 """Represents a time interval between two dates"""
833 def __init__(self, start=None, end=None):
834 """start and end must be strings in the format accepted by date"""
835 if start is not None:
836 self.start = date_from_str(start)
838 self.start = datetime.datetime.min.date()
840 self.end = date_from_str(end)
842 self.end = datetime.datetime.max.date()
843 if self.start > self.end:
844 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
847 """Returns a range that only contains the given day"""
def __contains__(self, date):
    """Return True when *date* (a datetime.date, or a string parseable by date_from_str) falls inside the range."""
    when = date if isinstance(date, datetime.date) else date_from_str(date)
    return self.start <= when <= self.end
855 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
859 """ Returns the platform name as a compat_str """
860 res = platform.platform()
861 if isinstance(res, bytes):
862 res = res.decode(preferredencoding())
864 assert isinstance(res, compat_str)
868 def write_string(s, out=None):
871 assert type(s) == compat_str
873 if ('b' in getattr(out, 'mode', '') or
874 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
875 s = s.encode(preferredencoding(), 'ignore')
878 except UnicodeEncodeError:
879 # In Windows shells, this can fail even when the codec is just charmap!?
880 # See https://wiki.python.org/moin/PrintFails#Issue
881 if sys.platform == 'win32' and hasattr(out, 'encoding'):
882 s = s.encode(out.encoding, 'ignore').decode(out.encoding)
890 def bytes_to_intlist(bs):
893 if isinstance(bs[0], int): # Python 3
896 return [ord(c) for c in bs]
899 def intlist_to_bytes(xs):
902 if isinstance(chr(0), bytes): # Python 2
903 return ''.join([chr(x) for x in xs])
def get_cachedir(params=None):
    """Return the cache directory youtube-dl should use.

    Honours params['cachedir'] when supplied; otherwise falls back to
    $XDG_CACHE_HOME/youtube-dl (or ~/.cache/youtube-dl when the
    environment variable is unset).
    """
    # Fix: the original used a mutable default argument (params={}).
    # A None sentinel is safer and behaviourally identical for callers.
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
914 # Cross-platform file locking
915 if sys.platform == 'win32':
916 import ctypes.wintypes
919 class OVERLAPPED(ctypes.Structure):
921 ('Internal', ctypes.wintypes.LPVOID),
922 ('InternalHigh', ctypes.wintypes.LPVOID),
923 ('Offset', ctypes.wintypes.DWORD),
924 ('OffsetHigh', ctypes.wintypes.DWORD),
925 ('hEvent', ctypes.wintypes.HANDLE),
928 kernel32 = ctypes.windll.kernel32
929 LockFileEx = kernel32.LockFileEx
930 LockFileEx.argtypes = [
931 ctypes.wintypes.HANDLE, # hFile
932 ctypes.wintypes.DWORD, # dwFlags
933 ctypes.wintypes.DWORD, # dwReserved
934 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
935 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
936 ctypes.POINTER(OVERLAPPED) # Overlapped
938 LockFileEx.restype = ctypes.wintypes.BOOL
939 UnlockFileEx = kernel32.UnlockFileEx
940 UnlockFileEx.argtypes = [
941 ctypes.wintypes.HANDLE, # hFile
942 ctypes.wintypes.DWORD, # dwReserved
943 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
944 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
945 ctypes.POINTER(OVERLAPPED) # Overlapped
947 UnlockFileEx.restype = ctypes.wintypes.BOOL
948 whole_low = 0xffffffff
949 whole_high = 0x7fffffff
951 def _lock_file(f, exclusive):
952 overlapped = OVERLAPPED()
953 overlapped.Offset = 0
954 overlapped.OffsetHigh = 0
955 overlapped.hEvent = 0
956 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
957 handle = msvcrt.get_osfhandle(f.fileno())
958 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
959 whole_low, whole_high, f._lock_file_overlapped_p):
960 raise OSError('Locking file failed: %r' % ctypes.FormatError())
963 assert f._lock_file_overlapped_p
964 handle = msvcrt.get_osfhandle(f.fileno())
965 if not UnlockFileEx(handle, 0,
966 whole_low, whole_high, f._lock_file_overlapped_p):
967 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
972 def _lock_file(f, exclusive):
973 fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
976 fcntl.lockf(f, fcntl.LOCK_UN)
979 class locked_file(object):
980 def __init__(self, filename, mode, encoding=None):
981 assert mode in ['r', 'a', 'w']
982 self.f = io.open(filename, mode, encoding=encoding)
986 exclusive = self.mode != 'r'
988 _lock_file(self.f, exclusive)
994 def __exit__(self, etype, value, traceback):
1003 def write(self, *args):
1004 return self.f.write(*args)
1006 def read(self, *args):
1007 return self.f.read(*args)
1010 def shell_quote(args):
1012 encoding = sys.getfilesystemencoding()
1013 if encoding is None:
1016 if isinstance(a, bytes):
1017 # We may get a filename encoded with 'encodeFilename'
1018 a = a.decode(encoding)
1019 quoted_args.append(pipes.quote(a))
1020 return u' '.join(quoted_args)
1023 def takewhile_inclusive(pred, seq):
1024 """ Like itertools.takewhile, but include the latest evaluated element
1025 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """Embed *data* (JSON-encoded) in the fragment of *url* for internal round-tripping."""
    payload = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return u'%s#%s' % (url, payload)
1040 def unsmuggle_url(smug_url, default=None):
1041 if not '#__youtubedl_smuggle' in smug_url:
1042 return smug_url, default
1043 url, _, sdata = smug_url.rpartition(u'#')
1044 jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
1045 data = json.loads(jsond)
1049 def format_bytes(bytes):
1052 if type(bytes) is str:
1053 bytes = float(bytes)
1057 exponent = int(math.log(bytes, 1024.0))
1058 suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1059 converted = float(bytes) / float(1024 ** exponent)
1060 return u'%.2f%s' % (converted, suffix)
1063 def str_to_int(int_str):
1064 int_str = re.sub(r'[,\.]', u'', int_str)
1068 def get_term_width():
1069 columns = os.environ.get('COLUMNS', None)
1074 sp = subprocess.Popen(
1076 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1077 out, err = sp.communicate()
1078 return int(out.split()[1])
1084 def month_by_name(name):
1085 """ Return the number of a month by (locale-independently) English name """
1088 u'January', u'February', u'March', u'April', u'May', u'June',
1089 u'July', u'August', u'September', u'October', u'November', u'December']
1091 return ENGLISH_NAMES.index(name) + 1
1096 def fix_xml_ampersands(xml_str):
1097 """Replace all the '&' by '&' in XML"""
1099 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1104 def setproctitle(title):
1105 assert isinstance(title, compat_str)
1107 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1111 buf = ctypes.create_string_buffer(len(title) + 1)
1112 buf.value = title.encode('utf-8')
1114 libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
1115 except AttributeError:
1116 return # Strange libc, just skip this
1119 def remove_start(s, start):
1120 if s.startswith(start):
1121 return s[len(start):]
def url_basename(url):
    """Return the final path segment of *url* (query string and fragment excluded)."""
    stripped = compat_urlparse.urlparse(url).path.strip(u'/')
    # rpartition yields the text after the last '/', or the whole string
    # when no '/' remains.
    return stripped.rpartition(u'/')[2]
1130 class HEADRequest(compat_urllib_request.Request):
1131 def get_method(self):
def int_or_none(v, scale=1):
    """Convert *v* to int and floor-divide by *scale*; None passes through unchanged."""
    if v is None:
        return None
    return int(v) // scale
1139 def parse_duration(s):
1144 r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s)
1147 res = int(m.group('secs'))
1149 res += int(m.group('mins')) * 60
1150 if m.group('hours'):
1151 res += int(m.group('hours')) * 60 * 60
def prepend_extension(filename, ext):
    """Insert *ext* before the file's real extension: 'a.mp4' -> 'a.<ext>.mp4'."""
    name, real_ext = os.path.splitext(filename)
    return u''.join([name, u'.', ext, real_ext])
1160 def check_executable(exe, args=[]):
1161 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1162 args can be a list of arguments for a short output (like -version) """
1164 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1170 class PagedList(object):
1171 def __init__(self, pagefunc, pagesize):
1172 self._pagefunc = pagefunc
1173 self._pagesize = pagesize
1176 # This is only useful for tests
1177 return len(self.getslice())
1179 def getslice(self, start=0, end=None):
1181 for pagenum in itertools.count(start // self._pagesize):
1182 firstid = pagenum * self._pagesize
1183 nextfirstid = pagenum * self._pagesize + self._pagesize
1184 if start >= nextfirstid:
1187 page_results = list(self._pagefunc(pagenum))
1190 start % self._pagesize
1191 if firstid <= start < nextfirstid
1195 ((end - 1) % self._pagesize) + 1
1196 if (end is not None and firstid <= end <= nextfirstid)
1199 if startv != 0 or endv is not None:
1200 page_results = page_results[startv:endv]
1201 res.extend(page_results)
1203 # A little optimization - if current page is not "full", ie. does
1204 # not contain page_size videos then we can assume that this page
1205 # is the last one - there are no more ids on further pages -
1206 # i.e. no need to query again.
1207 if len(page_results) + startv < self._pagesize:
1210 # If we got the whole page, but the next page is not interesting,
1211 # break out early as well
1212 if end == nextfirstid: