H. Frystyk Nielsen
+# Expires September 8, 1995 March 8, 1995
+#
+# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
+#
+# and
+#
+# Network Working Group R. Fielding
+# Request for Comments: 2616 et al
+# Obsoletes: 2068 June 1999
+# Category: Standards Track
+#
+# URL: http://www.faqs.org/rfcs/rfc2616.html
+
+# Log files
+# ---------
+#
+# Here's a quote from the NCSA httpd docs about log file format.
+#
+# | The logfile format is as follows. Each line consists of:
+# |
+# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
+# |
+# | host: Either the DNS name or the IP number of the remote client
+# | rfc931: Any information returned by identd for this person,
+# | - otherwise.
+# | authuser: If user sent a userid for authentication, the user name,
+# | - otherwise.
+# | DD: Day
+# | Mon: Month (calendar name)
+# | YYYY: Year
+# | hh: hour (24-hour format, the machine's timezone)
+# | mm: minutes
+# | ss: seconds
+# | request: The first line of the HTTP request as sent by the client.
+# | ddd: the status code returned by the server, - if not available.
+# | bbbb: the total number of bytes sent,
+# | *not including the HTTP/1.0 header*, - if not available
+# |
+# | You can determine the name of the file accessed through request.
+#
+# (Actually, the latter is only true if you know the server configuration
+# at the time the request was made!)
+
+__version__ = "0.3"
+
+__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
+
+import sys
+import time
+import socket # For gethostbyaddr()
+from warnings import filterwarnings, catch_warnings
+with catch_warnings():
+ if sys.py3kwarning:
+ filterwarnings("ignore", ".*mimetools has been removed",
+ DeprecationWarning)
+ import mimetools
+import SocketServer
+
# Default error message template.
#
# BaseHTTPRequestHandler.send_error() fills this in with a mapping holding
# the keys 'code', 'message' and 'explain', and sends the result with the
# content type below -- so the template has to be actual HTML markup.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Value of the Content-Type header sent along with the error page.
DEFAULT_ERROR_CONTENT_TYPE = "text/html"
+
def _quote_html(html):
    """Return *html* with '&', '<' and '>' replaced by HTML entities.

    Used to neutralise markup in text that is interpolated into the HTML
    error page.  '&' is replaced first so that the ampersands introduced
    by the '&lt;' / '&gt;' entities are not escaped a second time.
    """
    return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+
class HTTPServer(SocketServer.TCPServer):
    """TCP server that remembers the name and port it is bound to.

    After binding, the attributes ``server_name`` and ``server_port`` are
    available on the instance.
    """

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name."""
        SocketServer.TCPServer.server_bind(self)
        # getsockname() may return more than two items; only the
        # first two (host, port) are used here.
        host, port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
+
+
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, an
        error is sent back (except for a blank request line, which is
        rejected silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = self.raw_requestline
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            # Full request line: <command> <path> <version>
            command, path, version = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # HTTP/0.9 style request line: <command> <path>
            command, path = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            # Blank line: fail without sending any reply.
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive
        self.headers = self.MessageClass(self.rfile, 0)

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so that an over-long
            # request line can be told apart from one of exactly 65536.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                # Nothing was read: stop the handle() loop.
                self.close_connection = 1
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush()  # actually send the response if not already done.
        except socket.timeout as e:
            # a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        content = None
        if code >= 200 and code not in (204, 205, 304):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': _quote_html(message),
                'explain': explain
            })
            self.send_header("Content-Type", self.error_content_type)
        self.end_headers()

        if self.command != 'HEAD' and content:
            self.wfile.write(content)

    # Template and content type used by send_error(); override to customise.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %d %s\r\n" %
                             (self.protocol_version, code, message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        Nothing is written for an HTTP/0.9 request.  A 'Connection'
        header additionally updates self.close_connection to match.
        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip address and current date/time are prefixed to every
        message.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.client_address[0],
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        If *timestamp* (seconds since the epoch) is given it is formatted
        instead of the current time.  The result is expressed in GMT.
        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (0 is Monday).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (1..12); slot 0 is unused.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the client host to socket.getfqdn() and
        returns the resulting name; the client port is ignored.

        """

        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
+
+
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the first command line
    argument).

    Arguments:

    HandlerClass - request handler class; its protocol_version class
                   attribute is overwritten with *protocol*
    ServerClass - server class, instantiated with (address, HandlerClass)
    protocol - protocol version string assigned to the handler class

    This function does not return normally: it ends in serve_forever().

    """

    if sys.argv[1:]:
        port = int(sys.argv[1])
    else:
        port = 8000
    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    sa = httpd.socket.getsockname()
    # A single pre-formatted argument prints the same line whether
    # ``print`` is the Python 2 statement or the print function.
    print("Serving HTTP on %s port %s ..." % (sa[0], sa[1]))
    httpd.serve_forever()


if __name__ == '__main__':
    test()
diff --git a/cashew/Lib/Bastion.py b/cashew/Lib/Bastion.py
new file mode 100644
index 0000000..d0dddbf
--- /dev/null
+++ b/cashew/Lib/Bastion.py
@@ -0,0 +1,180 @@
+"""Bastionification utility.
+
+A bastion (for another object -- the 'original') is an object that has
+the same methods as the original but does not give access to its
+instance variables. Bastions have a number of uses, but the most
+obvious one is to provide code executing in restricted mode with a
+safe interface to an object implemented in unrestricted mode.
+
+The bastionification routine has an optional second argument which is
+a filter function. Only those methods for which the filter method
+(called with the method name as argument) returns true are accessible.
+The default filter method returns true unless the method name begins
+with an underscore.
+
+There are a number of possible implementations of bastions. We use a
+'lazy' approach where the bastion's __getattr__() discipline does all
+the work for a particular method the first time it is used. This is
+usually fastest, especially if the user doesn't call all available
+methods. The retrieved methods are stored as instance variables of
+the bastion, so the overhead is only incurred on the first use of each
+method.
+
+Detail: the bastion class has a __repr__() discipline which includes
+the repr() of the original object. This is precomputed when the
+bastion is created.
+
+"""
+from warnings import warnpy3k
+warnpy3k("the Bastion module has been removed in Python 3.0", stacklevel=2)
+del warnpy3k
+
+__all__ = ["BastionClass", "Bastion"]
+
+from types import MethodType
+
+
class BastionClass:

    """Helper class used by the Bastion() function.

    You could subclass this and pass the subclass as the bastionclass
    argument to the Bastion() function, as long as the constructor has
    the same signature (a get() function and a name for the object).

    """

    def __init__(self, get, name):
        """Constructor.

        Arguments:

        get - a function that gets the attribute value (by name)
        name - a human-readable name for the original object
               (suggestion: use repr(object))

        """
        self._get_ = get
        self._name_ = name

    def __repr__(self):
        """Return a representation string.

        This includes the name passed in to the constructor, so that
        if you print the bastion during debugging, at least you have
        some idea of what it is.

        """
        # The one-element tuple keeps the formatting correct even when
        # the stored name is itself a tuple.
        return "<Bastion for %s>" % (self._name_,)

    def __getattr__(self, name):
        """Get an as-yet undefined attribute value.

        This calls the get() function that was passed to the
        constructor.  The result is stored as an instance variable so
        that the next time the same attribute is requested,
        __getattr__() won't be invoked.

        If the get() function raises an exception, this is simply
        passed on -- exceptions are not cached.

        """
        attribute = self._get_(name)
        # Store directly in the instance dict: later lookups then find
        # the attribute the normal way and never reach __getattr__().
        self.__dict__[name] = attribute
        return attribute
+
+
def Bastion(object, filter = lambda name: name[:1] != '_',
            name=None, bastionclass=BastionClass):
    """Create a bastion for an object, using an optional filter.

    See the Bastion module's documentation for background.

    NOTE: this function is disabled.  Its first statement raises
    RuntimeError unconditionally, so no bastion is ever returned; the
    code after the raise is unreachable and is kept only as a record of
    the intended implementation.

    Arguments:

    object - the original object
    filter - a predicate that decides whether a function name is OK;
             by default all names are OK that don't start with '_'
    name - the name of the object; default repr(object)
    bastionclass - class used to create the bastion; default BastionClass

    Raises:

    RuntimeError - always

    """

    raise RuntimeError("This code is not secure in Python 2.2 and later")

    # Note: we define *two* ad-hoc functions here, get1 and get2.
    # Both are intended to be called in the same way: get(name).
    # It is clear that the real work (getting the attribute
    # from the object and calling the filter) is done in get1.
    # Why can't we pass get1 to the bastion?  Because the user
    # would be able to override the filter argument!  With get2,
    # overriding the default argument is no security loophole:
    # all it does is call it.
    # Also notice that we can't place the object and filter as
    # instance variables on the bastion object itself, since
    # the user has full access to all instance variables!

    def get1(name, object=object, filter=filter):
        """Internal function for Bastion().  See source comments."""
        if filter(name):
            attribute = getattr(object, name)
            if type(attribute) == MethodType:
                return attribute
        raise AttributeError(name)

    def get2(name, get1=get1):
        """Internal function for Bastion().  See source comments."""
        return get1(name)

    if name is None:
        name = repr(object)
    return bastionclass(get2, name)
+
+
def _test():
    """Test the Bastion() function.

    Builds a small accumulator object, wraps it with Bastion() and runs
    the same snippet of test code twice: first directly with exec, then
    inside an rexec.RExec() sandbox.

    The snippet is Python 2 source (it uses print statements) and the
    second run needs the Python 2 ``rexec`` module.
    """
    class Original:
        def __init__(self):
            self.sum = 0
        def add(self, n):
            self._add(n)
        def _add(self, n):
            self.sum = self.sum + n
        def total(self):
            return self.sum
    o = Original()
    b = Bastion(o)
    # The leading "if 1:" lets the snippet below stay indented.
    testcode = """if 1:
    b.add(81)
    b.add(18)
    print "b.total() =", b.total()
    try:
        print "b.sum =", b.sum,
    except:
        print "inaccessible"
    else:
        print "accessible"
    try:
        print "b._add =", b._add,
    except:
        print "inaccessible"
    else:
        print "accessible"
    try:
        print "b._get_.func_defaults =", map(type, b._get_.func_defaults),
    except:
        print "inaccessible"
    else:
        print "accessible"
    \n"""
    # Parenthesised forms: valid both as Python 2 statements and as
    # Python 3 function calls.
    exec(testcode)
    print("%s Using rexec: %s" % ('='*20, '='*20))
    import rexec
    r = rexec.RExec()
    m = r.add_module('__main__')
    m.b = b
    r.r_exec(testcode)


if __name__ == '__main__':
    _test()
diff --git a/cashew/Lib/CGIHTTPServer.py b/cashew/Lib/CGIHTTPServer.py
new file mode 100644
index 0000000..5620083
--- /dev/null
+++ b/cashew/Lib/CGIHTTPServer.py
@@ -0,0 +1,378 @@
+"""CGI-savvy HTTP Server.
+
+This module builds on SimpleHTTPServer by implementing GET and POST
+requests to cgi-bin scripts.
+
+If the os.fork() function is not present (e.g. on Windows),
+os.popen2() is used as a fallback, with slightly altered semantics; if
+that function is not present either (e.g. on Macintosh), only Python
+scripts are supported, and they are executed by the current process.
+
+In all cases, the implementation is intentionally naive -- all
+requests are executed synchronously.
+
+SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
+-- it may execute arbitrary Python code or external programs.
+
+Note that status code 200 is sent prior to execution of a CGI script, so
+scripts cannot send other status codes such as 302 (redirect).
+"""
+
+
+__version__ = "0.4"
+
+__all__ = ["CGIHTTPRequestHandler"]
+
+import os
+import sys
+import urllib
+import BaseHTTPServer
+import SimpleHTTPServer
+import select
+import copy
+
+
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')
    have_popen2 = hasattr(os, 'popen2')
    have_popen3 = hasattr(os, 'popen3')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts."""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).
        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the second '/': everything before it is the candidate
        # CGI directory, everything after it is the rest of the path.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False

    # URL path prefixes that are treated as containing CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Expects self.cgi_info to have been set by is_cgi().  Sends an
        error response and returns early if the script does not exist,
        is not a plain file, or cannot be run on this platform.
        Otherwise sends status 200, then runs the script -- by fork()
        and execve() where fork is available, through the subprocess
        module elsewhere.  Always returns None.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Walk down any further leading components of the path that are
        # directories on disk, so that 'dir' ends up naming the directory
        # containing the script.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        if not ispy:
            if not (self.have_fork or self.have_popen2 or self.have_popen3):
                self.send_error(403, "CGI script is not a Python script (%r)" %
                                scriptname)
                return
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.getheader("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = base64.decodestring(authorization[1])
                    except binascii.Error:
                        # Undecodable credentials: REMOTE_USER stays unset.
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.typeheader is None:
            env['CONTENT_TYPE'] = self.headers.type
        else:
            env['CONTENT_TYPE'] = self.headers.typeheader
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.getheader('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Continuation line of a folded header.
                accept.append(line.strip())
            else:
                # Strip the leading "Accept:" (7 characters).
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.getheaders('cookie'))
        if co:
            env['HTTP_COOKIE'] = ', '.join(co)
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush()  # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    # Best effort: carry on with the current uid.
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catches everything: whatever goes wrong,
                # the child reports it and exits instead of returning
                # into the caller's code.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non Unix - use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE(review): the fork branch above passes decoded_query
            # ('+' turned into ' '); this branch passes the raw query.
            # Confirm whether the difference is intended.
            if '=' not in query:
                cmdline.append(query)

            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin = subprocess.PIPE,
                                 stdout = subprocess.PIPE,
                                 stderr = subprocess.PIPE,
                                 env = env
                                )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
+
+
def _url_collapse_path(path):
    """
    Given a URL path, remove extra '/'s and '.' path elements and collapse
    any '..' references and returns a collapsed path.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    The utility of this function is limited to is_cgi method and helps
    preventing some security attacks.

    Returns: The reconstituted URL, which will always start with a '/'.
    A query string ('?...') is carried over unchanged (not unquoted) and
    appended to the final path component.

    Raises: IndexError if too many '..' occur within the path.

    """
    # Query component should not be involved.
    path, _, query = path.partition('?')
    path = urllib.unquote(path)

    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    path_parts = path.split('/')
    head_parts = []
    for part in path_parts[:-1]:
        if part == '..':
            head_parts.pop()  # IndexError if more '..' than prior parts
        elif part and part != '.':
            head_parts.append(part)

    # str.split() always returns at least one element, so there is
    # always a final component to examine (possibly the empty string).
    tail_part = path_parts[-1]
    if tail_part == '..':
        head_parts.pop()  # IndexError if there is nothing left to drop
        tail_part = ''
    elif tail_part == '.':
        tail_part = ''

    if query:
        tail_part = '?'.join((tail_part, query))

    return '/' + '/'.join(head_parts) + '/' + tail_part
+
+
# Cached uid returned by nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the uid of the account named 'nobody'.  If no such account
    exists, returns one more than the largest uid in the password
    database.  The result is cached in the module-level name 'nobody'.
    Returns -1 (uncached) if the pwd module cannot be imported.
    """
    global nobody
    # Compare against None so that any cached uid counts as cached.
    if nobody is not None:
        return nobody
    try:
        import pwd
    except ImportError:
        return -1
    try:
        nobody = pwd.getpwnam('nobody')[2]
    except KeyError:
        nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
+
+
def executable(path):
    """Test for executable file.

    Returns True if os.stat() succeeds on *path* and at least one of the
    execute permission bits (user, group or other) is set in its mode;
    returns False otherwise, including when the stat call fails.
    """
    try:
        st = os.stat(path)
    except os.error:
        return False
    # 0o111 selects the execute bits for user, group and other.
    return st.st_mode & 0o111 != 0
+
+
def test(HandlerClass = CGIHTTPRequestHandler,
         ServerClass = BaseHTTPServer.HTTPServer):
    """Run a test server by delegating to SimpleHTTPServer.test().

    Arguments:

    HandlerClass - request handler class passed through
    ServerClass - server class passed through
    """
    SimpleHTTPServer.test(HandlerClass, ServerClass)


if __name__ == '__main__':
    test()
diff --git a/cashew/Lib/ConfigParser.py b/cashew/Lib/ConfigParser.py
new file mode 100644
index 0000000..7e6cdbc
--- /dev/null
+++ b/cashew/Lib/ConfigParser.py
@@ -0,0 +1,753 @@
+"""Configuration file parser.
+
+A setup file consists of sections, led by a "[section]" header,
+and followed by "name: value" entries, with continuations and such in
+the style of RFC 822.
+
+The option values can contain format strings which refer to other values in
+the same section, or values in a special [DEFAULT] section.
+
+For example:
+
+ something: %(dir)s/whatever
+
+would resolve the "%(dir)s" to the value of dir. All reference
+expansions are done late, on demand.
+
+Intrinsic defaults can be specified by passing them into the
+ConfigParser constructor as a dictionary.
+
+class:
+
+ConfigParser -- responsible for parsing a list of
+ configuration files, and managing the parsed database.
+
+ methods:
+
+ __init__(defaults=None)
+ create the parser and specify a dictionary of intrinsic defaults. The
+ keys must be strings, the values must be appropriate for %()s string
+ interpolation. Note that `__name__' is always an intrinsic default;
+ its value is the section's name.
+
+ sections()
+ return all the configuration section names, sans DEFAULT
+
+ has_section(section)
+ return whether the given section exists
+
+ has_option(section, option)
+ return whether the given option exists in the given section
+
+ options(section)
+ return list of configuration options for the named section
+
+ read(filenames)
+ read and parse the list of named configuration files, given by
+ name. A single filename is also allowed. Non-existing files
+ are ignored. Return list of successfully read files.
+
+ readfp(fp, filename=None)
+ read and parse one configuration file, given as a file object.
+ The filename defaults to fp.name; it is only used in error
+ messages (if fp has no `name' attribute, the string `??>' is used).
+
+ get(section, option, raw=False, vars=None)
+ return a string value for the named option. All % interpolations are
+ expanded in the return values, based on the defaults passed into the
+ constructor and the DEFAULT section. Additional substitutions may be
+ provided using the `vars' argument, which must be a dictionary whose
+ contents override any pre-existing defaults.
+
+ getint(section, option)
+ like get(), but convert value to an integer
+
+ getfloat(section, option)
+ like get(), but convert value to a float
+
+ getboolean(section, option)
+ like get(), but convert value to a boolean (currently case
+ insensitively defined as 0, false, no, off for False, and 1, true,
+ yes, on for True). Returns False or True.
+
+ items(section, raw=False, vars=None)
+ return a list of tuples with (name, value) for each option
+ in the section.
+
+ remove_section(section)
+ remove the given file section and all its options
+
+ remove_option(section, option)
+ remove the given option from the given section
+
+ set(section, option, value)
+ set the given option
+
+ write(fp)
+ write the configuration state in .ini format
+"""
+
+try:
+ from collections import OrderedDict as _default_dict
+except ImportError:
+ # fallback for setup.py which hasn't yet built _collections
+ _default_dict = dict
+
+import re
+
+__all__ = ["NoSectionError", "DuplicateSectionError", "NoOptionError",
+ "InterpolationError", "InterpolationDepthError",
+ "InterpolationSyntaxError", "ParsingError",
+ "MissingSectionHeaderError",
+ "ConfigParser", "SafeConfigParser", "RawConfigParser",
+ "DEFAULTSECT", "MAX_INTERPOLATION_DEPTH"]
+
+DEFAULTSECT = "DEFAULT"
+
+MAX_INTERPOLATION_DEPTH = 10
+
+
+
+# exception classes
+class Error(Exception):
+ """Base class for ConfigParser exceptions."""
+
+ def _get_message(self):
+ """Getter for 'message'; needed only to override deprecation in
+ BaseException."""
+ return self.__message
+
+ def _set_message(self, value):
+ """Setter for 'message'; needed only to override deprecation in
+ BaseException."""
+ self.__message = value
+
+ # BaseException.message has been deprecated since Python 2.6. To prevent
+ # DeprecationWarning from popping up over this pre-existing attribute, use
+ # a new property that takes lookup precedence.
+ message = property(_get_message, _set_message)
+
+ def __init__(self, msg=''):
+ self.message = msg
+ Exception.__init__(self, msg)
+
+ def __repr__(self):
+ return self.message
+
+ __str__ = __repr__
+
+class NoSectionError(Error):
+ """Raised when no section matches a requested option."""
+
+ def __init__(self, section):
+ Error.__init__(self, 'No section: %r' % (section,))
+ self.section = section
+ self.args = (section, )
+
+class DuplicateSectionError(Error):
+ """Raised when a section is multiply-created."""
+
+ def __init__(self, section):
+ Error.__init__(self, "Section %r already exists" % section)
+ self.section = section
+ self.args = (section, )
+
+class NoOptionError(Error):
+ """A requested option was not found."""
+
+ def __init__(self, option, section):
+ Error.__init__(self, "No option %r in section: %r" %
+ (option, section))
+ self.option = option
+ self.section = section
+ self.args = (option, section)
+
+class InterpolationError(Error):
+ """Base class for interpolation-related exceptions."""
+
+ def __init__(self, option, section, msg):
+ Error.__init__(self, msg)
+ self.option = option
+ self.section = section
+ self.args = (option, section, msg)
+
+class InterpolationMissingOptionError(InterpolationError):
+ """A string substitution required a setting which was not available."""
+
+ def __init__(self, option, section, rawval, reference):
+ msg = ("Bad value substitution:\n"
+ "\tsection: [%s]\n"
+ "\toption : %s\n"
+ "\tkey : %s\n"
+ "\trawval : %s\n"
+ % (section, option, reference, rawval))
+ InterpolationError.__init__(self, option, section, msg)
+ self.reference = reference
+ self.args = (option, section, rawval, reference)
+
+class InterpolationSyntaxError(InterpolationError):
+ """Raised when the source text into which substitutions are made
+ does not conform to the required syntax."""
+
+class InterpolationDepthError(InterpolationError):
+ """Raised when substitutions are nested too deeply."""
+
+ def __init__(self, option, section, rawval):
+ msg = ("Value interpolation too deeply recursive:\n"
+ "\tsection: [%s]\n"
+ "\toption : %s\n"
+ "\trawval : %s\n"
+ % (section, option, rawval))
+ InterpolationError.__init__(self, option, section, msg)
+ self.args = (option, section, rawval)
+
+class ParsingError(Error):
+ """Raised when a configuration file does not follow legal syntax."""
+
+ def __init__(self, filename):
+ Error.__init__(self, 'File contains parsing errors: %s' % filename)
+ self.filename = filename
+ self.errors = []
+ self.args = (filename, )
+
+ def append(self, lineno, line):
+ self.errors.append((lineno, line))
+ self.message += '\n\t[line %2d]: %s' % (lineno, line)
+
+class MissingSectionHeaderError(ParsingError):
+ """Raised when a key-value pair is found before any section header."""
+
+ def __init__(self, filename, lineno, line):
+ Error.__init__(
+ self,
+ 'File contains no section headers.\nfile: %s, line: %d\n%r' %
+ (filename, lineno, line))
+ self.filename = filename
+ self.lineno = lineno
+ self.line = line
+ self.args = (filename, lineno, line)
+
+
+class RawConfigParser:
+ def __init__(self, defaults=None, dict_type=_default_dict,
+ allow_no_value=False):
+ self._dict = dict_type
+ self._sections = self._dict()
+ self._defaults = self._dict()
+ if allow_no_value:
+ self._optcre = self.OPTCRE_NV
+ else:
+ self._optcre = self.OPTCRE
+ if defaults:
+ for key, value in defaults.items():
+ self._defaults[self.optionxform(key)] = value
+
+ def defaults(self):
+ return self._defaults
+
+ def sections(self):
+ """Return a list of section names, excluding [DEFAULT]"""
+ # self._sections will never have [DEFAULT] in it
+ return self._sections.keys()
+
+ def add_section(self, section):
+ """Create a new section in the configuration.
+
+ Raise DuplicateSectionError if a section by the specified name
+ already exists. Raise ValueError if name is DEFAULT or any of its
+ case-insensitive variants.
+ """
+ if section.lower() == "default":
+ raise ValueError, 'Invalid section name: %s' % section
+
+ if section in self._sections:
+ raise DuplicateSectionError(section)
+ self._sections[section] = self._dict()
+
+ def has_section(self, section):
+ """Indicate whether the named section is present in the configuration.
+
+ The DEFAULT section is not acknowledged.
+ """
+ return section in self._sections
+
+ def options(self, section):
+ """Return a list of option names for the given section name."""
+ try:
+ opts = self._sections[section].copy()
+ except KeyError:
+ raise NoSectionError(section)
+ opts.update(self._defaults)
+ if '__name__' in opts:
+ del opts['__name__']
+ return opts.keys()
+
+ def read(self, filenames):
+ """Read and parse a filename or a list of filenames.
+
+ Files that cannot be opened are silently ignored; this is
+ designed so that you can specify a list of potential
+ configuration file locations (e.g. current directory, user's
+ home directory, systemwide directory), and all existing
+ configuration files in the list will be read. A single
+ filename may also be given.
+
+ Return list of successfully read files.
+ """
+ if isinstance(filenames, basestring):
+ filenames = [filenames]
+ read_ok = []
+ for filename in filenames:
+ try:
+ fp = open(filename)
+ except IOError:
+ continue
+ self._read(fp, filename)
+ fp.close()
+ read_ok.append(filename)
+ return read_ok
+
+ def readfp(self, fp, filename=None):
+ """Like read() but the argument must be a file-like object.
+
+ The `fp' argument must have a `readline' method. Optional
+ second argument is the `filename', which if not given, is
+ taken from fp.name. If fp has no `name' attribute, `??>' is
+ used.
+
+ """
+ if filename is None:
+ try:
+ filename = fp.name
+ except AttributeError:
+ filename = '??>'
+ self._read(fp, filename)
+
+ def get(self, section, option):
+ opt = self.optionxform(option)
+ if section not in self._sections:
+ if section != DEFAULTSECT:
+ raise NoSectionError(section)
+ if opt in self._defaults:
+ return self._defaults[opt]
+ else:
+ raise NoOptionError(option, section)
+ elif opt in self._sections[section]:
+ return self._sections[section][opt]
+ elif opt in self._defaults:
+ return self._defaults[opt]
+ else:
+ raise NoOptionError(option, section)
+
+ def items(self, section):
+ try:
+ d2 = self._sections[section]
+ except KeyError:
+ if section != DEFAULTSECT:
+ raise NoSectionError(section)
+ d2 = self._dict()
+ d = self._defaults.copy()
+ d.update(d2)
+ if "__name__" in d:
+ del d["__name__"]
+ return d.items()
+
+ def _get(self, section, conv, option):
+ return conv(self.get(section, option))
+
+ def getint(self, section, option):
+ return self._get(section, int, option)
+
+ def getfloat(self, section, option):
+ return self._get(section, float, option)
+
+ _boolean_states = {'1': True, 'yes': True, 'true': True, 'on': True,
+ '0': False, 'no': False, 'false': False, 'off': False}
+
+ def getboolean(self, section, option):
+ v = self.get(section, option)
+ if v.lower() not in self._boolean_states:
+ raise ValueError, 'Not a boolean: %s' % v
+ return self._boolean_states[v.lower()]
+
+ def optionxform(self, optionstr):
+ return optionstr.lower()
+
+ def has_option(self, section, option):
+ """Check for the existence of a given option in a given section."""
+ if not section or section == DEFAULTSECT:
+ option = self.optionxform(option)
+ return option in self._defaults
+ elif section not in self._sections:
+ return False
+ else:
+ option = self.optionxform(option)
+ return (option in self._sections[section]
+ or option in self._defaults)
+
+ def set(self, section, option, value=None):
+ """Set an option."""
+ if not section or section == DEFAULTSECT:
+ sectdict = self._defaults
+ else:
+ try:
+ sectdict = self._sections[section]
+ except KeyError:
+ raise NoSectionError(section)
+ sectdict[self.optionxform(option)] = value
+
+ def write(self, fp):
+ """Write an .ini-format representation of the configuration state."""
+ if self._defaults:
+ fp.write("[%s]\n" % DEFAULTSECT)
+ for (key, value) in self._defaults.items():
+ fp.write("%s = %s\n" % (key, str(value).replace('\n', '\n\t')))
+ fp.write("\n")
+ for section in self._sections:
+ fp.write("[%s]\n" % section)
+ for (key, value) in self._sections[section].items():
+ if key == "__name__":
+ continue
+ if (value is not None) or (self._optcre == self.OPTCRE):
+ key = " = ".join((key, str(value).replace('\n', '\n\t')))
+ fp.write("%s\n" % (key))
+ fp.write("\n")
+
+ def remove_option(self, section, option):
+ """Remove an option."""
+ if not section or section == DEFAULTSECT:
+ sectdict = self._defaults
+ else:
+ try:
+ sectdict = self._sections[section]
+ except KeyError:
+ raise NoSectionError(section)
+ option = self.optionxform(option)
+ existed = option in sectdict
+ if existed:
+ del sectdict[option]
+ return existed
+
+ def remove_section(self, section):
+ """Remove a file section."""
+ existed = section in self._sections
+ if existed:
+ del self._sections[section]
+ return existed
+
+ #
+ # Regular expressions for parsing section headers and options.
+ #
+ SECTCRE = re.compile(
+ r'\[' # [
+ r'(?P<header>[^]]+)' # very permissive!
+ r'\]' # ]
+ )
+ OPTCRE = re.compile(
+ r'(?P<option>[^:=\s][^:=]*)' # very permissive!
+ r'\s*(?P<vi>[:=])\s*' # any number of space/tab,
+ # followed by separator
+ # (either : or =), followed
+ # by any # space/tab
+ r'(?P<value>.*)$' # everything up to eol
+ )
+ OPTCRE_NV = re.compile(
+ r'(?P<option>[^:=\s][^:=]*)' # very permissive!
+ r'\s*(?:' # any number of space/tab,
+ r'(?P<vi>[:=])\s*' # optionally followed by
+ # separator (either : or
+ # =), followed by any #
+ # space/tab
+ r'(?P<value>.*))?$' # everything up to eol
+ )
+
+ def _read(self, fp, fpname):
+ """Parse a sectioned setup file.
+
+ Each section in a setup file contains a title line at the top,
+ indicated by a name in square brackets (`[]'), plus key/value
+ options lines, indicated by `name: value' format lines.
+ Continuations are represented by an embedded newline then
+ leading whitespace. Blank lines, lines beginning with a '#',
+ and just about everything else are ignored.
+ """
+ cursect = None # None, or a dictionary
+ optname = None
+ lineno = 0
+ e = None # None, or an exception
+ while True:
+ line = fp.readline()
+ if not line:
+ break
+ lineno = lineno + 1
+ # comment or blank line?
+ if line.strip() == '' or line[0] in '#;':
+ continue
+ if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR":
+ # no leading whitespace
+ continue
+ # continuation line?
+ if line[0].isspace() and cursect is not None and optname:
+ value = line.strip()
+ if value:
+ cursect[optname].append(value)
+ # a section header or option header?
+ else:
+ # is it a section header?
+ mo = self.SECTCRE.match(line)
+ if mo:
+ sectname = mo.group('header')
+ if sectname in self._sections:
+ cursect = self._sections[sectname]
+ elif sectname == DEFAULTSECT:
+ cursect = self._defaults
+ else:
+ cursect = self._dict()
+ cursect['__name__'] = sectname
+ self._sections[sectname] = cursect
+ # So sections can't start with a continuation line
+ optname = None
+ # no section header in the file?
+ elif cursect is None:
+ raise MissingSectionHeaderError(fpname, lineno, line)
+ # an option line?
+ else:
+ mo = self._optcre.match(line)
+ if mo:
+ optname, vi, optval = mo.group('option', 'vi', 'value')
+ optname = self.optionxform(optname.rstrip())
+ # This check is fine because the OPTCRE cannot
+ # match if it would set optval to None
+ if optval is not None:
+ if vi in ('=', ':') and ';' in optval:
+ # ';' is a comment delimiter only if it follows
+ # a spacing character
+ pos = optval.find(';')
+ if pos != -1 and optval[pos-1].isspace():
+ optval = optval[:pos]
+ optval = optval.strip()
+ # allow empty values
+ if optval == '""':
+ optval = ''
+ cursect[optname] = [optval]
+ else:
+ # valueless option handling
+ cursect[optname] = optval
+ else:
+ # a non-fatal parsing error occurred. set up the
+ # exception but keep going. the exception will be
+ # raised at the end of the file and will contain a
+ # list of all bogus lines
+ if not e:
+ e = ParsingError(fpname)
+ e.append(lineno, repr(line))
+ # if any parsing errors occurred, raise an exception
+ if e:
+ raise e
+
+ # join the multi-line values collected while reading
+ all_sections = [self._defaults]
+ all_sections.extend(self._sections.values())
+ for options in all_sections:
+ for name, val in options.items():
+ if isinstance(val, list):
+ options[name] = '\n'.join(val)
+
+import UserDict as _UserDict
+
+class _Chainmap(_UserDict.DictMixin):
+ """Combine multiple mappings for successive lookups.
+
+ For example, to emulate Python's normal lookup sequence:
+
+ import __builtin__
+ pylookup = _Chainmap(locals(), globals(), vars(__builtin__))
+ """
+
+ def __init__(self, *maps):
+ self._maps = maps
+
+ def __getitem__(self, key):
+ for mapping in self._maps:
+ try:
+ return mapping[key]
+ except KeyError:
+ pass
+ raise KeyError(key)
+
+ def keys(self):
+ result = []
+ seen = set()
+ for mapping in self._maps:
+ for key in mapping:
+ if key not in seen:
+ result.append(key)
+ seen.add(key)
+ return result
+
+class ConfigParser(RawConfigParser):
+
+ def get(self, section, option, raw=False, vars=None):
+ """Get an option value for a given section.
+
+ If `vars' is provided, it must be a dictionary. The option is looked up
+ in `vars' (if provided), `section', and in `defaults' in that order.
+
+ All % interpolations are expanded in the return values, unless the
+ optional argument `raw' is true. Values for interpolation keys are
+ looked up in the same manner as the option.
+
+ The section DEFAULT is special.
+ """
+ sectiondict = {}
+ try:
+ sectiondict = self._sections[section]
+ except KeyError:
+ if section != DEFAULTSECT:
+ raise NoSectionError(section)
+ # Update with the entry specific variables
+ vardict = {}
+ if vars:
+ for key, value in vars.items():
+ vardict[self.optionxform(key)] = value
+ d = _Chainmap(vardict, sectiondict, self._defaults)
+ option = self.optionxform(option)
+ try:
+ value = d[option]
+ except KeyError:
+ raise NoOptionError(option, section)
+
+ if raw or value is None:
+ return value
+ else:
+ return self._interpolate(section, option, value, d)
+
+ def items(self, section, raw=False, vars=None):
+ """Return a list of tuples with (name, value) for each option
+ in the section.
+
+ All % interpolations are expanded in the return values, based on the
+ defaults passed into the constructor, unless the optional argument
+ `raw' is true. Additional substitutions may be provided using the
+ `vars' argument, which must be a dictionary whose contents override
+ any pre-existing defaults.
+
+ The section DEFAULT is special.
+ """
+ d = self._defaults.copy()
+ try:
+ d.update(self._sections[section])
+ except KeyError:
+ if section != DEFAULTSECT:
+ raise NoSectionError(section)
+ # Update with the entry specific variables
+ if vars:
+ for key, value in vars.items():
+ d[self.optionxform(key)] = value
+ options = d.keys()
+ if "__name__" in options:
+ options.remove("__name__")
+ if raw:
+ return [(option, d[option])
+ for option in options]
+ else:
+ return [(option, self._interpolate(section, option, d[option], d))
+ for option in options]
+
+ def _interpolate(self, section, option, rawval, vars):
+ # do the string interpolation
+ value = rawval
+ depth = MAX_INTERPOLATION_DEPTH
+ while depth: # Loop through this until it's done
+ depth -= 1
+ if value and "%(" in value:
+ value = self._KEYCRE.sub(self._interpolation_replace, value)
+ try:
+ value = value % vars
+ except KeyError, e:
+ raise InterpolationMissingOptionError(
+ option, section, rawval, e.args[0])
+ else:
+ break
+ if value and "%(" in value:
+ raise InterpolationDepthError(option, section, rawval)
+ return value
+
+ _KEYCRE = re.compile(r"%\(([^)]*)\)s|.")
+
+ def _interpolation_replace(self, match):
+ s = match.group(1)
+ if s is None:
+ return match.group()
+ else:
+ return "%%(%s)s" % self.optionxform(s)
+
+
+class SafeConfigParser(ConfigParser):
+
+ def _interpolate(self, section, option, rawval, vars):
+ # do the string interpolation
+ L = []
+ self._interpolate_some(option, L, rawval, section, vars, 1)
+ return ''.join(L)
+
+ _interpvar_re = re.compile(r"%\(([^)]+)\)s")
+
+ def _interpolate_some(self, option, accum, rest, section, map, depth):
+ if depth > MAX_INTERPOLATION_DEPTH:
+ raise InterpolationDepthError(option, section, rest)
+ while rest:
+ p = rest.find("%")
+ if p < 0:
+ accum.append(rest)
+ return
+ if p > 0:
+ accum.append(rest[:p])
+ rest = rest[p:]
+ # p is no longer used
+ c = rest[1:2]
+ if c == "%":
+ accum.append("%")
+ rest = rest[2:]
+ elif c == "(":
+ m = self._interpvar_re.match(rest)
+ if m is None:
+ raise InterpolationSyntaxError(option, section,
+ "bad interpolation variable reference %r" % rest)
+ var = self.optionxform(m.group(1))
+ rest = rest[m.end():]
+ try:
+ v = map[var]
+ except KeyError:
+ raise InterpolationMissingOptionError(
+ option, section, rest, var)
+ if "%" in v:
+ self._interpolate_some(option, accum, v,
+ section, map, depth + 1)
+ else:
+ accum.append(v)
+ else:
+ raise InterpolationSyntaxError(
+ option, section,
+ "'%%' must be followed by '%%' or '(', found: %r" % (rest,))
+
+ def set(self, section, option, value=None):
+ """Set an option. Extend ConfigParser.set: check for string values."""
+ # The only legal non-string value if we allow valueless
+ # options is None, so we need to check if the value is a
+ # string if:
+ # - we do not allow valueless options, or
+ # - we allow valueless options but the value is not None
+ if self._optcre is self.OPTCRE or value:
+ if not isinstance(value, basestring):
+ raise TypeError("option values must be strings")
+ if value is not None:
+ # check for bad percent signs:
+ # first, replace all "good" interpolations
+ tmp_value = value.replace('%%', '')
+ tmp_value = self._interpvar_re.sub('', tmp_value)
+ # then, check if there's a lone percent sign left
+ if '%' in tmp_value:
+ raise ValueError("invalid interpolation syntax in %r at "
+ "position %d" % (value, tmp_value.find('%')))
+ ConfigParser.set(self, section, option, value)
diff --git a/cashew/Lib/Cookie.py b/cashew/Lib/Cookie.py
new file mode 100644
index 0000000..1082363
--- /dev/null
+++ b/cashew/Lib/Cookie.py
@@ -0,0 +1,773 @@
+####
+# Copyright 2000 by Timothy O'Malley
+#
+# All Rights Reserved
+#
+# Permission to use, copy, modify, and distribute this software
+# and its documentation for any purpose and without fee is hereby
+# granted, provided that the above copyright notice appear in all
+# copies and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Timothy O'Malley not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
+# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+#
+####
+#
+# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp
+# by Timothy O'Malley
+#
+# Cookie.py is a Python module for the handling of HTTP
+# cookies as a Python dictionary. See RFC 2109 for more
+# information on cookies.
+#
+# The original idea to treat Cookies as a dictionary came from
+# Dave Mitchell (davem@magnet.com) in 1995, when he released the
+# first version of nscookie.py.
+#
+####
+
+r"""
+Here's a sample session to show how to use this module.
+At the moment, this is the only documentation.
+
+The Basics
+----------
+
+Importing is easy..
+
+ >>> import Cookie
+
+Most of the time you start by creating a cookie. Cookies come in
+three flavors, each with slightly different encoding semantics, but
+more on that later.
+
+ >>> C = Cookie.SimpleCookie()
+ >>> C = Cookie.SerialCookie()
+ >>> C = Cookie.SmartCookie()
+
+[Note: Long-time users of Cookie.py will remember using
+Cookie.Cookie() to create a Cookie object. Although deprecated, it
+is still supported by the code. See the Backward Compatibility notes
+for more information.]
+
+Once you've created your Cookie, you can add values just as if it were
+a dictionary.
+
+ >>> C = Cookie.SmartCookie()
+ >>> C["fig"] = "newton"
+ >>> C["sugar"] = "wafer"
+ >>> C.output()
+ 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer'
+
+Notice that the printable representation of a Cookie is the
+appropriate format for a Set-Cookie: header. This is the
+default behavior. You can change the header and printed
+attributes by using the .output() function
+
+ >>> C = Cookie.SmartCookie()
+ >>> C["rocky"] = "road"
+ >>> C["rocky"]["path"] = "/cookie"
+ >>> print C.output(header="Cookie:")
+ Cookie: rocky=road; Path=/cookie
+ >>> print C.output(attrs=[], header="Cookie:")
+ Cookie: rocky=road
+
+The load() method of a Cookie extracts cookies from a string. In a
+CGI script, you would use this method to extract the cookies from the
+HTTP_COOKIE environment variable.
+
+ >>> C = Cookie.SmartCookie()
+ >>> C.load("chips=ahoy; vienna=finger")
+ >>> C.output()
+ 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
+
+The load() method is darn-tootin smart about identifying cookies
+within a string. Escaped quotation marks, nested semicolons, and other
+such trickeries do not confuse it.
+
+ >>> C = Cookie.SmartCookie()
+ >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
+ >>> print C
+ Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
+
+Each element of the Cookie also supports all of the RFC 2109
+Cookie attributes. Here's an example which sets the Path
+attribute.
+
+ >>> C = Cookie.SmartCookie()
+ >>> C["oreo"] = "doublestuff"
+ >>> C["oreo"]["path"] = "/"
+ >>> print C
+ Set-Cookie: oreo=doublestuff; Path=/
+
+Each dictionary element has a 'value' attribute, which gives you
+back the value associated with the key.
+
+ >>> C = Cookie.SmartCookie()
+ >>> C["twix"] = "none for you"
+ >>> C["twix"].value
+ 'none for you'
+
+
+A Bit More Advanced
+-------------------
+
+As mentioned before, there are three different flavors of Cookie
+objects, each with different encoding/decoding semantics. This
+section briefly discusses the differences.
+
+SimpleCookie
+
+The SimpleCookie expects that all values should be standard strings.
+Just to be sure, SimpleCookie invokes the str() builtin to convert
+the value to a string, when the values are set dictionary-style.
+
+ >>> C = Cookie.SimpleCookie()
+ >>> C["number"] = 7
+ >>> C["string"] = "seven"
+ >>> C["number"].value
+ '7'
+ >>> C["string"].value
+ 'seven'
+ >>> C.output()
+ 'Set-Cookie: number=7\r\nSet-Cookie: string=seven'
+
+
+SerialCookie
+
+The SerialCookie expects that all values should be serialized using
+cPickle (or pickle, if cPickle isn't available). As a result of
+serializing, SerialCookie can save almost any Python object to a
+value, and recover the exact same object when the cookie has been
+returned. (SerialCookie can yield some strange-looking cookie
+values, however.)
+
+ >>> C = Cookie.SerialCookie()
+ >>> C["number"] = 7
+ >>> C["string"] = "seven"
+ >>> C["number"].value
+ 7
+ >>> C["string"].value
+ 'seven'
+ >>> C.output()
+ 'Set-Cookie: number="I7\\012."\r\nSet-Cookie: string="S\'seven\'\\012p1\\012."'
+
+Be warned, however, if SerialCookie cannot de-serialize a value (because
+it isn't a valid pickle'd object), IT WILL RAISE AN EXCEPTION.
+
+
+SmartCookie
+
+The SmartCookie combines aspects of each of the other two flavors.
+When setting a value in a dictionary-fashion, the SmartCookie will
+serialize (ala cPickle) the value *if and only if* it isn't a
+Python string. String objects are *not* serialized. Similarly,
+when the load() method parses out values, it attempts to de-serialize
+the value. If it fails, then it falls back to treating the value
+as a string.
+
+ >>> C = Cookie.SmartCookie()
+ >>> C["number"] = 7
+ >>> C["string"] = "seven"
+ >>> C["number"].value
+ 7
+ >>> C["string"].value
+ 'seven'
+ >>> C.output()
+ 'Set-Cookie: number="I7\\012."\r\nSet-Cookie: string=seven'
+
+
+Backwards Compatibility
+-----------------------
+
+In order to keep compatibility with earlier versions of Cookie.py,
+it is still possible to use Cookie.Cookie() to create a Cookie. In
+fact, this simply returns a SmartCookie.
+
+ >>> C = Cookie.Cookie()
+ >>> print C.__class__.__name__
+ SmartCookie
+
+
+Finis.
+""" #"
+# ^
+# |----helps out font-lock
+
+#
+# Import our required modules
+#
+import string
+
+try:
+ from cPickle import dumps, loads
+except ImportError:
+ from pickle import dumps, loads
+
+import re, warnings
+
+__all__ = ["CookieError","BaseCookie","SimpleCookie","SerialCookie",
+ "SmartCookie","Cookie"]
+
+_nulljoin = ''.join
+_semispacejoin = '; '.join
+_spacejoin = ' '.join
+
+#
+# Define an exception visible to External modules
+#
+class CookieError(Exception):
+ pass
+
+
+# These quoting routines conform to the RFC2109 specification, which in
+# turn references the character definitions from RFC2068. They provide
+# a two-way quoting algorithm. Any non-text character is translated
+# into a 4 character sequence: a backslash followed by the
+# three-digit octal equivalent of the character. Any '\' or '"' is
+# quoted with a preceding '\' slash.
+#
+# These are taken from RFC2068 and RFC2109.
+# _LegalChars is the list of chars which don't require "'s
+# _Translator hash-table for fast quoting
+#
+_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~"
+_Translator = {
+ '\000' : '\\000', '\001' : '\\001', '\002' : '\\002',
+ '\003' : '\\003', '\004' : '\\004', '\005' : '\\005',
+ '\006' : '\\006', '\007' : '\\007', '\010' : '\\010',
+ '\011' : '\\011', '\012' : '\\012', '\013' : '\\013',
+ '\014' : '\\014', '\015' : '\\015', '\016' : '\\016',
+ '\017' : '\\017', '\020' : '\\020', '\021' : '\\021',
+ '\022' : '\\022', '\023' : '\\023', '\024' : '\\024',
+ '\025' : '\\025', '\026' : '\\026', '\027' : '\\027',
+ '\030' : '\\030', '\031' : '\\031', '\032' : '\\032',
+ '\033' : '\\033', '\034' : '\\034', '\035' : '\\035',
+ '\036' : '\\036', '\037' : '\\037',
+
+ # Because of the way browsers really handle cookies (as opposed
+ # to what the RFC says) we also encode , and ;
+
+ ',' : '\\054', ';' : '\\073',
+
+ '"' : '\\"', '\\' : '\\\\',
+
+ '\177' : '\\177', '\200' : '\\200', '\201' : '\\201',
+ '\202' : '\\202', '\203' : '\\203', '\204' : '\\204',
+ '\205' : '\\205', '\206' : '\\206', '\207' : '\\207',
+ '\210' : '\\210', '\211' : '\\211', '\212' : '\\212',
+ '\213' : '\\213', '\214' : '\\214', '\215' : '\\215',
+ '\216' : '\\216', '\217' : '\\217', '\220' : '\\220',
+ '\221' : '\\221', '\222' : '\\222', '\223' : '\\223',
+ '\224' : '\\224', '\225' : '\\225', '\226' : '\\226',
+ '\227' : '\\227', '\230' : '\\230', '\231' : '\\231',
+ '\232' : '\\232', '\233' : '\\233', '\234' : '\\234',
+ '\235' : '\\235', '\236' : '\\236', '\237' : '\\237',
+ '\240' : '\\240', '\241' : '\\241', '\242' : '\\242',
+ '\243' : '\\243', '\244' : '\\244', '\245' : '\\245',
+ '\246' : '\\246', '\247' : '\\247', '\250' : '\\250',
+ '\251' : '\\251', '\252' : '\\252', '\253' : '\\253',
+ '\254' : '\\254', '\255' : '\\255', '\256' : '\\256',
+ '\257' : '\\257', '\260' : '\\260', '\261' : '\\261',
+ '\262' : '\\262', '\263' : '\\263', '\264' : '\\264',
+ '\265' : '\\265', '\266' : '\\266', '\267' : '\\267',
+ '\270' : '\\270', '\271' : '\\271', '\272' : '\\272',
+ '\273' : '\\273', '\274' : '\\274', '\275' : '\\275',
+ '\276' : '\\276', '\277' : '\\277', '\300' : '\\300',
+ '\301' : '\\301', '\302' : '\\302', '\303' : '\\303',
+ '\304' : '\\304', '\305' : '\\305', '\306' : '\\306',
+ '\307' : '\\307', '\310' : '\\310', '\311' : '\\311',
+ '\312' : '\\312', '\313' : '\\313', '\314' : '\\314',
+ '\315' : '\\315', '\316' : '\\316', '\317' : '\\317',
+ '\320' : '\\320', '\321' : '\\321', '\322' : '\\322',
+ '\323' : '\\323', '\324' : '\\324', '\325' : '\\325',
+ '\326' : '\\326', '\327' : '\\327', '\330' : '\\330',
+ '\331' : '\\331', '\332' : '\\332', '\333' : '\\333',
+ '\334' : '\\334', '\335' : '\\335', '\336' : '\\336',
+ '\337' : '\\337', '\340' : '\\340', '\341' : '\\341',
+ '\342' : '\\342', '\343' : '\\343', '\344' : '\\344',
+ '\345' : '\\345', '\346' : '\\346', '\347' : '\\347',
+ '\350' : '\\350', '\351' : '\\351', '\352' : '\\352',
+ '\353' : '\\353', '\354' : '\\354', '\355' : '\\355',
+ '\356' : '\\356', '\357' : '\\357', '\360' : '\\360',
+ '\361' : '\\361', '\362' : '\\362', '\363' : '\\363',
+ '\364' : '\\364', '\365' : '\\365', '\366' : '\\366',
+ '\367' : '\\367', '\370' : '\\370', '\371' : '\\371',
+ '\372' : '\\372', '\373' : '\\373', '\374' : '\\374',
+ '\375' : '\\375', '\376' : '\\376', '\377' : '\\377'
+ }
+
+_idmap = ''.join(map(chr, xrange(256)))
+
+def _quote(str, LegalChars=_LegalChars,
+           idmap=_idmap, translate=string.translate):
+    """Return *str* in a form usable as a cookie value.
+
+    A string consisting only of characters from LegalChars is returned
+    unchanged.  Anything else is wrapped in double quotes, with every
+    character that has an entry in _Translator replaced by that entry.
+    """
+    # Deleting all legal characters leaves exactly the ones that force
+    # quoting; an empty remainder means the string can go out as it is.
+    leftover = translate(str, idmap, LegalChars)
+    if leftover == "":
+        return str
+    escaped = [_Translator.get(ch, ch) for ch in str]
+    return '"' + _nulljoin(escaped) + '"'
+# end _quote
+
+
+# Kept for backward compatibility; _unquote() itself now uses _UnquotePatt.
+_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
+_QuotePatt = re.compile(r"[\\].")
+
+# A backslash followed by either a three-digit octal code (group 1) or
+# any other single character (group 2).  The octal alternative is tried
+# first, which is the same precedence the two separate patterns had.
+_UnquotePatt = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))")
+
+def _unquote_replace(match):
+    """Return the character that one escape sequence stands for."""
+    octal = match.group(1)
+    if octal is not None:
+        return chr(int(octal, 8))
+    return match.group(2)
+
+def _unquote(str):
+    """Undo the quoting applied by _quote().
+
+    Strings that are not wrapped in a pair of double quotes are returned
+    unchanged.  Otherwise the surrounding quotes are removed and the
+    escape sequences are decoded, for example:
+        \\012 --> newline
+        \\"   --> "
+    """
+    # If there aren't any doublequotes,
+    # then there can't be any special characters.  See RFC 2109.
+    if len(str) < 2:
+        return str
+    if str[0] != '"' or str[-1] != '"':
+        return str
+
+    # Decode every escape in a single left-to-right pass.  Searching for
+    # both patterns again after each escape, as was done previously,
+    # makes the work grow quadratically with the number of escapes.
+    return _UnquotePatt.sub(_unquote_replace, str[1:-1])
+# end _unquote
+
+# The _getdate() routine is used to set the expiration time in
+# the cookie's HTTP header. By default, _getdate() returns the
+# current time in the appropriate "expires" format for a
+# Set-Cookie header. The one optional argument is an offset from
+# now, in seconds. For example, an offset of -3600 means "one hour ago".
+# The offset may be a floating point number.
+#
+
+_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
+
+_monthname = [None,
+              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+
+def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname):
+    """Return the GMT time *future* seconds from now as an "expires" date.
+
+    weekdayname and monthname are the lookup tables used to turn the
+    weekday and month numbers reported by gmtime() into their names.
+    """
+    import time
+    stamp = time.gmtime(time.time() + future)
+    year, month, day, hh, mm, ss, wd = stamp[:7]
+    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
+        weekdayname[wd], day, monthname[month], year, hh, mm, ss)
+
+
+#
+# A class to hold ONE key,value pair.
+# In a cookie, each such pair may have several attributes.
+# so this class is used to keep the attributes associated
+# with the appropriate key,value pair.
+# This class also includes a coded_value attribute, which
+# is used to hold the network representation of the
+# value. This is most useful when Python objects are
+# pickled for network transit.
+#
+
+class Morsel(dict):
+    """Hold one cookie key/value pair together with its attributes.
+
+    The pair itself is kept in the instance attributes ``key``, ``value``
+    and ``coded_value``; the dictionary items are the reserved cookie
+    attributes listed in ``_reserved``, each defaulting to "".
+    """
+    # RFC 2109 lists these attributes as reserved:
+    #   path       comment         domain
+    #   max-age    secure      version
+    #
+    # For historical reasons, these attributes are also reserved:
+    #   expires
+    #
+    # This is an extension from Microsoft:
+    #   httponly
+    #
+    # This dictionary provides a mapping from the lowercase
+    # variant on the left to the appropriate traditional
+    # formatting on the right.
+    _reserved = { "expires" : "expires",
+                  "path"        : "Path",
+                  "comment" : "Comment",
+                  "domain"      : "Domain",
+                  "max-age" : "Max-Age",
+                  "secure"      : "secure",
+                  "httponly"  : "httponly",
+                  "version" : "Version",
+                  }
+
+    # Attributes that are written as a bare word, without "=value".
+    _flags = {'secure', 'httponly'}
+
+    def __init__(self):
+        """Create an empty morsel with every reserved attribute set to ""."""
+        # Set defaults
+        self.key = self.value = self.coded_value = None
+
+        # Set default attributes (bypassing our own __setitem__)
+        for K in self._reserved:
+            dict.__setitem__(self, K, "")
+    # end __init__
+
+    def __setitem__(self, K, V):
+        """Set reserved attribute K (case-insensitive) to V.
+
+        Raises CookieError when K is not a reserved attribute name.
+        """
+        K = K.lower()
+        if K not in self._reserved:
+            raise CookieError("Invalid Attribute %s" % K)
+        dict.__setitem__(self, K, V)
+    # end __setitem__
+
+    def isReservedKey(self, K):
+        """Return true when K, ignoring case, is a reserved attribute name."""
+        return K.lower() in self._reserved
+    # end isReservedKey
+
+    def set(self, key, val, coded_val,
+            LegalChars=_LegalChars,
+            idmap=_idmap, translate=string.translate):
+        """Store the cookie's key, its value and its encoded value.
+
+        Raises CookieError when key is a reserved attribute name or
+        contains a character that is not in LegalChars.
+        """
+        # First we verify that the key isn't a reserved word
+        # Second we make sure it only contains legal characters
+        if key.lower() in self._reserved:
+            raise CookieError("Attempt to set a reserved key: %s" % key)
+        if "" != translate(key, idmap, LegalChars):
+            raise CookieError("Illegal key value: %s" % key)
+
+        # It's a good key, so save it.
+        self.key = key
+        self.value = val
+        self.coded_value = coded_val
+    # end set
+
+    def output(self, attrs=None, header = "Set-Cookie:"):
+        """Return the morsel as a header line: header, a space, the cookie."""
+        return "%s %s" % ( header, self.OutputString(attrs) )
+
+    __str__ = output
+
+    def __repr__(self):
+        return '<%s: %s=%s>' % (self.__class__.__name__,
+                                self.key, repr(self.value) )
+
+    def js_output(self, attrs=None):
+        """Return a script element that sets this cookie from JavaScript.
+
+        Double quotes in the cookie string are backslash-escaped so that
+        it can sit inside the JavaScript string literal.
+        """
+        # Print javascript
+        return """
+        <script type="text/javascript">
+        <!-- begin hiding
+        document.cookie = \"%s\";
+        // end hiding -->
+        </script>
+        """ % ( self.OutputString(attrs).replace('"',r'\"'), )
+    # end js_output()
+
+    def OutputString(self, attrs=None):
+        """Return "key=coded_value" followed by the non-empty attributes.
+
+        attrs limits which attributes may appear; by default all reserved
+        attributes are eligible.  Attributes are emitted in sorted order
+        and joined with _semispacejoin.
+        """
+        if attrs is None:
+            attrs = self._reserved
+
+        # First, the key=value pair
+        result = ["%s=%s" % (self.key, self.coded_value)]
+        RA = result.append
+
+        # Now add any defined attributes
+        for K, V in sorted(self.items()):
+            if V == "" or K not in attrs:
+                continue
+            label = self._reserved[K]
+            if K == "expires" and type(V) == type(1):
+                # An integer is an offset in seconds from now.
+                RA("%s=%s" % (label, _getdate(V)))
+            elif K == "max-age" and type(V) == type(1):
+                RA("%s=%d" % (label, V))
+            elif K in self._flags:
+                # Flags carry no value of their own.
+                RA(str(label))
+            else:
+                RA("%s=%s" % (label, V))
+
+        # Return the result
+        return _semispacejoin(result)
+    # end OutputString
+# end Morsel class
+
+
+
+#
+# Pattern for finding cookie
+#
+# This used to be strict parsing based on the RFC2109 and RFC2068
+# specifications. I have since discovered that MSIE 3.0x doesn't
+# follow the character rules outlined in those specs. As a
+# result, the parsing rules here are less strict.
+#
+
+_LegalKeyChars  = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
+_LegalValueChars = _LegalKeyChars + r"\[\]"
+# The two named groups, 'key' and 'val', are the ones that
+# BaseCookie.__ParseString reads back with match.group().
+_CookiePattern = re.compile(
+    r"(?x)"                       # This is a Verbose pattern
+    r"\s*"                        # Optional whitespace at start of cookie
+    r"(?P<key>"                   # Start of group 'key'
+    "["+ _LegalKeyChars +"]+?"    # Any word of at least one letter, nongreedy
+    r")"                          # End of group 'key'
+    r"("                          # Optional group: there may not be a value.
+    r"\s*=\s*"                    # Equal Sign
+    r"(?P<val>"                   # Start of group 'val'
+    r'"(?:[^\\"]|\\.)*"'          # Any doublequoted string
+    r"|"                          # or
+    r"\w{3},\s[\s\w\d-]{9,11}\s[\d:]{8}\sGMT" # Special case for "expires" attr
+    r"|"                          # or
+    "["+ _LegalValueChars +"]*"   # Any word or empty string
+    r")"                          # End of group 'val'
+    r")?"                         # End of optional value group
+    r"\s*"                        # Any number of spaces.
+    r"(\s+|;|$)"                  # Ending either at space, semicolon, or EOS.
+    )
+
+
+# At long last, here is the cookie class.
+# Using this class is almost just like using a dictionary.
+# See this module's docstring for example usage.
+#
+class BaseCookie(dict):
+    """A container class for a set of Morsels, keyed by cookie name."""
+
+    def value_decode(self, val):
+        """real_value, coded_value = value_decode(STRING)
+        Called prior to setting a cookie's value from the network
+        representation.  The VALUE is the value read from HTTP
+        header.
+        Override this function to modify the behavior of cookies.
+        """
+        return val, val
+    # end value_decode
+
+    def value_encode(self, val):
+        """real_value, coded_value = value_encode(VALUE)
+        Called prior to setting a cookie's value from the dictionary
+        representation.  The VALUE is the value being assigned.
+        Override this function to modify the behavior of cookies.
+        """
+        text = str(val)
+        return text, text
+    # end value_encode
+
+    def __init__(self, input=None):
+        """Create the container, loading *input* when it is non-empty."""
+        if input:
+            self.load(input)
+    # end __init__
+
+    def __set(self, key, real_value, coded_value):
+        """Private method for setting a cookie's value"""
+        # Reuse the morsel already stored under this key, if any.
+        morsel = self.get(key, Morsel())
+        morsel.set(key, real_value, coded_value)
+        dict.__setitem__(self, key, morsel)
+    # end __set
+
+    def __setitem__(self, key, value):
+        """Dictionary style assignment."""
+        if not isinstance(value, Morsel):
+            rval, cval = self.value_encode(value)
+            self.__set(key, rval, cval)
+            return
+        # allow assignment of constructed Morsels (e.g. for pickling)
+        dict.__setitem__(self, key, value)
+    # end __setitem__
+
+    def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"):
+        """Return a string suitable for HTTP."""
+        lines = [morsel.output(attrs, header)
+                 for _, morsel in sorted(self.items())]
+        return sep.join(lines)
+    # end output
+
+    __str__ = output
+
+    def __repr__(self):
+        pairs = ['%s=%s' % (name, repr(morsel.value))
+                 for name, morsel in sorted(self.items())]
+        return '<%s: %s>' % (self.__class__.__name__, _spacejoin(pairs))
+
+    def js_output(self, attrs=None):
+        """Return a string suitable for JavaScript."""
+        snippets = [morsel.js_output(attrs)
+                    for _, morsel in sorted(self.items())]
+        return _nulljoin(snippets)
+    # end js_output
+
+    def load(self, rawdata):
+        """Load cookies from a string (presumably HTTP_COOKIE) or
+        from a dictionary.  Loading cookies from a dictionary 'd'
+        is equivalent to calling:
+            map(Cookie.__setitem__, d.keys(), d.values())
+        """
+        if type(rawdata) != type(""):
+            # self.update() wouldn't call our custom __setitem__
+            for name, value in rawdata.items():
+                self[name] = value
+            return
+        self.__ParseString(rawdata)
+        return
+    # end load()
+
+    def __ParseString(self, str, patt=_CookiePattern):
+        """Parse *str* with *patt*, storing each cookie and its attributes."""
+        pos = 0              # Where the next match attempt starts
+        end = len(str)       # Length of string
+        current = None       # Morsel that following attributes belong to
+
+        while 0 <= pos < end:
+            # Start looking for a cookie
+            match = patt.match(str, pos)
+            if not match:
+                break        # No more cookies
+
+            K = match.group("key")
+            V = match.group("val")
+            pos = match.end(0)
+
+            # Parse the key, value in case it's metainfo
+            if K[0] == "$":
+                # We ignore attributes which pertain to the cookie
+                # mechanism as a whole.  See RFC 2109.
+                # (Does anyone care?)
+                if current:
+                    current[ K[1:] ] = V
+            elif K.lower() in Morsel._reserved:
+                if not current:
+                    continue
+                if V is not None:
+                    current[K] = _unquote(V)
+                elif K.lower() in Morsel._flags:
+                    # A flag given without a value is simply switched on.
+                    current[K] = True
+            elif V is not None:
+                rval, cval = self.value_decode(V)
+                self.__set(K, rval, cval)
+                current = self[K]
+    # end __ParseString
+# end BaseCookie class
+
+class SimpleCookie(BaseCookie):
+    """SimpleCookie
+    SimpleCookie supports strings as cookie values.  When setting
+    the value using the dictionary assignment notation, SimpleCookie
+    calls the builtin str() to convert the value to a string.  Values
+    received from HTTP are kept as strings.
+    """
+    def value_decode(self, val):
+        """Return (unquoted value, value exactly as received)."""
+        decoded = _unquote(val)
+        return decoded, val
+
+    def value_encode(self, val):
+        """Return (str(val), quoted form of str(val))."""
+        text = str(val)
+        encoded = _quote(text)
+        return text, encoded
+# end SimpleCookie
+
+class SerialCookie(BaseCookie):
+    """SerialCookie
+    SerialCookie supports arbitrary objects as cookie values. All
+    values are serialized (using cPickle) before being sent to the
+    client.  All incoming values are assumed to be valid Pickle
+    representations.  IF AN INCOMING VALUE IS NOT IN A VALID PICKLE
+    FORMAT, THEN AN EXCEPTION WILL BE RAISED.
+
+    Note: Large cookie values add overhead because they must be
+    retransmitted on every HTTP transaction.
+
+    Note: HTTP has a 2k limit on the size of a cookie.  This class
+    does not check for this limit, so be careful!!!
+    """
+    def __init__(self, input=None):
+        """Emit a DeprecationWarning, then initialise like BaseCookie."""
+        warnings.warn("SerialCookie class is insecure; do not use it",
+                      DeprecationWarning)
+        BaseCookie.__init__(self, input)
+    # end __init__
+
+    def value_decode(self, val):
+        """Return (unpickled object, value exactly as received)."""
+        # SECURITY: this unpickles data supplied by the client, which is
+        # why the constructor warns against using the class at all.
+        # This could raise an exception!
+        real_value = loads( _unquote(val) )
+        return real_value, val
+
+    def value_encode(self, val):
+        """Return (val, quoted pickle of val)."""
+        pickled = dumps(val)
+        return val, _quote( pickled )
+# end SerialCookie
+
+class SmartCookie(BaseCookie):
+    """SmartCookie
+    SmartCookie supports arbitrary objects as cookie values.  If the
+    object is a string, then it is quoted.  If the object is not a
+    string, however, then SmartCookie will use cPickle to serialize
+    the object into a string representation.
+
+    Note: Large cookie values add overhead because they must be
+    retransmitted on every HTTP transaction.
+
+    Note: HTTP has a 2k limit on the size of a cookie.  This class
+    does not check for this limit, so be careful!!!
+    """
+    def __init__(self, input=None):
+        """Emit a DeprecationWarning, then initialise like BaseCookie."""
+        warnings.warn("Cookie/SmartCookie class is insecure; do not use it",
+                      DeprecationWarning)
+        BaseCookie.__init__(self, input)
+    # end __init__
+
+    def value_decode(self, val):
+        """Return (decoded value, value exactly as received).
+
+        The unquoted string is unpickled when possible; when unpickling
+        fails the unquoted string itself is the decoded value.
+        """
+        strval = _unquote(val)
+        # SECURITY: this unpickles data supplied by the client, which is
+        # why the constructor warns against using the class at all.
+        try:
+            return loads(strval), val
+        except Exception:
+            # Not a pickle: treat it as a plain string.  Only Exception
+            # is caught so that KeyboardInterrupt and SystemExit still
+            # propagate instead of being silently swallowed.
+            return strval, val
+
+    def value_encode(self, val):
+        """Return (val, quoted string form); non-strings are pickled first."""
+        if type(val) == type(""):
+            return val, _quote(val)
+        else:
+            return val, _quote( dumps(val) )
+# end SmartCookie
+
+
+###########################################################
+# Backwards Compatibility: Don't break any existing code!
+
+# We provide Cookie() as an alias for SmartCookie()
+Cookie = SmartCookie
+
+#
+###########################################################
+
+def _test():
+    """Run the Cookie module's doctests and return testmod()'s result."""
+    import doctest
+    import Cookie
+    return doctest.testmod(Cookie)
+
+if __name__ == "__main__":
+    _test()
+
+
+#Local Variables:
+#tab-width: 4
+#end:
diff --git a/cashew/Lib/DocXMLRPCServer.py b/cashew/Lib/DocXMLRPCServer.py
new file mode 100644
index 0000000..4064ec2
--- /dev/null
+++ b/cashew/Lib/DocXMLRPCServer.py
@@ -0,0 +1,279 @@
+"""Self documenting XML-RPC Server.
+
+This module can be used to create XML-RPC servers that
+serve pydoc-style documentation in response to HTTP
+GET requests. This documentation is dynamically generated
+based on the functions and methods registered with the
+server.
+
+This module is built upon the pydoc and SimpleXMLRPCServer
+modules.
+"""
+
+import pydoc
+import inspect
+import re
+import sys
+
+from SimpleXMLRPCServer import (SimpleXMLRPCServer,
+ SimpleXMLRPCRequestHandler,
+ CGIXMLRPCRequestHandler,
+ resolve_dotted_attribute)
+
+class ServerHTMLDoc(pydoc.HTMLDoc):
+    """Class used to generate pydoc HTML document for a server"""
+
+    def markup(self, text, escape=None, funcs={}, classes={}, methods={}):
+        """Mark up some plain text, given a context of symbols to look for.
+        Each context dictionary maps object names to anchor names.
+
+        URLs, "RFC n" and "PEP n" references become hyperlinks, names
+        followed by "(" are linked through namelink(), "self.name" gets
+        its name emphasised, and everything in between is passed through
+        *escape* (self.escape by default).  The context dictionaries are
+        only read, never modified.
+        """
+        escape = escape or self.escape
+        results = []
+        here = 0
+
+        # XXX Note that this regular expression does not allow for the
+        # hyperlinking of arbitrary strings being used as method
+        # names. Only methods with names consisting of word characters
+        # and '.'s are hyperlinked.
+        pattern = re.compile(r'\b((http|ftp)://\S+[\w/]|'
+                                r'RFC[- ]?(\d+)|'
+                                r'PEP[- ]?(\d+)|'
+                                r'(self\.)?((?:\w|\.)+))\b')
+        while 1:
+            match = pattern.search(text, here)
+            if not match: break
+            start, end = match.span()
+            results.append(escape(text[here:start]))
+
+            all, scheme, rfc, pep, selfdot, name = match.groups()
+            if scheme:
+                # The URL goes inside a double-quoted attribute, so any
+                # double quote in it has to be written as an entity.
+                url = escape(all).replace('"', '&quot;')
+                results.append('<a href="%s">%s</a>' % (url, url))
+            elif rfc:
+                url = 'http://www.rfc-editor.org/rfc/rfc%d.txt' % int(rfc)
+                results.append('<a href="%s">%s</a>' % (url, escape(all)))
+            elif pep:
+                url = 'http://www.python.org/dev/peps/pep-%04d/' % int(pep)
+                results.append('<a href="%s">%s</a>' % (url, escape(all)))
+            elif text[end:end+1] == '(':
+                results.append(self.namelink(name, methods, funcs, classes))
+            elif selfdot:
+                results.append('self.<strong>%s</strong>' % name)
+            else:
+                results.append(self.namelink(name, classes))
+            here = end
+        results.append(escape(text[here:]))
+        return ''.join(results)
+
+    def docroutine(self, object, name, mod=None,
+                   funcs={}, classes={}, methods={}, cl=None):
+        """Produce HTML documentation for a function or method object.
+
+        *object* may also be an (argspec, docstring) tuple, in which case
+        the tuple's entries are used instead of introspection.
+        """
+
+        anchor = (cl and cl.__name__ or '') + '-' + name
+        note = ''
+
+        title = '<a name="%s"><strong>%s</strong></a>' % (
+            self.escape(anchor), self.escape(name))
+
+        if inspect.ismethod(object):
+            args, varargs, varkw, defaults = inspect.getargspec(object.im_func)
+            # exclude the argument bound to the instance, it will be
+            # confusing to the non-Python user
+            argspec = inspect.formatargspec (
+                    args[1:],
+                    varargs,
+                    varkw,
+                    defaults,
+                    formatvalue=self.formatvalue
+                )
+        elif inspect.isfunction(object):
+            args, varargs, varkw, defaults = inspect.getargspec(object)
+            argspec = inspect.formatargspec(
+                args, varargs, varkw, defaults, formatvalue=self.formatvalue)
+        else:
+            argspec = '(...)'
+
+        if isinstance(object, tuple):
+            argspec = object[0] or argspec
+            docstring = object[1] or ""
+        else:
+            docstring = pydoc.getdoc(object)
+
+        decl = title + argspec + (note and self.grey(
+               '<font face="helvetica, arial">%s</font>' % note))
+
+        doc = self.markup(
+            docstring, self.preformat, funcs, classes, methods)
+        doc = doc and '<dd><tt>%s</tt></dd>' % doc
+        return '<dl><dt>%s</dt>%s</dl>\n' % (decl, doc)
+
+    def docserver(self, server_name, package_documentation, methods):
+        """Produce HTML documentation for an XML-RPC server.
+
+        *methods* maps each method name to the object documented for it.
+        """
+
+        # Map every method name, and the object behind it, to the anchor
+        # that docroutine() generates for that method.
+        fdict = {}
+        for key, value in methods.items():
+            fdict[key] = '#-' + key
+            fdict[value] = fdict[key]
+
+        server_name = self.escape(server_name)
+        head = '<big><big><strong>%s</strong></big></big>' % server_name
+        result = self.heading(head, '#ffffff', '#7799ee')
+
+        doc = self.markup(package_documentation, self.preformat, fdict)
+        doc = doc and '<tt>%s</tt>' % doc
+        result = result + '<p>%s</p>\n' % doc
+
+        contents = []
+        method_items = sorted(methods.items())
+        for key, value in method_items:
+            contents.append(self.docroutine(value, key, funcs=fdict))
+        result = result + self.bigsection(
+            'Methods', '#ffffff', '#eeaa77', pydoc.join(contents))
+
+        return result
+
+class XMLRPCDocGenerator:
+    """Generates documentation for an XML-RPC server.
+
+    This class is designed as mix-in and should not
+    be constructed directly.
+    """
+
+    def __init__(self):
+        """Install the default server name, title and description."""
+        # setup variables used for HTML documentation
+        self.server_name = 'XML-RPC Server Documentation'
+        self.server_documentation = \
+            "This server exports the following methods through the XML-RPC "\
+            "protocol."
+        self.server_title = 'XML-RPC Server Documentation'
+
+    def set_server_title(self, server_title):
+        """Set the HTML title of the generated server documentation"""
+
+        self.server_title = server_title
+
+    def set_server_name(self, server_name):
+        """Set the name of the generated HTML server documentation"""
+
+        self.server_name = server_name
+
+    def set_server_documentation(self, server_documentation):
+        """Set the documentation string for the entire server."""
+
+        self.server_documentation = server_documentation
+
+    def generate_html_documentation(self):
+        """generate_html_documentation() => html documentation for the server
+
+        Generates HTML documentation for the server using introspection for
+        installed functions and instances that do not implement the
+        _dispatch method. Alternatively, instances can choose to implement
+        the _get_method_argstring(method_name) method to provide the
+        argument string used in the documentation and the
+        _methodHelp(method_name) method to provide the help text used
+        in the documentation.
+
+        Raises AssertionError when a listed method is not a registered
+        function and no instance is installed.
+        """
+
+        methods = {}
+
+        for method_name in self.system_listMethods():
+            if method_name in self.funcs:
+                method = self.funcs[method_name]
+            elif self.instance is not None:
+                method_info = [None, None] # argspec, documentation
+                if hasattr(self.instance, '_get_method_argstring'):
+                    method_info[0] = self.instance._get_method_argstring(method_name)
+                if hasattr(self.instance, '_methodHelp'):
+                    method_info[1] = self.instance._methodHelp(method_name)
+
+                method_info = tuple(method_info)
+                if method_info != (None, None):
+                    method = method_info
+                elif not hasattr(self.instance, '_dispatch'):
+                    try:
+                        method = resolve_dotted_attribute(
+                                    self.instance,
+                                    method_name
+                                    )
+                    except AttributeError:
+                        method = method_info
+                else:
+                    method = method_info
+            else:
+                # Raised explicitly rather than with an assert statement:
+                # asserts are removed under -O, which would leave
+                # `method` unbound or carrying the previous iteration's
+                # value.
+                raise AssertionError(
+                    "Could not find method in self.functions and no "
+                    "instance installed")
+
+            methods[method_name] = method
+
+        documenter = ServerHTMLDoc()
+        documentation = documenter.docserver(
+                                self.server_name,
+                                self.server_documentation,
+                                methods
+                            )
+
+        return documenter.page(self.server_title, documentation)
+
+class DocXMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
+    """XML-RPC and documentation request handler class.
+
+    Handles all HTTP POST requests and attempts to decode them as
+    XML-RPC requests.
+
+    Handles all HTTP GET requests and interprets them as requests
+    for documentation.
+    """
+
+    def do_GET(self):
+        """Handles the HTTP GET request.
+
+        Interpret all HTTP GET requests as requests for server
+        documentation.  A request for a path that is not a valid RPC
+        path is answered through report_404() instead.
+        """
+        if self.is_rpc_path_valid():
+            page = self.server.generate_html_documentation()
+            self.send_response(200)
+            headers = (("Content-type", "text/html"),
+                       ("Content-length", str(len(page))))
+            for header_name, header_value in headers:
+                self.send_header(header_name, header_value)
+            self.end_headers()
+            self.wfile.write(page)
+        else:
+            # The path is not legal
+            self.report_404()
+
+class DocXMLRPCServer(SimpleXMLRPCServer, XMLRPCDocGenerator):
+    """XML-RPC and HTML documentation server.
+
+    Adds the ability to serve server documentation to the capabilities
+    of SimpleXMLRPCServer.
+    """
+
+    def __init__(self, addr, requestHandler=DocXMLRPCRequestHandler,
+                 logRequests=1, allow_none=False, encoding=None,
+                 bind_and_activate=True):
+        """Initialise both base classes, the XML-RPC server first."""
+        server_args = (addr, requestHandler, logRequests,
+                       allow_none, encoding, bind_and_activate)
+        SimpleXMLRPCServer.__init__(self, *server_args)
+        XMLRPCDocGenerator.__init__(self)
+
+class DocCGIXMLRPCRequestHandler(CGIXMLRPCRequestHandler,
+                                 XMLRPCDocGenerator):
+    """Handler for XML-RPC data and documentation requests passed through
+    CGI"""
+
+    def __init__(self):
+        """Initialise both base classes, the CGI handler first."""
+        CGIXMLRPCRequestHandler.__init__(self)
+        XMLRPCDocGenerator.__init__(self)
+
+    def handle_get(self):
+        """Handles the HTTP GET request.
+
+        Interpret all HTTP GET requests as requests for server
+        documentation.  The response headers, a blank line and the page
+        are written to standard output.
+        """
+
+        page = self.generate_html_documentation()
+        page_length = len(page)
+
+        print 'Content-Type: text/html'
+        print 'Content-Length: %d' % page_length
+        print
+        sys.stdout.write(page)
diff --git a/cashew/Lib/HTMLParser.py b/cashew/Lib/HTMLParser.py
new file mode 100644
index 0000000..fb9380e
--- /dev/null
+++ b/cashew/Lib/HTMLParser.py
@@ -0,0 +1,476 @@
+"""A parser for HTML and XHTML."""
+
+# This file is based on sgmllib.py, but the API is slightly different.
+
+# XXX There should be a way to distinguish between PCDATA (parsed
+# character data -- the normal case), RCDATA (replaceable character
+# data -- only char and entity references and end tags are special)
+# and CDATA (character data -- only end tags are special).
+
+
+import markupbase
+import re
+
+# Regular expressions used for parsing
+
+interesting_normal = re.compile('[&<]')
+incomplete = re.compile('&[a-zA-Z#]')
+
+entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
+# NOTE(review): the leading "&#" was missing from this copy and has been
+# restored; without it the pattern matches bare digit runs rather than
+# numeric character references.  Confirm against the upstream module.
+charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
+
+starttagopen = re.compile('<[a-zA-Z]')
+piclose = re.compile('>')
+commentclose = re.compile(r'--\s*>')
+
+# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
+# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
+# note: if you change tagfind/attrfind remember to update locatestarttagend too
+tagfind = re.compile('([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
+# this regex is currently unused, but left for backward compatibility
+tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*')
+
+attrfind = re.compile(
+    r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
+    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
+
+locatestarttagend = re.compile(r"""
+  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
+  (?:[\s/]*                          # optional whitespace before attribute name
+    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
+      (?:\s*=+\s*                    # value indicator
+        (?:'[^']*'                   # LITA-enclosed value
+          |"[^"]*"                   # LIT-enclosed value
+          |(?!['"])[^>\s]*           # bare value
+         )
+       )?(?:\s|/(?!>))*
+     )*
+   )?
+  \s*                                # trailing whitespace
+""", re.VERBOSE)
+endendtag = re.compile('>')
+# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
+# </ and the tag name, so maybe this should be fixed
+# NOTE(review): the leading "</" was missing from this copy and has been
+# restored; confirm against the upstream module.
+endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
+
+
+class HTMLParseError(Exception):
+    """Exception raised for all parse errors."""
+
+    def __init__(self, msg, position=(None, None)):
+        """Record the message and the (lineno, offset) pair in *position*."""
+        assert msg
+        self.msg = msg
+        self.lineno, self.offset = position[0], position[1]
+
+    def __str__(self):
+        """Return the message followed by the line and column, when known."""
+        pieces = [self.msg]
+        if self.lineno is not None:
+            pieces.append(", at line %d" % self.lineno)
+        if self.offset is not None:
+            # The stored offset counts from 0; the column shown counts from 1.
+            pieces.append(", column %d" % (self.offset + 1))
+        return "".join(pieces)
+
+
+class HTMLParser(markupbase.ParserBase):
+ """Find tags and other markup and call handler functions.
+
+ Usage:
+ p = HTMLParser()
+ p.feed(data)
+ ...
+ p.close()
+
+ Start tags are handled by calling self.handle_starttag() or
+ self.handle_startendtag(); end tags by self.handle_endtag(). The
+ data between tags is passed from the parser to the derived class
+ by calling self.handle_data() with the data as argument (the data
+ may be split up in arbitrary chunks). Entity references are
+ passed by calling self.handle_entityref() with the entity
+ reference as the argument. Numeric character references are
+ passed to self.handle_charref() with the string containing the
+ reference as the argument.
+ """
+
+ CDATA_CONTENT_ELEMENTS = ("script", "style")
+
+
+ def __init__(self):
+ """Initialize and reset this instance."""
+ self.reset()
+
+ def reset(self):
+ """Reset this instance. Loses all unprocessed data."""
+ self.rawdata = ''
+ self.lasttag = '???'
+ self.interesting = interesting_normal
+ self.cdata_elem = None
+ markupbase.ParserBase.reset(self)
+
+ def feed(self, data):
+ r"""Feed data to the parser.
+
+ Call this as often as you want, with as little or as much text
+ as you want (may include '\n').
+ """
+ self.rawdata = self.rawdata + data
+ self.goahead(0)
+
+ def close(self):
+ """Handle any buffered data."""
+ self.goahead(1)
+
+ def error(self, message):
+ raise HTMLParseError(message, self.getpos())
+
+ __starttag_text = None
+
+ def get_starttag_text(self):
+ """Return full source of start tag: '<...>'."""
+ return self.__starttag_text
+
+    def set_cdata_mode(self, elem):
+        """Make the end tag of element *elem* the only interesting markup.
+
+        The element name is stored lower-cased in self.cdata_elem and
+        self.interesting is replaced by a case-insensitive pattern for
+        that element's end tag.
+        """
+        self.cdata_elem = elem.lower()
+        # NOTE(review): the leading "</" was missing from this copy and
+        # has been restored; without it the pattern also matches the
+        # bare element name followed by ">" in ordinary text.  Confirm
+        # against the upstream module.
+        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
+
+ def clear_cdata_mode(self):
+ self.interesting = interesting_normal
+ self.cdata_elem = None
+
+ # Internal -- handle data as far as reasonable. May leave state
+ # and data to be processed by a subsequent call. If 'end' is
+ # true, force handling all data as if followed by EOF marker.
+ def goahead(self, end):
+ rawdata = self.rawdata
+ i = 0
+ n = len(rawdata)
+ while i < n:
+ match = self.interesting.search(rawdata, i) # < or &
+ if match:
+ j = match.start()
+ else:
+ if self.cdata_elem:
+ break
+ j = n
+ if i < j: self.handle_data(rawdata[i:j])
+ i = self.updatepos(i, j)
+ if i == n: break
+ startswith = rawdata.startswith
+ if startswith('<', i):
+ if starttagopen.match(rawdata, i): # < + letter
+ k = self.parse_starttag(i)
+ elif startswith("", i):
+ k = self.parse_endtag(i)
+ elif startswith("| UnixStreamServer |
+ +-----------+ +------------------+
+ |
+ v
+ +-----------+ +--------------------+
+ | UDPServer |------->| UnixDatagramServer |
+ +-----------+ +--------------------+
+
+Note that UnixDatagramServer derives from UDPServer, not from
+UnixStreamServer -- the only difference between an IP and a Unix
+stream server is the address family, which is simply repeated in both
+unix server classes.
+
+Forking and threading versions of each type of server can be created
+using the ForkingMixIn and ThreadingMixIn mix-in classes. For
+instance, a threading UDP server class is created as follows:
+
+ class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
+
+The Mix-in class must come first, since it overrides a method defined
+in UDPServer! Setting the various member variables also changes
+the behavior of the underlying server mechanism.
+
+To implement a service, you must derive a class from
+BaseRequestHandler and redefine its handle() method. You can then run
+various versions of the service by combining one of the server classes
+with your request handler class.
+
+The request handler class must be different for datagram or stream
+services. This can be hidden by using the request handler
+subclasses StreamRequestHandler or DatagramRequestHandler.
+
+Of course, you still have to use your head!
+
+For instance, it makes no sense to use a forking server if the service
+contains state in memory that can be modified by requests (since the
+modifications in the child process would never reach the initial state
+kept in the parent process and passed to each child). In this case,
+you can use a threading server, but you will probably have to use
+locks to prevent two requests that arrive nearly simultaneously from
+applying conflicting changes to the server state.
+
+On the other hand, if you are building e.g. an HTTP server, where all
+data is stored externally (e.g. in the file system), a synchronous
+class will essentially render the service "deaf" while one request is
+being handled -- which may be for a very long time if a client is slow
+to read all the data it has requested. Here a threading or forking
+server is appropriate.
+
+In some cases, it may be appropriate to process part of a request
+synchronously, but to finish processing in a forked child depending on
+the request data. This can be implemented by using a synchronous
+server and doing an explicit fork in the request handler class
+handle() method.
+
+Another approach to handling multiple simultaneous requests in an
+environment that supports neither threads nor fork (or where these are
+too expensive or inappropriate for the service) is to maintain an
+explicit table of partially finished requests and to use select() to
+decide which request to work on next (or whether to handle a new
+incoming request). This is particularly important for stream services
+where each client can potentially be connected for a long time (if
+threads or subprocesses cannot be used).
+
+Future work:
+- Standard classes for Sun RPC (which uses either UDP or TCP)
+- Standard mix-in classes to implement various authentication
+ and encryption schemes
+- Standard framework for select-based multiplexing
+
+XXX Open problems:
+- What to do with out-of-band data?
+
+BaseServer:
+- split generic "request" functionality out into BaseServer class.
+ Copyright (C) 2000 Luke Kenneth Casson Leighton
+
+ example: read entries from a SQL database (requires overriding
+ get_request() to return a table entry from the database).
+ entry is processed by a RequestHandlerClass.
+
+"""
+
+# Author of the BaseServer patch: Luke Kenneth Casson Leighton
+
+__version__ = "0.4"
+
+
+import socket
+import select
+import sys
+import os
+import errno
+try:
+ import threading
+except ImportError:
+ import dummy_threading as threading
+
+__all__ = ["TCPServer","UDPServer","ForkingUDPServer","ForkingTCPServer",
+ "ThreadingUDPServer","ThreadingTCPServer","BaseRequestHandler",
+ "StreamRequestHandler","DatagramRequestHandler",
+ "ThreadingMixIn", "ForkingMixIn"]
+if hasattr(socket, "AF_UNIX"):
+ __all__.extend(["UnixStreamServer","UnixDatagramServer",
+ "ThreadingUnixStreamServer",
+ "ThreadingUnixDatagramServer"])
+
+def _eintr_retry(func, *args):
+    """Call func(*args), calling it again whenever it fails with EINTR.
+
+    Returns whatever func returns.  An OSError or select.error whose
+    first argument is not errno.EINTR is re-raised unchanged.
+    """
+    while True:
+        try:
+            result = func(*args)
+        except (OSError, select.error) as err:
+            if err.args[0] == errno.EINTR:
+                continue
+            raise
+        else:
+            return result
+
+class BaseServer:
+
+    """Base class for server classes.
+
+    Methods for the caller:
+
+    - __init__(server_address, RequestHandlerClass)
+    - serve_forever(poll_interval=0.5)
+    - shutdown()
+    - handle_request()  # if you do not use serve_forever()
+    - fileno() -> int   # for select()
+
+    Methods that may be overridden:
+
+    - server_bind()
+    - server_activate()
+    - get_request() -> request, client_address
+    - handle_timeout()
+    - verify_request(request, client_address)
+    - server_close()
+    - process_request(request, client_address)
+    - shutdown_request(request)
+    - close_request(request)
+    - handle_error(request, client_address)
+
+    Methods for derived classes:
+
+    - finish_request(request, client_address)
+
+    Class variables that may be overridden by derived classes or
+    instances:
+
+    - timeout
+    - address_family
+    - socket_type
+    - allow_reuse_address
+
+    Instance variables:
+
+    - server_address
+    - RequestHandlerClass
+    - socket
+
+    Note: this class does not itself define fileno(), get_request(),
+    server_bind() or the socket attribute; a concrete subclass has to
+    supply them before serve_forever() or handle_request() is used.
+
+    """
+
+    # Seconds handle_request() waits for a request; None means no limit.
+    timeout = None
+
+    def __init__(self, server_address, RequestHandlerClass):
+        """Constructor.  May be extended, do not override."""
+        self.server_address = server_address
+        self.RequestHandlerClass = RequestHandlerClass
+        # Set by serve_forever() when its loop has ended; shutdown()
+        # waits on it.
+        self.__is_shut_down = threading.Event()
+        # Flag polled by the serve_forever() loop.
+        self.__shutdown_request = False
+
+    def server_activate(self):
+        """Called by constructor to activate the server.
+
+        May be overridden.
+
+        """
+        pass
+
+    def serve_forever(self, poll_interval=0.5):
+        """Handle one request at a time until shutdown.
+
+        Polls for shutdown every poll_interval seconds. Ignores
+        self.timeout. If you need to do periodic tasks, do them in
+        another thread.
+        """
+        self.__is_shut_down.clear()
+        try:
+            while not self.__shutdown_request:
+                # XXX: Consider using another file descriptor or
+                # connecting to the socket to wake this up instead of
+                # polling. Polling reduces our responsiveness to a
+                # shutdown request and wastes cpu at all other times.
+                r, w, e = _eintr_retry(select.select, [self], [], [],
+                                       poll_interval)
+                if self in r:
+                    self._handle_request_noblock()
+        finally:
+            # Reset the flag so serve_forever() can be called again,
+            # then release any thread blocked in shutdown().
+            self.__shutdown_request = False
+            self.__is_shut_down.set()
+
+    def shutdown(self):
+        """Stops the serve_forever loop.
+
+        Blocks until the loop has finished. This must be called while
+        serve_forever() is running in another thread, or it will
+        deadlock.
+        """
+        self.__shutdown_request = True
+        self.__is_shut_down.wait()
+
+    # The distinction between handling, getting, processing and
+    # finishing a request is fairly arbitrary.  Remember:
+    #
+    # - handle_request() is the top-level call.  It calls
+    #   select, get_request(), verify_request() and process_request()
+    # - get_request() is different for stream or datagram sockets
+    # - process_request() is the place that may fork a new process
+    #   or create a new thread to finish the request
+    # - finish_request() instantiates the request handler class;
+    #   this constructor will handle the request all by itself
+
+    def handle_request(self):
+        """Handle one request, possibly blocking.
+
+        Respects self.timeout.  If no request arrives in time,
+        handle_timeout() is called instead.
+        """
+        # Support people who used socket.settimeout() to escape
+        # handle_request before self.timeout was available.
+        timeout = self.socket.gettimeout()
+        if timeout is None:
+            timeout = self.timeout
+        elif self.timeout is not None:
+            timeout = min(timeout, self.timeout)
+        fd_sets = _eintr_retry(select.select, [self], [], [], timeout)
+        if not fd_sets[0]:
+            self.handle_timeout()
+            return
+        self._handle_request_noblock()
+
+    def _handle_request_noblock(self):
+        """Handle one request, without blocking.
+
+        I assume that select.select has returned that the socket is
+        readable before this function was called, so there should be
+        no risk of blocking in get_request().
+        """
+        try:
+            request, client_address = self.get_request()
+        except socket.error:
+            return
+        if self.verify_request(request, client_address):
+            try:
+                self.process_request(request, client_address)
+            except:
+                # Deliberately broad: a failing request must not take
+                # the server loop down with it.
+                self.handle_error(request, client_address)
+                self.shutdown_request(request)
+        else:
+            self.shutdown_request(request)
+
+    def handle_timeout(self):
+        """Called if no new request arrives within self.timeout.
+
+        Overridden by ForkingMixIn.
+        """
+        pass
+
+    def verify_request(self, request, client_address):
+        """Verify the request.  May be overridden.
+
+        Return True if we should proceed with this request.
+
+        """
+        return True
+
+    def process_request(self, request, client_address):
+        """Call finish_request.
+
+        Overridden by ForkingMixIn and ThreadingMixIn.
+
+        """
+        self.finish_request(request, client_address)
+        self.shutdown_request(request)
+
+    def server_close(self):
+        """Called to clean-up the server.
+
+        May be overridden.
+
+        """
+        pass
+
+    def finish_request(self, request, client_address):
+        """Finish one request by instantiating RequestHandlerClass."""
+        self.RequestHandlerClass(request, client_address, self)
+
+    def shutdown_request(self, request):
+        """Called to shutdown and close an individual request."""
+        self.close_request(request)
+
+    def close_request(self, request):
+        """Called to clean up an individual request."""
+        pass
+
+    def handle_error(self, request, client_address):
+        """Handle an error gracefully.  May be overridden.
+
+        The default is to write a banner, the client address and the
+        current traceback to sys.stderr and continue.
+
+        """
+        import traceback
+        # The banner goes to the same stream as the traceback, so the
+        # report stays in one piece when stdout and stderr are
+        # redirected to different places.
+        sys.stderr.write('-'*40 + '\n')
+        sys.stderr.write('Exception happened during processing of '
+                         'request from %s\n' % (client_address,))
+        traceback.print_exc()
+        sys.stderr.write('-'*40 + '\n')
+
+
+class TCPServer(BaseServer):
+
+    """Socket-based server; the base for the other socket servers.
+
+    Defaults to synchronous IP stream (i.e., TCP).
+
+    Methods for the caller:
+
+    - __init__(server_address, RequestHandlerClass, bind_and_activate=True)
+    - serve_forever(poll_interval=0.5)
+    - shutdown()
+    - handle_request()  # if you don't use serve_forever()
+    - fileno() -> int   # for select()
+
+    Methods that may be overridden:
+
+    - server_bind()
+    - server_activate()
+    - get_request() -> request, client_address
+    - handle_timeout()
+    - verify_request(request, client_address)
+    - process_request(request, client_address)
+    - shutdown_request(request)
+    - close_request(request)
+    - handle_error(request, client_address)
+
+    Methods for derived classes:
+
+    - finish_request(request, client_address)
+
+    Class variables that may be overridden by derived classes or
+    instances:
+
+    - timeout
+    - address_family
+    - socket_type
+    - request_queue_size (only for stream sockets)
+    - allow_reuse_address
+
+    Instance variables:
+
+    - server_address
+    - RequestHandlerClass
+    - socket
+
+    """
+
+    address_family = socket.AF_INET
+
+    socket_type = socket.SOCK_STREAM
+
+    # Backlog handed to listen() by server_activate().
+    request_queue_size = 5
+
+    # When true, server_bind() sets SO_REUSEADDR before binding.
+    allow_reuse_address = False
+
+    def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True):
+        """Constructor.  May be extended, do not override.
+
+        Creates the socket; when bind_and_activate is true it is also
+        bound and activated, and closed again if either step raises.
+        """
+        BaseServer.__init__(self, server_address, RequestHandlerClass)
+        self.socket = socket.socket(self.address_family,
+                                    self.socket_type)
+        if not bind_and_activate:
+            return
+        try:
+            self.server_bind()
+            self.server_activate()
+        except:
+            # Do not leave a half-initialised socket open.
+            self.server_close()
+            raise
+
+    def server_bind(self):
+        """Bind the socket; called by the constructor.
+
+        May be overridden.
+
+        """
+        if self.allow_reuse_address:
+            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        self.socket.bind(self.server_address)
+        # Record the address actually bound, as reported by the socket.
+        self.server_address = self.socket.getsockname()
+
+    def server_activate(self):
+        """Start listening; called by the constructor.
+
+        May be overridden.
+
+        """
+        self.socket.listen(self.request_queue_size)
+
+    def server_close(self):
+        """Clean up the server by closing its socket.
+
+        May be overridden.
+
+        """
+        self.socket.close()
+
+    def fileno(self):
+        """Return the socket's file number.
+
+        Interface required by select().
+
+        """
+        return self.socket.fileno()
+
+    def get_request(self):
+        """Accept a connection; return (request, client_address).
+
+        May be overridden.
+
+        """
+        return self.socket.accept()
+
+    def shutdown_request(self, request):
+        """Shut down and then close an individual request."""
+        try:
+            # Shut down explicitly: socket.close() merely releases
+            # the socket and waits for GC to perform the actual close.
+            request.shutdown(socket.SHUT_WR)
+        except socket.error:
+            pass  # some platforms may raise ENOTCONN here
+        self.close_request(request)
+
+    def close_request(self, request):
+        """Clean up an individual request by closing it."""
+        request.close()
+
+
+class UDPServer(TCPServer):
+
+    """UDP server class.
+
+    A request is the pair (data, socket): the datagram that was
+    received and the server socket it arrived on.
+    """
+
+    allow_reuse_address = False
+
+    socket_type = socket.SOCK_DGRAM
+
+    # Buffer size passed to recvfrom() for each datagram.
+    max_packet_size = 8192
+
+    def get_request(self):
+        """Receive one datagram; return ((data, socket), client_address)."""
+        packet, sender = self.socket.recvfrom(self.max_packet_size)
+        request = (packet, self.socket)
+        return request, sender
+
+    def server_activate(self):
+        """Do nothing: there is no need to call listen() for UDP."""
+        pass
+
+    def shutdown_request(self, request):
+        """Close the request; there is nothing to shut down."""
+        self.close_request(request)
+
+    def close_request(self, request):
+        """Do nothing: there is nothing to close."""
+        pass
+
+class ForkingMixIn:
+
+    """Mix-in class to handle each request in a new process."""
+
+    timeout = 300
+    # Set of child pids still to be reaped; created on the first fork.
+    active_children = None
+    # collect_children() blocks in waitpid() once this many are active.
+    max_children = 40
+
+    def collect_children(self):
+        """Internal routine to wait for children that have exited."""
+        children = self.active_children
+        if children is None:
+            return
+
+        # While we are at or above the maximum number of children, block
+        # and reap until we are back below the threshold.  waitpid(-1)
+        # collects whichever child exits next, so the number of syscalls
+        # follows the number of defunct children rather than the number
+        # of children we track.  The downside is that this might reap
+        # children which we didn't spawn, which is why we only resort to
+        # it when we're at or above max_children.
+        while len(children) >= self.max_children:
+            try:
+                reaped, _ = os.waitpid(-1, 0)
+                children.discard(reaped)
+            except OSError as exc:
+                if exc.errno == errno.ECHILD:
+                    # we don't have any children, we're done
+                    children.clear()
+                elif exc.errno != errno.EINTR:
+                    break
+
+        # Now reap all defunct children without blocking.
+        for pid in children.copy():
+            try:
+                reaped, _ = os.waitpid(pid, os.WNOHANG)
+                # if the child hasn't exited yet, reaped will be 0 and
+                # ignored by discard() below
+                children.discard(reaped)
+            except OSError as exc:
+                if exc.errno == errno.ECHILD:
+                    # someone else reaped it
+                    children.discard(pid)
+
+    def handle_timeout(self):
+        """Wait for zombies after self.timeout seconds of inactivity.
+
+        May be extended, do not override.
+        """
+        self.collect_children()
+
+    def process_request(self, request, client_address):
+        """Fork a new subprocess to process the request."""
+        self.collect_children()
+        pid = os.fork()
+        if pid == 0:
+            # Child process.
+            # This must never return, hence os._exit()!
+            try:
+                self.finish_request(request, client_address)
+                self.shutdown_request(request)
+                os._exit(0)
+            except:
+                try:
+                    self.handle_error(request, client_address)
+                    self.shutdown_request(request)
+                finally:
+                    os._exit(1)
+        else:
+            # Parent process: remember the child and drop our copy of
+            # the request handle.
+            if self.active_children is None:
+                self.active_children = set()
+            self.active_children.add(pid)
+            self.close_request(request)
+            return
+
+
+class ThreadingMixIn:
+    """Mix-in class to handle each request in a new thread."""
+
+    # Copied onto each worker thread's daemon attribute; decides how
+    # threads will act upon termination of the main process.
+    daemon_threads = False
+
+    def process_request_thread(self, request, client_address):
+        """Finish and shut down one request; runs in the worker thread.
+
+        Same as BaseServer.process_request, except that exception
+        handling is done here as well: any failure is passed to
+        handle_error() and the request is then shut down.
+
+        """
+        try:
+            self.finish_request(request, client_address)
+            self.shutdown_request(request)
+        except:
+            self.handle_error(request, client_address)
+            self.shutdown_request(request)
+
+    def process_request(self, request, client_address):
+        """Start a new thread to process the request."""
+        worker = threading.Thread(target=self.process_request_thread,
+                                  args=(request, client_address))
+        worker.daemon = self.daemon_threads
+        worker.start()
+
+
+class ForkingUDPServer(ForkingMixIn, UDPServer):
+    """UDPServer combined with ForkingMixIn."""
+
+class ForkingTCPServer(ForkingMixIn, TCPServer):
+    """TCPServer combined with ForkingMixIn."""
+
+class ThreadingUDPServer(ThreadingMixIn, UDPServer):
+    """UDPServer combined with ThreadingMixIn."""
+
+class ThreadingTCPServer(ThreadingMixIn, TCPServer):
+    """TCPServer combined with ThreadingMixIn."""
+
+# The Unix-domain servers are defined only where the socket module
+# provides AF_UNIX.
+if hasattr(socket, 'AF_UNIX'):
+
+    class UnixStreamServer(TCPServer):
+        """TCPServer using the AF_UNIX address family."""
+        address_family = socket.AF_UNIX
+
+    class UnixDatagramServer(UDPServer):
+        """UDPServer using the AF_UNIX address family."""
+        address_family = socket.AF_UNIX
+
+    class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer):
+        """UnixStreamServer combined with ThreadingMixIn."""
+
+    class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer):
+        """UnixDatagramServer combined with ThreadingMixIn."""
+
+class BaseRequestHandler:
+
+    """Base class for request handler classes.
+
+    One instance is created per request.  The constructor stores the
+    request, the client address and the server as self.request,
+    self.client_address and self.server, then runs setup(), handle()
+    and finish() in that order; finish() runs even when handle()
+    raises.  To implement a specific service, derive a class that
+    defines handle().
+
+    Since a separate instance is created for each request, handle()
+    can define other arbitrary instance variables; per-server
+    information is reachable through self.server.
+
+    """
+
+    def __init__(self, request, client_address, server):
+        self.request, self.client_address, self.server = (
+            request, client_address, server)
+        self.setup()
+        try:
+            self.handle()
+        finally:
+            self.finish()
+
+    def setup(self):
+        """Hook run before handle(); does nothing by default."""
+        pass
+
+    def handle(self):
+        """Hook that services the request; does nothing by default."""
+        pass
+
+    def finish(self):
+        """Hook run after handle(); does nothing by default."""
+        pass
+
+
+# The following two classes make it possible to use the same service
+# class for stream or datagram servers.
+# Each class sets up these instance variables:
+# - rfile: a file object from which the request is read
+# - wfile: a file object to which the reply is written
+# When the handle() method returns, wfile is flushed properly
+
+
+class StreamRequestHandler(BaseRequestHandler):
+
+    """Define self.rfile and self.wfile for stream sockets."""
+
+    # Default buffer sizes for rfile, wfile.
+    # rfile is buffered by default because reading would otherwise be
+    # really slow for large data (a getc() call per byte).  wfile is
+    # unbuffered because (a) a write() is often followed by a read, so
+    # the line has to be flushed; (b) big writes to unbuffered files
+    # are typically optimized by stdio even when big reads aren't.
+    rbufsize = -1
+    wbufsize = 0
+
+    # A timeout to apply to the request socket, if not None.
+    timeout = None
+
+    # Disable nagle algorithm for this socket, if True.
+    # Use only when wbufsize != 0, to avoid small packets.
+    disable_nagle_algorithm = False
+
+    def setup(self):
+        """Configure the connection and wrap it in rfile and wfile."""
+        conn = self.connection = self.request
+        if self.timeout is not None:
+            conn.settimeout(self.timeout)
+        if self.disable_nagle_algorithm:
+            conn.setsockopt(socket.IPPROTO_TCP,
+                            socket.TCP_NODELAY, True)
+        self.rfile = conn.makefile('rb', self.rbufsize)
+        self.wfile = conn.makefile('wb', self.wbufsize)
+
+    def finish(self):
+        """Flush wfile if it is still open, then close both files."""
+        if not self.wfile.closed:
+            try:
+                self.wfile.flush()
+            except socket.error:
+                # A final socket error may have occurred here, such as
+                # the local error ECONNABORTED.
+                pass
+        self.wfile.close()
+        self.rfile.close()
+
+
+class DatagramRequestHandler(BaseRequestHandler):
+
+    """Define self.rfile and self.wfile for datagram sockets."""
+
+    def setup(self):
+        """Unpack the request and create in-memory rfile and wfile."""
+        # Use cStringIO when it can be imported, StringIO otherwise.
+        try:
+            from cStringIO import StringIO
+        except ImportError:
+            from StringIO import StringIO
+        # The request is a (packet, socket) pair.
+        self.packet, self.socket = self.request
+        self.rfile = StringIO(self.packet)
+        self.wfile = StringIO()
+
+    def finish(self):
+        """Send everything written to wfile back to the client."""
+        reply = self.wfile.getvalue()
+        self.socket.sendto(reply, self.client_address)
diff --git a/cashew/Lib/StringIO.py b/cashew/Lib/StringIO.py
new file mode 100644
index 0000000..b63525b
--- /dev/null
+++ b/cashew/Lib/StringIO.py
@@ -0,0 +1,324 @@
+r"""File-like objects that read from or write to a string buffer.
+
+This implements (nearly) all stdio methods.
+
+f = StringIO() # ready for writing
+f = StringIO(buf) # ready for reading
+f.close() # explicitly release resources held
+flag = f.isatty() # always false
+pos = f.tell() # get current position
+f.seek(pos) # set current position
+f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
+buf = f.read() # read until EOF
+buf = f.read(n) # read up to n bytes
+buf = f.readline() # read until end of line ('\n') or EOF
+list = f.readlines()# list of f.readline() results until EOF
+f.truncate([size]) # truncate file to at most size (default: current pos)
+f.write(buf) # write at current position
+f.writelines(list) # for line in list: f.write(line)
+f.getvalue() # return whole file's contents as a string
+
+Notes:
+- Using a real file is often faster (but less convenient).
+- There's also a much faster implementation in C, called cStringIO, but
+ it's not subclassable.
+- fileno() is left unimplemented so that code which uses it triggers
+ an exception early.
+- Seeking far beyond EOF and then writing will insert real null
+ bytes that occupy space in the buffer.
+- There's a simple test set (see end of this file).
+"""
+try:
+ from errno import EINVAL
+except ImportError:
+ EINVAL = 22
+
+__all__ = ["StringIO"]
+
+def _complain_ifclosed(closed):
+    """Raise ValueError if closed is true; otherwise do nothing."""
+    if not closed:
+        return
+    raise ValueError("I/O operation on closed file")
+
+class StringIO:
+    """class StringIO([buffer])
+
+    When a StringIO object is created, it can be initialized to an existing
+    string by passing the string to the constructor. If no string is given,
+    the StringIO will start empty.
+
+    The StringIO object can accept either Unicode or 8-bit strings, but
+    mixing the two may take some care. If both are used, 8-bit strings that
+    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
+    a UnicodeError to be raised when getvalue() is called.
+    """
+    def __init__(self, buf = ''):
+        # Force self.buf to be a string or unicode
+        if not isinstance(buf, basestring):
+            buf = str(buf)
+        # Contents consolidated so far.
+        self.buf = buf
+        # Logical length of the file: buf plus everything in buflist.
+        self.len = len(buf)
+        # Pieces written but not yet joined onto buf.
+        self.buflist = []
+        # Current position; may lie beyond len after a seek.
+        self.pos = 0
+        self.closed = False
+        self.softspace = 0
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        """A file object is its own iterator, for example iter(f) returns f
+        (unless f is closed). When a file is used as an iterator, typically
+        in a for loop (for example, for line in f: print line), the next()
+        method is called repeatedly. This method returns the next input line,
+        or raises StopIteration when EOF is hit.
+        """
+        _complain_ifclosed(self.closed)
+        r = self.readline()
+        if not r:
+            raise StopIteration
+        return r
+
+    def close(self):
+        """Free the memory buffer.
+
+        Closing an already closed object does nothing.
+        """
+        if not self.closed:
+            self.closed = True
+            del self.buf, self.pos
+
+    def isatty(self):
+        """Returns False because StringIO objects are not connected to a
+        tty-like device.
+        """
+        _complain_ifclosed(self.closed)
+        return False
+
+    def seek(self, pos, mode = 0):
+        """Set the file's current position.
+
+        The mode argument is optional and defaults to 0 (absolute file
+        positioning); other values are 1 (seek relative to the current
+        position) and 2 (seek relative to the file's end).  A resulting
+        position below zero is clamped to zero.
+
+        There is no return value.
+        """
+        _complain_ifclosed(self.closed)
+        if self.buflist:
+            self.buf += ''.join(self.buflist)
+            self.buflist = []
+        if mode == 1:
+            pos += self.pos
+        elif mode == 2:
+            pos += self.len
+        self.pos = max(0, pos)
+
+    def tell(self):
+        """Return the file's current position."""
+        _complain_ifclosed(self.closed)
+        return self.pos
+
+    def read(self, n = -1):
+        """Read at most n bytes from the file
+        (less if the read hits EOF before obtaining n bytes).
+
+        If n is None, negative or omitted, read all data until EOF is
+        reached. The bytes are returned as a string object. An empty
+        string is returned when EOF is encountered immediately.
+        """
+        _complain_ifclosed(self.closed)
+        if self.buflist:
+            self.buf += ''.join(self.buflist)
+            self.buflist = []
+        if n is None or n < 0:
+            newpos = self.len
+        else:
+            newpos = min(self.pos+n, self.len)
+        r = self.buf[self.pos:newpos]
+        self.pos = newpos
+        return r
+
+    def readline(self, length=None):
+        r"""Read one entire line from the file.
+
+        A trailing newline character is kept in the string (but may be absent
+        when a file ends with an incomplete line). If the length argument is
+        present and non-negative, it is a maximum byte count (including the
+        trailing newline) and an incomplete line may be returned.
+
+        An empty string is returned only when EOF is encountered immediately.
+
+        Note: Unlike stdio's fgets(), the returned string contains null
+        characters ('\0') if they occurred in the input.
+        """
+        _complain_ifclosed(self.closed)
+        if self.buflist:
+            self.buf += ''.join(self.buflist)
+            self.buflist = []
+        i = self.buf.find('\n', self.pos)
+        if i < 0:
+            newpos = self.len
+        else:
+            newpos = i+1
+        if length is not None and length >= 0:
+            if self.pos + length < newpos:
+                newpos = self.pos + length
+        r = self.buf[self.pos:newpos]
+        self.pos = newpos
+        return r
+
+    def readlines(self, sizehint = 0):
+        """Read until EOF using readline() and return a list containing the
+        lines thus read.
+
+        If the optional sizehint argument is present and positive, then
+        instead of reading up to EOF, whole lines totalling approximately
+        sizehint bytes (or more, to accommodate a final whole line) are
+        read.
+        """
+        total = 0
+        lines = []
+        line = self.readline()
+        while line:
+            lines.append(line)
+            total += len(line)
+            if 0 < sizehint <= total:
+                break
+            line = self.readline()
+        return lines
+
+    def truncate(self, size=None):
+        """Truncate the file's size.
+
+        If the optional size argument is present, the file is truncated to
+        (at most) that size. The size defaults to the current position.
+        The current file position is not changed unless the position
+        is beyond the new file size.
+
+        If the specified size exceeds the file's current size, the
+        file remains unchanged.
+
+        Raises IOError(EINVAL) if size is negative.
+        """
+        _complain_ifclosed(self.closed)
+        if size is None:
+            size = self.pos
+        elif size < 0:
+            raise IOError(EINVAL, "Negative size not allowed")
+        elif size < self.pos:
+            self.pos = size
+        self.buf = self.getvalue()[:size]
+        # Slicing can only shorten the data, so take the new length from
+        # the buffer itself: a size beyond EOF must leave the recorded
+        # length unchanged instead of claiming bytes that do not exist.
+        self.len = len(self.buf)
+
+    def write(self, s):
+        """Write a string to the file.
+
+        Writing at a position beyond the end first pads the gap with
+        null bytes.  There is no return value.
+        """
+        _complain_ifclosed(self.closed)
+        if not s: return
+        # Force s to be a string or unicode
+        if not isinstance(s, basestring):
+            s = str(s)
+        spos = self.pos
+        slen = self.len
+        if spos == slen:
+            # Plain append at EOF: just queue the piece.
+            self.buflist.append(s)
+            self.len = self.pos = spos + len(s)
+            return
+        if spos > slen:
+            # Position is past EOF: fill the gap with null bytes.
+            self.buflist.append('\0'*(spos - slen))
+            slen = spos
+        newpos = spos + len(s)
+        if spos < slen:
+            # Overwrite in the middle: splice s between the untouched
+            # head and tail of the existing data.
+            if self.buflist:
+                self.buf += ''.join(self.buflist)
+            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
+            self.buf = ''
+            if newpos > slen:
+                slen = newpos
+        else:
+            self.buflist.append(s)
+            slen = newpos
+        self.len = slen
+        self.pos = newpos
+
+    def writelines(self, iterable):
+        """Write a sequence of strings to the file. The sequence can be any
+        iterable object producing strings, typically a list of strings. There
+        is no return value.
+
+        (The name is intended to match readlines(); writelines() does not add
+        line separators.)
+        """
+        write = self.write
+        for line in iterable:
+            write(line)
+
+    def flush(self):
+        """Flush the internal buffer
+        """
+        _complain_ifclosed(self.closed)
+
+    def getvalue(self):
+        """
+        Retrieve the entire contents of the "file" at any time before
+        the StringIO object's close() method is called.
+
+        The StringIO object can accept either Unicode or 8-bit strings,
+        but mixing the two may take some care. If both are used, 8-bit
+        strings that cannot be interpreted as 7-bit ASCII (that use the
+        8th bit) will cause a UnicodeError to be raised when getvalue()
+        is called.
+        """
+        _complain_ifclosed(self.closed)
+        if self.buflist:
+            self.buf += ''.join(self.buflist)
+            self.buflist = []
+        return self.buf
+
+
+# A little test suite
+
+def test():
+    """Smoke-test StringIO against the contents of a text file.
+
+    The file is sys.argv[1] when given, '/etc/passwd' otherwise.
+    Progress is printed; RuntimeError is raised on the first check
+    that fails.
+    """
+    import sys
+    if sys.argv[1:]:
+        path = sys.argv[1]
+    else:
+        path = '/etc/passwd'
+    # Read the file twice (as lines and as one string), closing each
+    # handle as soon as it has been read.
+    with open(path, 'r') as fp:
+        lines = fp.readlines()
+    with open(path, 'r') as fp:
+        text = fp.read()
+    f = StringIO()
+    for line in lines[:-2]:
+        f.write(line)
+    f.writelines(lines[-2:])
+    if f.getvalue() != text:
+        raise RuntimeError('write failed')
+    length = f.tell()
+    print 'File length =', length
+    # Overwrite the second line with itself; contents must not change.
+    f.seek(len(lines[0]))
+    f.write(lines[1])
+    f.seek(0)
+    print 'First line =', repr(f.readline())
+    print 'Position =', f.tell()
+    line = f.readline()
+    print 'Second line =', repr(line)
+    f.seek(-len(line), 1)
+    line2 = f.read(len(line))
+    if line != line2:
+        raise RuntimeError('bad result after seek back')
+    f.seek(len(line2), 1)
+    rest = f.readlines()
+    line = rest[-1]
+    f.seek(f.tell() - len(line))
+    line2 = f.read()
+    if line != line2:
+        raise RuntimeError('bad result after seek back from EOF')
+    print 'Read', len(rest), 'more lines'
+    print 'File length =', f.tell()
+    if f.tell() != length:
+        raise RuntimeError('bad length')
+    f.truncate(length // 2)
+    f.seek(0, 2)
+    print 'Truncated length =', f.tell()
+    if f.tell() != length // 2:
+        raise RuntimeError('truncate did not adjust length')
+    f.close()
+
+if __name__ == '__main__':
+    test()
diff --git a/cashew/Lib/UserDict.py b/cashew/Lib/UserDict.py
new file mode 100644
index 0000000..732b327
--- /dev/null
+++ b/cashew/Lib/UserDict.py
@@ -0,0 +1,213 @@
+"""A more or less complete user-defined wrapper around dictionary objects."""
+
+class UserDict:
+    """Wrapper class around a dictionary kept in self.data."""
+
+    def __init__(*args, **kwargs):
+        # self is taken from *args so that 'self' and 'dict' stay
+        # usable as ordinary keyword keys.
+        if not args:
+            raise TypeError("descriptor '__init__' of 'UserDict' object "
+                            "needs an argument")
+        self, args = args[0], args[1:]
+        if len(args) > 1:
+            raise TypeError('expected at most 1 arguments, got %d' % len(args))
+        if args:
+            initial = args[0]
+        elif 'dict' in kwargs:
+            initial = kwargs.pop('dict')
+            import warnings
+            warnings.warn("Passing 'dict' as keyword argument is "
+                          "deprecated", PendingDeprecationWarning,
+                          stacklevel=2)
+        else:
+            initial = None
+        self.data = {}
+        if initial is not None:
+            self.update(initial)
+        if kwargs:
+            self.update(kwargs)
+
+    def __repr__(self):
+        return repr(self.data)
+
+    def __cmp__(self, dict):
+        # Compare against the wrapped dictionary of another UserDict.
+        if isinstance(dict, UserDict):
+            dict = dict.data
+        return cmp(self.data, dict)
+
+    __hash__ = None # Avoid Py3k warning
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, key):
+        if key in self.data:
+            return self.data[key]
+        # Fall back to a __missing__ hook defined on the class, if any.
+        cls = self.__class__
+        if hasattr(cls, "__missing__"):
+            return cls.__missing__(self, key)
+        raise KeyError(key)
+
+    def __setitem__(self, key, item):
+        self.data[key] = item
+
+    def __delitem__(self, key):
+        del self.data[key]
+
+    def clear(self):
+        self.data.clear()
+
+    def copy(self):
+        """Return a shallow copy of the mapping."""
+        if self.__class__ is UserDict:
+            return UserDict(self.data.copy())
+        import copy
+        # For subclasses: copy the instance while it holds an empty
+        # dictionary, restore the real one, then fill the copy.
+        saved = self.data
+        try:
+            self.data = {}
+            clone = copy.copy(self)
+        finally:
+            self.data = saved
+        clone.update(self)
+        return clone
+
+    def keys(self):
+        return self.data.keys()
+
+    def items(self):
+        return self.data.items()
+
+    def iteritems(self):
+        return self.data.iteritems()
+
+    def iterkeys(self):
+        return self.data.iterkeys()
+
+    def itervalues(self):
+        return self.data.itervalues()
+
+    def values(self):
+        return self.data.values()
+
+    def has_key(self, key):
+        return key in self.data
+
+    def update(*args, **kwargs):
+        # Same calling convention as __init__: self comes from *args.
+        if not args:
+            raise TypeError("descriptor 'update' of 'UserDict' object "
+                            "needs an argument")
+        self, args = args[0], args[1:]
+        if len(args) > 1:
+            raise TypeError('expected at most 1 arguments, got %d' % len(args))
+        if args:
+            other = args[0]
+        elif 'dict' in kwargs:
+            other = kwargs.pop('dict')
+            import warnings
+            warnings.warn("Passing 'dict' as keyword argument is deprecated",
+                          PendingDeprecationWarning, stacklevel=2)
+        else:
+            other = None
+        if other is not None:
+            if isinstance(other, UserDict):
+                self.data.update(other.data)
+            elif isinstance(other, type({})) or not hasattr(other, 'items'):
+                self.data.update(other)
+            else:
+                # Any other mapping: go through __setitem__ per pair.
+                for k, v in other.items():
+                    self[k] = v
+        if kwargs:
+            self.data.update(kwargs)
+
+    def get(self, key, failobj=None):
+        if key in self:
+            return self[key]
+        return failobj
+
+    def setdefault(self, key, failobj=None):
+        if key not in self:
+            self[key] = failobj
+        return self[key]
+
+    def pop(self, key, *args):
+        return self.data.pop(key, *args)
+
+    def popitem(self):
+        return self.data.popitem()
+
+    def __contains__(self, key):
+        return key in self.data
+
+    @classmethod
+    def fromkeys(cls, iterable, value=None):
+        """Return a new instance mapping each key in iterable to value."""
+        result = cls()
+        for key in iterable:
+            result[key] = value
+        return result
+
+class IterableUserDict(UserDict):
+    """UserDict that also supports iteration over its keys."""
+
+    def __iter__(self):
+        keys = iter(self.data)
+        return keys
+
+# Register IterableUserDict with _abcoll.MutableMapping (registered
+# rather than inherited, so the class hierarchy above is unchanged).
+import _abcoll
+_abcoll.MutableMapping.register(IterableUserDict)
+
+
+class DictMixin:
+    # Mixin defining all dictionary methods for classes that already have
+    # a minimum dictionary interface including getitem, setitem, delitem,
+    # and keys. Without knowledge of the subclass constructor, the mixin
+    # does not define __init__() or copy().  In addition to the four base
+    # methods, progressively more efficiency comes with defining
+    # __contains__(), __iter__(), and iteritems().
+
+    # second level definitions support higher levels
+    def __iter__(self):
+        for key in self.keys():
+            yield key
+
+    def has_key(self, key):
+        try:
+            self[key]
+        except KeyError:
+            return False
+        else:
+            return True
+
+    def __contains__(self, key):
+        return self.has_key(key)
+
+    # third level takes advantage of second level definitions
+    def iteritems(self):
+        for key in self:
+            yield (key, self[key])
+
+    def iterkeys(self):
+        return self.__iter__()
+
+    # fourth level uses definitions from lower levels
+    def itervalues(self):
+        for _, value in self.iteritems():
+            yield value
+
+    def values(self):
+        collected = []
+        for _, value in self.iteritems():
+            collected.append(value)
+        return collected
+
+    def items(self):
+        return [pair for pair in self.iteritems()]
+
+    def clear(self):
+        for key in self.keys():
+            del self[key]
+
+    def setdefault(self, key, default=None):
+        try:
+            return self[key]
+        except KeyError:
+            self[key] = default
+        return default
+
+    def pop(self, key, *args):
+        # args holds the optional default; more than one is an error.
+        if len(args) > 1:
+            raise TypeError("pop expected at most 2 arguments, got "
+                            + repr(1 + len(args)))
+        try:
+            value = self[key]
+        except KeyError:
+            if args:
+                return args[0]
+            raise
+        del self[key]
+        return value
+
+    def popitem(self):
+        try:
+            key, value = next(self.iteritems())
+        except StopIteration:
+            raise KeyError('container is empty')
+        del self[key]
+        return (key, value)
+
+    def update(self, other=None, **kwargs):
+        # Make progressively weaker assumptions about "other"
+        if other is None:
+            pass
+        elif hasattr(other, 'iteritems'):  # iteritems saves memory and lookups
+            for key, value in other.iteritems():
+                self[key] = value
+        elif hasattr(other, 'keys'):
+            for key in other.keys():
+                self[key] = other[key]
+        else:
+            # Last resort: treat other as an iterable of (key, value).
+            for key, value in other:
+                self[key] = value
+        if kwargs:
+            self.update(kwargs)
+
+    def get(self, key, default=None):
+        try:
+            value = self[key]
+        except KeyError:
+            return default
+        return value
+
+    def __repr__(self):
+        return repr(dict(self.iteritems()))
+
+    def __cmp__(self, other):
+        if other is None:
+            return 1
+        # Compare as plain dictionaries.
+        if isinstance(other, DictMixin):
+            other = dict(other.iteritems())
+        return cmp(dict(self.iteritems()), other)
+
+    def __len__(self):
+        return len(self.keys())
+ return len(self.keys())
diff --git a/cashew/Lib/UserList.py b/cashew/Lib/UserList.py
new file mode 100644
index 0000000..b445985
--- /dev/null
+++ b/cashew/Lib/UserList.py
@@ -0,0 +1,88 @@
+"""A more or less complete user-defined wrapper around list objects."""
+
+import collections
+
+class UserList(collections.MutableSequence):  # list-like wrapper; the real list is kept in self.data
+ def __init__(self, initlist=None):  # initlist: optional initial contents (list, UserList or other iterable)
+ self.data = []
+ if initlist is not None:
+ # XXX should this accept an arbitrary sequence?
+ if type(initlist) == type(self.data):  # exact list type only; list subclasses are not matched here
+ self.data[:] = initlist  # slice assignment copies the items; initlist itself is not kept
+ elif isinstance(initlist, UserList):
+ self.data[:] = initlist.data[:]
+ else:
+ self.data = list(initlist)
+ def __repr__(self): return repr(self.data)
+ def __lt__(self, other): return self.data < self.__cast(other)  # comparisons work on the underlying lists
+ def __le__(self, other): return self.data <= self.__cast(other)
+ def __eq__(self, other): return self.data == self.__cast(other)
+ def __ne__(self, other): return self.data != self.__cast(other)
+ def __gt__(self, other): return self.data > self.__cast(other)
+ def __ge__(self, other): return self.data >= self.__cast(other)
+ def __cast(self, other):  # unwrap a UserList operand to its list; anything else passes through unchanged
+ if isinstance(other, UserList): return other.data
+ else: return other
+ def __cmp__(self, other):
+ return cmp(self.data, self.__cast(other))
+ __hash__ = None # Mutable sequence, so not hashable
+ def __contains__(self, item): return item in self.data
+ def __len__(self): return len(self.data)
+ def __getitem__(self, i): return self.data[i]
+ def __setitem__(self, i, item): self.data[i] = item
+ def __delitem__(self, i): del self.data[i]
+ def __getslice__(self, i, j):  # simple slice read; the result is wrapped in the same class
+ i = max(i, 0); j = max(j, 0)  # negative bounds are clamped to 0
+ return self.__class__(self.data[i:j])
+ def __setslice__(self, i, j, other):  # simple slice assignment from a UserList, list or other iterable
+ i = max(i, 0); j = max(j, 0)
+ if isinstance(other, UserList):
+ self.data[i:j] = other.data
+ elif isinstance(other, type(self.data)):
+ self.data[i:j] = other
+ else:
+ self.data[i:j] = list(other)
+ def __delslice__(self, i, j):
+ i = max(i, 0); j = max(j, 0)
+ del self.data[i:j]
+ def __add__(self, other):  # self + other: new instance of the same class
+ if isinstance(other, UserList):
+ return self.__class__(self.data + other.data)
+ elif isinstance(other, type(self.data)):
+ return self.__class__(self.data + other)
+ else:
+ return self.__class__(self.data + list(other))  # any other iterable is converted to a list first
+ def __radd__(self, other):  # other + self: same rules with the operands swapped
+ if isinstance(other, UserList):
+ return self.__class__(other.data + self.data)
+ elif isinstance(other, type(self.data)):
+ return self.__class__(other + self.data)
+ else:
+ return self.__class__(list(other) + self.data)
+ def __iadd__(self, other):  # self += other: extends self.data in place and returns self
+ if isinstance(other, UserList):
+ self.data += other.data
+ elif isinstance(other, type(self.data)):
+ self.data += other
+ else:
+ self.data += list(other)
+ return self
+ def __mul__(self, n):  # repetition; returns a new instance
+ return self.__class__(self.data*n)
+ __rmul__ = __mul__
+ def __imul__(self, n):  # in-place repetition; returns self
+ self.data *= n
+ return self
+ def append(self, item): self.data.append(item)  # the methods below forward directly to self.data
+ def insert(self, i, item): self.data.insert(i, item)
+ def pop(self, i=-1): return self.data.pop(i)
+ def remove(self, item): self.data.remove(item)
+ def count(self, item): return self.data.count(item)
+ def index(self, item, *args): return self.data.index(item, *args)
+ def reverse(self): self.data.reverse()
+ def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
+ def extend(self, other):  # append all items of other; a UserList is unwrapped first
+ if isinstance(other, UserList):
+ self.data.extend(other.data)
+ else:
+ self.data.extend(other)
diff --git a/cashew/Lib/UserString.py b/cashew/Lib/UserString.py
new file mode 100644
index 0000000..726b3f7
--- /dev/null
+++ b/cashew/Lib/UserString.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python
+## vim:ts=4:et:nowrap
+"""A user-defined wrapper around string objects
+
+Note: string objects have grown methods in Python 1.6
+This module requires Python 1.6 or later.
+"""
+import sys
+import collections
+
+__all__ = ["UserString","MutableString"]
+
+class UserString(collections.Sequence):  # string-like wrapper; the wrapped string is kept in self.data
+ def __init__(self, seq):  # seq: a string, another UserString, or any object (converted with str())
+ if isinstance(seq, basestring):
+ self.data = seq
+ elif isinstance(seq, UserString):
+ self.data = seq.data[:]
+ else:
+ self.data = str(seq)
+ def __str__(self): return str(self.data)
+ def __repr__(self): return repr(self.data)
+ def __int__(self): return int(self.data)  # numeric conversions parse the wrapped text
+ def __long__(self): return long(self.data)
+ def __float__(self): return float(self.data)
+ def __complex__(self): return complex(self.data)
+ def __hash__(self): return hash(self.data)  # hashes exactly like the wrapped string
+
+ def __cmp__(self, string):  # compare self.data with another UserString's data, or with the object as given
+ if isinstance(string, UserString):
+ return cmp(self.data, string.data)
+ else:
+ return cmp(self.data, string)
+ def __contains__(self, char):
+ return char in self.data
+
+ def __len__(self): return len(self.data)
+ def __getitem__(self, index): return self.__class__(self.data[index])  # result is re-wrapped in the same class
+ def __getslice__(self, start, end):
+ start = max(start, 0); end = max(end, 0)  # negative bounds are clamped to 0
+ return self.__class__(self.data[start:end])
+
+ def __add__(self, other):  # self + other: new instance; non-string operands go through str()
+ if isinstance(other, UserString):
+ return self.__class__(self.data + other.data)
+ elif isinstance(other, basestring):
+ return self.__class__(self.data + other)
+ else:
+ return self.__class__(self.data + str(other))
+ def __radd__(self, other):  # other + self
+ if isinstance(other, basestring):
+ return self.__class__(other + self.data)
+ else:
+ return self.__class__(str(other) + self.data)
+ def __mul__(self, n):
+ return self.__class__(self.data*n)
+ __rmul__ = __mul__
+ def __mod__(self, args):  # %-formatting with self.data as the format string
+ return self.__class__(self.data % args)
+
+ # the following methods are defined in alphabetical order:
+ def capitalize(self): return self.__class__(self.data.capitalize())
+ def center(self, width, *args):
+ return self.__class__(self.data.center(width, *args))
+ def count(self, sub, start=0, end=sys.maxint):  # returns the plain result of self.data.count()
+ return self.data.count(sub, start, end)
+ def decode(self, encoding=None, errors=None): # XXX improve this?
+ if encoding:  # encoding/errors are forwarded only when truthy
+ if errors:
+ return self.__class__(self.data.decode(encoding, errors))
+ else:
+ return self.__class__(self.data.decode(encoding))
+ else:
+ return self.__class__(self.data.decode())
+ def encode(self, encoding=None, errors=None): # XXX improve this?
+ if encoding:  # same forwarding rule as decode()
+ if errors:
+ return self.__class__(self.data.encode(encoding, errors))
+ else:
+ return self.__class__(self.data.encode(encoding))
+ else:
+ return self.__class__(self.data.encode())
+ def endswith(self, suffix, start=0, end=sys.maxint):
+ return self.data.endswith(suffix, start, end)
+ def expandtabs(self, tabsize=8):
+ return self.__class__(self.data.expandtabs(tabsize))
+ def find(self, sub, start=0, end=sys.maxint):
+ return self.data.find(sub, start, end)
+ def index(self, sub, start=0, end=sys.maxint):
+ return self.data.index(sub, start, end)
+ def isalpha(self): return self.data.isalpha()
+ def isalnum(self): return self.data.isalnum()
+ def isdecimal(self): return self.data.isdecimal()
+ def isdigit(self): return self.data.isdigit()
+ def islower(self): return self.data.islower()
+ def isnumeric(self): return self.data.isnumeric()
+ def isspace(self): return self.data.isspace()
+ def istitle(self): return self.data.istitle()
+ def isupper(self): return self.data.isupper()
+ def join(self, seq): return self.data.join(seq)  # result is returned unwrapped
+ def ljust(self, width, *args):
+ return self.__class__(self.data.ljust(width, *args))
+ def lower(self): return self.__class__(self.data.lower())
+ def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
+ def partition(self, sep):  # result of self.data.partition() is returned unwrapped
+ return self.data.partition(sep)
+ def replace(self, old, new, maxsplit=-1):  # maxsplit is passed as the third argument of self.data.replace()
+ return self.__class__(self.data.replace(old, new, maxsplit))
+ def rfind(self, sub, start=0, end=sys.maxint):
+ return self.data.rfind(sub, start, end)
+ def rindex(self, sub, start=0, end=sys.maxint):
+ return self.data.rindex(sub, start, end)
+ def rjust(self, width, *args):
+ return self.__class__(self.data.rjust(width, *args))
+ def rpartition(self, sep):
+ return self.data.rpartition(sep)
+ def rstrip(self, chars=None): return self.__class__(self.data.rstrip(chars))
+ def split(self, sep=None, maxsplit=-1):  # split results are returned unwrapped
+ return self.data.split(sep, maxsplit)
+ def rsplit(self, sep=None, maxsplit=-1):
+ return self.data.rsplit(sep, maxsplit)
+ def splitlines(self, keepends=0): return self.data.splitlines(keepends)
+ def startswith(self, prefix, start=0, end=sys.maxint):
+ return self.data.startswith(prefix, start, end)
+ def strip(self, chars=None): return self.__class__(self.data.strip(chars))
+ def swapcase(self): return self.__class__(self.data.swapcase())
+ def title(self): return self.__class__(self.data.title())
+ def translate(self, *args):
+ return self.__class__(self.data.translate(*args))
+ def upper(self): return self.__class__(self.data.upper())
+ def zfill(self, width): return self.__class__(self.data.zfill(width))
+
+class MutableString(UserString, collections.MutableSequence):
+ """mutable string objects
+
+ Python strings are immutable objects. This has the advantage, that
+ strings may be used as dictionary keys. If this property isn't needed
+ and you insist on changing string values in place instead, you may cheat
+ and use MutableString.
+
+ But the purpose of this class is an educational one: to prevent
+ people from inventing their own mutable string class derived
+ from UserString and then forget thereby to remove (override) the
+ __hash__ method inherited from UserString. This would lead to
+ errors that would be very hard to track down.
+
+ A faster and better solution is to rewrite your program using lists."""
+ def __init__(self, string=""):  # string is stored as given, without the conversions UserString.__init__ applies
+ from warnings import warnpy3k
+ warnpy3k('the class UserString.MutableString has been removed in '
+ 'Python 3.0', stacklevel=2)
+ self.data = string
+
+ # UserString defines __hash__, so hashing must be disabled explicitly
+ __hash__ = None
+
+ def __setitem__(self, index, sub):  # replace one position or a slice by rebuilding self.data
+ if isinstance(index, slice):
+ if isinstance(sub, UserString):
+ sub = sub.data
+ elif not isinstance(sub, basestring):
+ sub = str(sub)
+ start, stop, step = index.indices(len(self.data))
+ if step == -1:  # reversed slice: turn it into the equivalent forward range and reverse the replacement
+ start, stop = stop+1, start+1
+ sub = sub[::-1]
+ elif step != 1:
+ # XXX(twouters): I guess we should be reimplementing
+ # the extended slice assignment/deletion algorithm here...
+ raise TypeError, "invalid step in slicing assignment"
+ start = min(start, stop)  # keeps start <= stop so the two kept pieces cannot overlap
+ self.data = self.data[:start] + sub + self.data[stop:]
+ else:
+ if index < 0:  # negative index counts from the end
+ index += len(self.data)
+ if index < 0 or index >= len(self.data): raise IndexError
+ self.data = self.data[:index] + sub + self.data[index+1:]  # NOTE(review): sub is not length-checked here, so a longer string changes the length -- confirm intended
+ def __delitem__(self, index):  # delete one position or a slice by rebuilding self.data
+ if isinstance(index, slice):
+ start, stop, step = index.indices(len(self.data))
+ if step == -1:
+ start, stop = stop+1, start+1
+ elif step != 1:
+ # XXX(twouters): see same block in __setitem__
+ raise TypeError, "invalid step in slicing deletion"
+ start = min(start, stop)
+ self.data = self.data[:start] + self.data[stop:]
+ else:
+ if index < 0:
+ index += len(self.data)
+ if index < 0 or index >= len(self.data): raise IndexError
+ self.data = self.data[:index] + self.data[index+1:]
+ def __setslice__(self, start, end, sub):  # simple-slice assignment; non-string values go through str()
+ start = max(start, 0); end = max(end, 0)
+ if isinstance(sub, UserString):
+ self.data = self.data[:start]+sub.data+self.data[end:]
+ elif isinstance(sub, basestring):
+ self.data = self.data[:start]+sub+self.data[end:]
+ else:
+ self.data = self.data[:start]+str(sub)+self.data[end:]
+ def __delslice__(self, start, end):
+ start = max(start, 0); end = max(end, 0)
+ self.data = self.data[:start] + self.data[end:]
+ def immutable(self):  # UserString built from the current contents
+ return UserString(self.data)
+ def __iadd__(self, other):  # self += other; returns self
+ if isinstance(other, UserString):
+ self.data += other.data
+ elif isinstance(other, basestring):
+ self.data += other
+ else:
+ self.data += str(other)
+ return self
+ def __imul__(self, n):
+ self.data *= n
+ return self
+ def insert(self, index, value):  # insertion expressed as assignment to the empty slice at index
+ self[index:index] = value
+
+if __name__ == "__main__":
+ # execute the regression test to stdout, if called as a script:
+ import os
+ called_in_dir, called_as = os.path.split(sys.argv[0])
+ called_as, py = os.path.splitext(called_as)  # script name without its extension
+ if '-q' in sys.argv:  # -q: turn off verbose test output
+ from test import test_support
+ test_support.verbose = 0
+ __import__('test.test_' + called_as.lower())  # importing test.test_<scriptname> is what runs the tests
diff --git a/cashew/Lib/_LWPCookieJar.py b/cashew/Lib/_LWPCookieJar.py
new file mode 100644
index 0000000..d91cb51
--- /dev/null
+++ b/cashew/Lib/_LWPCookieJar.py
@@ -0,0 +1,170 @@
+"""Load / save to libwww-perl (LWP) format files.
+
+Actually, the format is slightly extended from that used by LWP's
+(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
+not recorded by LWP.
+
+It uses the version string "2.0", though really there isn't an LWP Cookies
+2.0 format. This indicates that there is extra information in here
+(domain_dot and port_spec) while still being compatible with
+libwww-perl, I hope.
+
+"""
+
+import time, re
+from cookielib import (_warn_unhandled_exception, FileCookieJar, LoadError,
+ Cookie, MISSING_FILENAME_TEXT,
+ join_header_words, split_header_words,
+ iso2time, time2isoz)
+
+def lwp_cookie_str(cookie):
+ """Return string representation of Cookie in the LWP cookie file format.
+
+ Actually, the format is extended a bit -- see module docstring.
+
+ """
+ h = [(cookie.name, cookie.value),  # the cookie's own name/value pair always comes first
+ ("path", cookie.path),
+ ("domain", cookie.domain)]
+ if cookie.port is not None: h.append(("port", cookie.port))
+ if cookie.path_specified: h.append(("path_spec", None))  # flag attributes are written with a None value
+ if cookie.port_specified: h.append(("port_spec", None))
+ if cookie.domain_initial_dot: h.append(("domain_dot", None))
+ if cookie.secure: h.append(("secure", None))
+ if cookie.expires: h.append(("expires",
+ time2isoz(float(cookie.expires))))
+ if cookie.discard: h.append(("discard", None))
+ if cookie.comment: h.append(("comment", cookie.comment))
+ if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
+
+ keys = cookie._rest.keys()
+ keys.sort()  # non-standard attributes are emitted in sorted key order
+ for k in keys:
+ h.append((k, str(cookie._rest[k])))
+
+ h.append(("version", str(cookie.version)))  # version is always the last attribute
+
+ return join_header_words([h])
+
+class LWPCookieJar(FileCookieJar):
+ """
+ The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
+ "Set-Cookie3" is the format used by the libwww-perl library, not known
+ to be compatible with any browser, but which is easy to read and
+ doesn't lose information about RFC 2965 cookies.
+
+ Additional methods
+
+ as_lwp_str(ignore_discard=True, ignore_expires=True)
+
+ """
+
+ def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
+ """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.
+
+ ignore_discard and ignore_expires: see docstring for FileCookieJar.save
+
+ """
+ now = time.time()
+ r = []
+ for cookie in self:
+ if not ignore_discard and cookie.discard:  # discard cookies are left out unless ignore_discard is set
+ continue
+ if not ignore_expires and cookie.is_expired(now):  # likewise for cookies already expired at call time
+ continue
+ r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
+ return "\n".join(r+[""])  # the extra "" makes the text end with a newline when any line was written
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):  # write the jar to filename (default: self.filename)
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename, "w")
+ try:
+ # There really isn't an LWP Cookies 2.0 format, but this indicates
+ # that there is extra information in here (domain_dot and
+ # port_spec) while still being compatible with libwww-perl, I hope.
+ f.write("#LWP-Cookies-2.0\n")
+ f.write(self.as_lwp_str(ignore_discard, ignore_expires))
+ finally:
+ f.close()
+
+ def _really_load(self, f, filename, ignore_discard, ignore_expires):  # parse open file f; filename is used only in error messages
+ magic = f.readline()
+ if not re.search(self.magic_re, magic):  # NOTE(review): magic_re is not defined in this class; presumably inherited from FileCookieJar -- confirm
+ msg = ("%r does not look like a Set-Cookie3 (LWP) format "
+ "file" % filename)
+ raise LoadError(msg)
+
+ now = time.time()
+
+ header = "Set-Cookie3:"
+ boolean_attrs = ("port_spec", "path_spec", "domain_dot",
+ "secure", "discard")
+ value_attrs = ("version",
+ "port", "path", "domain",
+ "expires",
+ "comment", "commenturl")
+
+ try:
+ while 1:
+ line = f.readline()
+ if line == "": break  # readline() returns "" only at end of file
+ if not line.startswith(header):  # lines without the Set-Cookie3: prefix are ignored
+ continue
+ line = line[len(header):].strip()
+
+ for data in split_header_words([line]):
+ name, value = data[0]  # first pair is the cookie's own name and value
+ standard = {}
+ rest = {}
+ for k in boolean_attrs:  # every flag starts out False; flags found below overwrite it
+ standard[k] = False
+ for k, v in data[1:]:
+ if k is not None:
+ lc = k.lower()
+ else:
+ lc = None
+ # don't lose case distinction for unknown fields
+ if (lc in value_attrs) or (lc in boolean_attrs):
+ k = lc
+ if k in boolean_attrs:
+ if v is None: v = True  # a flag written without a value means "set"
+ standard[k] = v
+ elif k in value_attrs:
+ standard[k] = v
+ else:
+ rest[k] = v
+
+ h = standard.get
+ expires = h("expires")
+ discard = h("discard")
+ if expires is not None:
+ expires = iso2time(expires)
+ if expires is None:  # no expiry, or iso2time() returned None for it: mark the cookie as discard
+ discard = True
+ domain = h("domain")
+ domain_specified = domain.startswith(".")  # a line without a domain raises here and is reported as LoadError below
+ c = Cookie(h("version"), name, value,
+ h("port"), h("port_spec"),
+ domain, domain_specified, h("domain_dot"),
+ h("path"), h("path_spec"),
+ h("secure"),
+ expires,
+ discard,
+ h("comment"),
+ h("commenturl"),
+ rest)
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ self.set_cookie(c)
+
+ except IOError:  # I/O errors propagate unchanged
+ raise
+ except Exception:  # any other failure is reported as a LoadError naming the file and offending line
+ _warn_unhandled_exception()
+ raise LoadError("invalid Set-Cookie3 format file %r: %r" %
+ (filename, line))
diff --git a/cashew/Lib/_MozillaCookieJar.py b/cashew/Lib/_MozillaCookieJar.py
new file mode 100644
index 0000000..585bc17
--- /dev/null
+++ b/cashew/Lib/_MozillaCookieJar.py
@@ -0,0 +1,149 @@
+"""Mozilla / Netscape cookie loading / saving."""
+
+import re, time
+
+from cookielib import (_warn_unhandled_exception, FileCookieJar, LoadError,
+ Cookie, MISSING_FILENAME_TEXT)
+
+class MozillaCookieJar(FileCookieJar):
+ """
+
+ WARNING: you may want to backup your browser's cookies file if you use
+ this class to save cookies. I *think* it works, but there have been
+ bugs in the past!
+
+ This class differs from CookieJar only in the format it uses to save and
+ load cookies to and from a file. This class uses the Mozilla/Netscape
+ `cookies.txt' format. lynx uses this file format, too.
+
+ Don't expect cookies saved while the browser is running to be noticed by
+ the browser (in fact, Mozilla on unix will overwrite your saved cookies if
+ you change them on disk while it's running; on Windows, you probably can't
+ save at all while the browser is running).
+
+ Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
+ Netscape cookies on saving.
+
+ In particular, the cookie version and port number information is lost,
+ together with information about whether or not Path, Port and Discard were
+ specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
+ domain as set in the HTTP header started with a dot (yes, I'm aware some
+ domains in Netscape files start with a dot and some don't -- trust me, you
+ really don't want to know any more about this).
+
+ Note that though Mozilla and Netscape use the same format, they use
+ slightly different headers. The class saves cookies using the Netscape
+ header by default (Mozilla can cope with that).
+
+ """
+ magic_re = "#( Netscape)? HTTP Cookie File"  # pattern the first line of a file must match in _really_load()
+ header = """\
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+"""
+
+ def _really_load(self, f, filename, ignore_discard, ignore_expires):  # parse open file f; filename is used only in error messages
+ now = time.time()
+
+ magic = f.readline()
+ if not re.search(self.magic_re, magic):
+ f.close()  # NOTE(review): closes a file object supplied by the caller; the LWP variant of this method does not -- confirm intended
+ raise LoadError(
+ "%r does not look like a Netscape format cookies file" %
+ filename)
+
+ try:
+ while 1:
+ line = f.readline()
+ if line == "": break  # readline() returns "" only at end of file
+
+ # last field may be absent, so keep any trailing tab
+ if line.endswith("\n"): line = line[:-1]
+
+ # skip comments and blank lines XXX what is $ for?
+ if (line.strip().startswith(("#", "$")) or
+ line.strip() == ""):
+ continue
+
+ domain, domain_specified, path, secure, expires, name, value = \
+ line.split("\t")  # exactly seven tab-separated fields; any other count raises and becomes LoadError below
+ secure = (secure == "TRUE")
+ domain_specified = (domain_specified == "TRUE")
+ if name == "":
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas cookielib regards it as a
+ # cookie with no value.
+ name = value
+ value = None
+
+ initial_dot = domain.startswith(".")
+ assert domain_specified == initial_dot  # NOTE(review): input validation via assert is skipped under -O; when active, a failure becomes LoadError below
+
+ discard = False
+ if expires == "":  # an empty expiry field means a cookie with no expiry, marked discard
+ expires = None
+ discard = True
+
+ # assume path_specified is false
+ c = Cookie(0, name, value,
+ None, False,
+ domain, domain_specified, initial_dot,
+ path, False,
+ secure,
+ expires,
+ discard,
+ None,
+ None,
+ {})
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ self.set_cookie(c)
+
+ except IOError:  # I/O errors propagate unchanged
+ raise
+ except Exception:  # any other failure is reported as a LoadError naming the file and offending line
+ _warn_unhandled_exception()
+ raise LoadError("invalid Netscape format cookies file %r: %r" %
+ (filename, line))
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):  # write the jar to filename (default: self.filename)
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename, "w")
+ try:
+ f.write(self.header)
+ now = time.time()
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ continue
+ if cookie.secure: secure = "TRUE"
+ else: secure = "FALSE"
+ if cookie.domain.startswith("."): initial_dot = "TRUE"  # the second column is derived from the domain's leading dot
+ else: initial_dot = "FALSE"
+ if cookie.expires is not None:
+ expires = str(cookie.expires)
+ else:
+ expires = ""  # written as an empty field, which _really_load() reads back as "no expiry"
+ if cookie.value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas cookielib regards it as a
+ # cookie with no value.
+ name = ""
+ value = cookie.name
+ else:
+ name = cookie.name
+ value = cookie.value
+ f.write(
+ "\t".join([cookie.domain, initial_dot, cookie.path,
+ secure, expires, name, value])+
+ "\n")
+ finally:
+ f.close()
diff --git a/cashew/Lib/__future__.py b/cashew/Lib/__future__.py
new file mode 100644
index 0000000..e0996eb
--- /dev/null
+++ b/cashew/Lib/__future__.py
@@ -0,0 +1,128 @@
+"""Record of phased-in incompatible language changes.
+
+Each line is of the form:
+
+ FeatureName = "_Feature(" OptionalRelease "," MandatoryRelease ","
+ CompilerFlag ")"
+
+where, normally, OptionalRelease < MandatoryRelease, and both are 5-tuples
+of the same form as sys.version_info:
+
+ (PY_MAJOR_VERSION, # the 2 in 2.1.0a3; an int
+ PY_MINOR_VERSION, # the 1; an int
+ PY_MICRO_VERSION, # the 0; an int
+ PY_RELEASE_LEVEL, # "alpha", "beta", "candidate" or "final"; string
+ PY_RELEASE_SERIAL # the 3; an int
+ )
+
+OptionalRelease records the first release in which
+
+ from __future__ import FeatureName
+
+was accepted.
+
+In the case of MandatoryReleases that have not yet occurred,
+MandatoryRelease predicts the release in which the feature will become part
+of the language.
+
+Else MandatoryRelease records when the feature became part of the language;
+in releases at or after that, modules no longer need
+
+ from __future__ import FeatureName
+
+to use the feature in question, but may continue to use such imports.
+
+MandatoryRelease may also be None, meaning that a planned feature got
+dropped.
+
+Instances of class _Feature have two corresponding methods,
+.getOptionalRelease() and .getMandatoryRelease().
+
+CompilerFlag is the (bitfield) flag that should be passed in the fourth
+argument to the builtin function compile() to enable the feature in
+dynamically compiled code. This flag is stored in the .compiler_flag
+attribute on _Feature instances. These values must match the appropriate
+#defines of CO_xxx flags in Include/compile.h.
+
+No feature line is ever to be deleted from this file.
+"""
+
+all_feature_names = [  # one name per _Feature instance defined at the bottom of this module
+ "nested_scopes",
+ "generators",
+ "division",
+ "absolute_import",
+ "with_statement",
+ "print_function",
+ "unicode_literals",
+]
+
+__all__ = ["all_feature_names"] + all_feature_names  # export the list itself plus every name it contains
+
+# The CO_xxx symbols are defined here under the same names used by
+# compile.h, so that an editor search will find them here. However,
+# they're not exported in __all__, because they don't really belong to
+# this module. Each one is the compiler_flag of the feature named beside it.
+CO_NESTED = 0x0010 # nested_scopes
+CO_GENERATOR_ALLOWED = 0 # generators (obsolete, was 0x1000)
+CO_FUTURE_DIVISION = 0x2000 # division
+CO_FUTURE_ABSOLUTE_IMPORT = 0x4000 # perform absolute imports by default
+CO_FUTURE_WITH_STATEMENT = 0x8000 # with statement
+CO_FUTURE_PRINT_FUNCTION = 0x10000 # print function
+CO_FUTURE_UNICODE_LITERALS = 0x20000 # unicode string literals
+
+class _Feature:
+ def __init__(self, optionalRelease, mandatoryRelease, compiler_flag):
+ self.optional = optionalRelease
+ self.mandatory = mandatoryRelease
+ self.compiler_flag = compiler_flag
+
+ def getOptionalRelease(self):
+ """Return first release in which this feature was recognized.
+
+ This is a 5-tuple, of the same form as sys.version_info.
+ """
+
+ return self.optional
+
+ def getMandatoryRelease(self):
+ """Return release in which this feature will become mandatory.
+
+ This is a 5-tuple, of the same form as sys.version_info, or, if
+ the feature was dropped, is None.
+ """
+
+ return self.mandatory
+
+ def __repr__(self):
+ return "_Feature" + repr((self.optional,
+ self.mandatory,
+ self.compiler_flag))
+
+nested_scopes = _Feature((2, 1, 0, "beta", 1),  # optional from 2.1.0 beta 1
+ (2, 2, 0, "alpha", 0),  # mandatory from 2.2.0 alpha 0
+ CO_NESTED)
+
+generators = _Feature((2, 2, 0, "alpha", 1),  # optional from 2.2.0 alpha 1
+ (2, 3, 0, "final", 0),  # mandatory from 2.3.0 final
+ CO_GENERATOR_ALLOWED)
+
+division = _Feature((2, 2, 0, "alpha", 2),  # optional from 2.2.0 alpha 2
+ (3, 0, 0, "alpha", 0),  # mandatory from 3.0.0 alpha 0
+ CO_FUTURE_DIVISION)
+
+absolute_import = _Feature((2, 5, 0, "alpha", 1),  # optional from 2.5.0 alpha 1
+ (3, 0, 0, "alpha", 0),  # mandatory from 3.0.0 alpha 0
+ CO_FUTURE_ABSOLUTE_IMPORT)
+
+with_statement = _Feature((2, 5, 0, "alpha", 1),  # optional from 2.5.0 alpha 1
+ (2, 6, 0, "alpha", 0),  # mandatory from 2.6.0 alpha 0
+ CO_FUTURE_WITH_STATEMENT)
+
+print_function = _Feature((2, 6, 0, "alpha", 2),  # optional from 2.6.0 alpha 2
+ (3, 0, 0, "alpha", 0),  # mandatory from 3.0.0 alpha 0
+ CO_FUTURE_PRINT_FUNCTION)
+
+unicode_literals = _Feature((2, 6, 0, "alpha", 2),  # optional from 2.6.0 alpha 2
+ (3, 0, 0, "alpha", 0),  # mandatory from 3.0.0 alpha 0
+ CO_FUTURE_UNICODE_LITERALS)
diff --git a/cashew/Lib/__phello__.foo.py b/cashew/Lib/__phello__.foo.py
new file mode 100644
index 0000000..8e8623e
--- /dev/null
+++ b/cashew/Lib/__phello__.foo.py
@@ -0,0 +1 @@
+# This file exists as a helper for the test.test_frozen module.
diff --git a/cashew/Lib/_abcoll.py b/cashew/Lib/_abcoll.py
new file mode 100644
index 0000000..b643692
--- /dev/null
+++ b/cashew/Lib/_abcoll.py
@@ -0,0 +1,695 @@
+# Copyright 2007 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Abstract Base Classes (ABCs) for collections, according to PEP 3119.
+
+DON'T USE THIS MODULE DIRECTLY! The classes here should be imported
+via collections; they are defined here only to alleviate certain
+bootstrapping issues. Unit tests are in test_collections.
+"""
+
+from abc import ABCMeta, abstractmethod
+import sys
+
+__all__ = ["Hashable", "Iterable", "Iterator",  # the ABCs this module exports
+ "Sized", "Container", "Callable",
+ "Set", "MutableSet",
+ "Mapping", "MutableMapping",
+ "MappingView", "KeysView", "ItemsView", "ValuesView",
+ "Sequence", "MutableSequence",
+ ]
+
+### ONE-TRICK PONIES ###
+
+def _hasattr(C, attr):  # True if attr is defined directly in the __dict__ of any class in C's MRO
+ try:
+ return any(attr in B.__dict__ for B in C.__mro__)  # inspects class dicts only, not normal attribute lookup
+ except AttributeError:
+ # Old-style class: fall back to ordinary attribute lookup
+ return hasattr(C, attr)
+
+
+class Hashable:  # ABC recognising classes with a usable __hash__
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def __hash__(self):
+ return 0
+
+ @classmethod
+ def __subclasshook__(cls, C):  # structural check, applied only when asked about Hashable itself
+ if cls is Hashable:
+ try:
+ for B in C.__mro__:
+ if "__hash__" in B.__dict__:  # the first class in the MRO that defines __hash__ decides
+ if B.__dict__["__hash__"]:  # a falsy entry such as __hash__ = None does not count
+ return True
+ break
+ except AttributeError:
+ # Old-style class
+ if getattr(C, "__hash__", None):
+ return True
+ return NotImplemented
+
+
+class Iterable:  # ABC recognising classes that define __iter__
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def __iter__(self):
+ while False:  # never runs; the yield only makes this default a generator that produces nothing
+ yield None
+
+ @classmethod
+ def __subclasshook__(cls, C):  # structural check, applied only when asked about Iterable itself
+ if cls is Iterable:
+ if _hasattr(C, "__iter__"):
+ return True
+ return NotImplemented
+
+Iterable.register(str)  # str is registered explicitly as an Iterable
+
+
+class Iterator(Iterable):  # ABC recognising classes that define both next and __iter__
+
+ @abstractmethod
+ def next(self):
+ 'Return the next item from the iterator. When exhausted, raise StopIteration'
+ raise StopIteration
+
+ def __iter__(self):  # an iterator is its own iterator
+ return self
+
+ @classmethod
+ def __subclasshook__(cls, C):  # structural check, applied only when asked about Iterator itself
+ if cls is Iterator:
+ if _hasattr(C, "next") and _hasattr(C, "__iter__"):
+ return True
+ return NotImplemented
+
+
+class Sized:  # ABC recognising classes that define __len__
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def __len__(self):
+ return 0
+
+ @classmethod
+ def __subclasshook__(cls, C):  # structural check, applied only when asked about Sized itself
+ if cls is Sized:
+ if _hasattr(C, "__len__"):
+ return True
+ return NotImplemented
+
+
+class Container:  # ABC recognising classes that define __contains__
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def __contains__(self, x):
+ return False
+
+ @classmethod
+ def __subclasshook__(cls, C):  # structural check, applied only when asked about Container itself
+ if cls is Container:
+ if _hasattr(C, "__contains__"):
+ return True
+ return NotImplemented
+
+
+class Callable:  # ABC recognising classes that define __call__
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def __call__(self, *args, **kwds):
+ return False
+
+ @classmethod
+ def __subclasshook__(cls, C):  # structural check, applied only when asked about Callable itself
+ if cls is Callable:
+ if _hasattr(C, "__call__"):
+ return True
+ return NotImplemented
+
+
+### SETS ###
+
+
+class Set(Sized, Iterable, Container):
+ """A set is a finite, iterable container.
+
+ This class provides concrete generic implementations of all
+ methods except for __contains__, __iter__ and __len__.
+
+ To override the comparisons (presumably for speed, as the
+ semantics are fixed), redefine __le__ and __ge__,
+ then the other operations will automatically follow suit.
+ """
+
+ def __le__(self, other):  # subset test
+ if not isinstance(other, Set):
+ return NotImplemented
+ if len(self) > len(other):  # a larger set cannot be a subset, so skip the scan
+ return False
+ for elem in self:
+ if elem not in other:
+ return False
+ return True
+
+ def __lt__(self, other):  # proper subset: strictly smaller and a subset
+ if not isinstance(other, Set):
+ return NotImplemented
+ return len(self) < len(other) and self.__le__(other)
+
+ def __gt__(self, other):  # proper superset: strictly larger and a superset
+ if not isinstance(other, Set):
+ return NotImplemented
+ return len(self) > len(other) and self.__ge__(other)
+
+ def __ge__(self, other):  # superset test
+ if not isinstance(other, Set):
+ return NotImplemented
+ if len(self) < len(other):
+ return False
+ for elem in other:
+ if elem not in self:
+ return False
+ return True
+
+ def __eq__(self, other):  # equal sizes plus subset means the same elements
+ if not isinstance(other, Set):
+ return NotImplemented
+ return len(self) == len(other) and self.__le__(other)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ @classmethod
+ def _from_iterable(cls, it):
+ '''Construct an instance of the class from any iterable input.
+
+ Must override this method if the class constructor signature
+ does not accept an iterable for an input.
+ '''
+ return cls(it)
+
+ def __and__(self, other):  # intersection: the elements of other that are also in self
+ if not isinstance(other, Iterable):
+ return NotImplemented
+ return self._from_iterable(value for value in other if value in self)
+
+ __rand__ = __and__
+
+ def isdisjoint(self, other):
+ 'Return True if two sets have a null intersection.'
+ for value in other:
+ if value in self:
+ return False
+ return True
+
+ def __or__(self, other):  # union: elements of self followed by elements of other
+ if not isinstance(other, Iterable):
+ return NotImplemented
+ chain = (e for s in (self, other) for e in s)
+ return self._from_iterable(chain)
+
+ __ror__ = __or__
+
+ def __sub__(self, other):  # difference: the elements of self that are not in other
+ if not isinstance(other, Set):
+ if not isinstance(other, Iterable):
+ return NotImplemented
+ other = self._from_iterable(other)  # a non-Set iterable is converted to a set of this class first
+ return self._from_iterable(value for value in self
+ if value not in other)
+
+ def __rsub__(self, other):  # reflected difference: the elements of other that are not in self
+ if not isinstance(other, Set):
+ if not isinstance(other, Iterable):
+ return NotImplemented
+ other = self._from_iterable(other)
+ return self._from_iterable(value for value in other
+ if value not in self)
+
+ def __xor__(self, other):  # symmetric difference, built from the two one-sided differences
+ if not isinstance(other, Set):
+ if not isinstance(other, Iterable):
+ return NotImplemented
+ other = self._from_iterable(other)
+ return (self - other) | (other - self)
+
+ __rxor__ = __xor__
+
+ # Sets are not hashable by default, but subclasses can change this
+ __hash__ = None
+
+ def _hash(self):
+ """Compute the hash value of a set.
+
+ Note that we don't define __hash__: not all sets are hashable.
+ But if you define a hashable set type, its __hash__ should
+ call this function.
+
+ This must be compatible with __eq__.
+
+ All sets ought to compare equal if they contain the same
+ elements, regardless of how they are implemented, and
+ regardless of the order of the elements; so there's not much
+ freedom for __eq__ or __hash__. We match the algorithm used
+ by the built-in frozenset type.
+ """
+ MAX = sys.maxint
+ MASK = 2 * MAX + 1  # keeps h non-negative and at most 2*MAX+1 after every step
+ n = len(self)
+ h = 1927868237 * (n + 1)
+ h &= MASK
+ for x in self:
+ hx = hash(x)
+ h ^= (hx ^ (hx << 16) ^ 89869747) * 3644798167  # XOR-combining the element terms makes the result independent of iteration order
+ h &= MASK
+ h = h * 69069 + 907133923
+ h &= MASK
+ if h > MAX:  # fold the masked value back into the signed range ending at sys.maxint
+ h -= MASK + 1
+ if h == -1:  # NOTE(review): -1 is replaced, presumably because CPython reserves it as an error value for hashes -- confirm
+ h = 590923713
+ return h
+
+Set.register(frozenset)  # frozenset is registered explicitly as a Set
+
+
+class MutableSet(Set):
+    """A mutable set is a finite, iterable container.
+
+    Generic implementations are supplied for every method except
+    __contains__, __iter__, __len__, add(), and discard().
+
+    To override the comparisons (presumably for speed, as the
+    semantics are fixed), all you have to do is redefine __le__ and
+    then the other operations will automatically follow suit.
+    """
+
+    @abstractmethod
+    def add(self, value):
+        """Add an element."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def discard(self, value):
+        """Remove an element.  Do not raise an exception if absent."""
+        raise NotImplementedError
+
+    def remove(self, value):
+        """Remove an element. If not a member, raise a KeyError."""
+        if value in self:
+            self.discard(value)
+        else:
+            raise KeyError(value)
+
+    def pop(self):
+        """Return the popped value.  Raise KeyError if empty."""
+        iterator = iter(self)
+        try:
+            victim = next(iterator)
+        except StopIteration:
+            raise KeyError
+        self.discard(victim)
+        return victim
+
+    def clear(self):
+        """This is slow (creates N new iterators!) but effective."""
+        while True:
+            try:
+                self.pop()
+            except KeyError:
+                break
+
+    def __ior__(self, it):
+        for extra in it:
+            self.add(extra)
+        return self
+
+    def __iand__(self, it):
+        for unwanted in (self - it):
+            self.discard(unwanted)
+        return self
+
+    def __ixor__(self, it):
+        if it is self:
+            self.clear()
+            return self
+        if not isinstance(it, Set):
+            it = self._from_iterable(it)
+        for value in it:
+            if value in self:
+                self.discard(value)
+            else:
+                self.add(value)
+        return self
+
+    def __isub__(self, it):
+        if it is self:
+            self.clear()
+            return self
+        for value in it:
+            self.discard(value)
+        return self
+
+MutableSet.register(set)
+
+
+### MAPPINGS ###
+
+
+class Mapping(Sized, Iterable, Container):
+
+    """A Mapping is a generic container for associating key/value
+    pairs.
+
+    Every method has a concrete generic implementation here except
+    __getitem__, __iter__, and __len__.
+
+    """
+
+    @abstractmethod
+    def __getitem__(self, key):
+        raise KeyError
+
+    def get(self, key, default=None):
+        'D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None.'
+        try:
+            found = self[key]
+        except KeyError:
+            found = default
+        return found
+
+    def __contains__(self, key):
+        try:
+            self[key]
+        except KeyError:
+            return False
+        return True
+
+    def iterkeys(self):
+        'D.iterkeys() -> an iterator over the keys of D'
+        return iter(self)
+
+    def itervalues(self):
+        'D.itervalues() -> an iterator over the values of D'
+        for k in self:
+            yield self[k]
+
+    def iteritems(self):
+        'D.iteritems() -> an iterator over the (key, value) items of D'
+        for k in self:
+            yield (k, self[k])
+
+    def keys(self):
+        "D.keys() -> list of D's keys"
+        return list(self)
+
+    def items(self):
+        "D.items() -> list of D's (key, value) pairs, as 2-tuples"
+        return [(k, self[k]) for k in self]
+
+    def values(self):
+        "D.values() -> list of D's values"
+        return [self[k] for k in self]
+
+    # Mappings are not hashable by default, but subclasses can change this
+    __hash__ = None
+
+    def __eq__(self, other):
+        if isinstance(other, Mapping):
+            return dict(self.items()) == dict(other.items())
+        return NotImplemented
+
+    def __ne__(self, other):
+        return not (self == other)
+
+class MappingView(Sized):
+    # Common base of the view classes: keeps a reference to the mapping.
+    def __init__(self, mapping):
+        self._mapping = mapping
+
+    def __len__(self):
+        return len(self._mapping)
+
+    def __repr__(self):
+        return '{0.__class__.__name__}({0._mapping!r})'.format(self)
+
+
+class KeysView(MappingView, Set):
+    # Set-like view over the keys of the wrapped mapping.
+    @classmethod
+    def _from_iterable(self, it):
+        return set(it)
+
+    def __contains__(self, key):
+        return key in self._mapping
+
+    def __iter__(self):
+        for k in self._mapping:
+            yield k
+
+KeysView.register(type({}.viewkeys()))
+
+class ItemsView(MappingView, Set):
+    # Set-like view over the (key, value) pairs of the wrapped mapping.
+
+    @classmethod
+    def _from_iterable(self, it):
+        return set(it)
+
+    def __contains__(self, item):
+        key, expected = item
+        try:
+            actual = self._mapping[key]
+        except KeyError:
+            return False
+        return actual == expected
+
+    def __iter__(self):
+        for k in self._mapping:
+            yield (k, self._mapping[k])
+
+ItemsView.register(type({}.viewitems()))
+
+class ValuesView(MappingView):
+    # View over the values of the wrapped mapping.
+
+    def __contains__(self, value):
+        # Linear search: look every key up and compare the stored
+        # value against the requested one.
+        return any(value == self._mapping[k] for k in self._mapping)
+
+    def __iter__(self):
+        for k in self._mapping:
+            yield self._mapping[k]
+
+ValuesView.register(type({}.viewvalues()))
+
+class MutableMapping(Mapping):
+
+    """A MutableMapping is a generic container for associating
+    key/value pairs.
+
+    This class supplies concrete generic implementations of all
+    methods except for __getitem__, __setitem__, __delitem__,
+    __iter__, and __len__.
+
+    """
+
+    @abstractmethod
+    def __setitem__(self, key, value):
+        raise KeyError
+
+    @abstractmethod
+    def __delitem__(self, key):
+        raise KeyError
+
+    # Unique sentinel: lets pop() tell "no default given" from None.
+    __marker = object()
+
+    def pop(self, key, default=__marker):
+        '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
+          If key is not found, d is returned if given, otherwise KeyError is raised.
+        '''
+        try:
+            value = self[key]
+        except KeyError:
+            if default is self.__marker:
+                raise
+            return default
+        del self[key]
+        return value
+
+    def popitem(self):
+        '''D.popitem() -> (k, v), remove and return some (key, value) pair
+           as a 2-tuple; but raise KeyError if D is empty.
+        '''
+        try:
+            first = next(iter(self))
+        except StopIteration:
+            raise KeyError
+        item = (first, self[first])
+        del self[first]
+        return item
+
+    def clear(self):
+        'D.clear() -> None.  Remove all items from D.'
+        while True:
+            try:
+                self.popitem()
+            except KeyError:
+                break
+
+    def update(*args, **kwds):
+        ''' D.update([E, ]**F) -> None.  Update D from mapping/iterable E and F.
+            If E present and has a .keys() method, does:     for k in E: D[k] = E[k]
+            If E present and lacks .keys() method, does:     for (k, v) in E: D[k] = v
+            In either case, this is followed by: for k, v in F.items(): D[k] = v
+        '''
+        if not args:
+            raise TypeError("descriptor 'update' of 'MutableMapping' object "
+                            "needs an argument")
+        # self travels inside *args, so every keyword ends up in kwds.
+        self, rest = args[0], args[1:]
+        if len(rest) > 1:
+            raise TypeError('update expected at most 1 arguments, got %d' %
+                            len(rest))
+        if rest:
+            other = rest[0]
+            if isinstance(other, Mapping):
+                for key in other:
+                    self[key] = other[key]
+            elif hasattr(other, "keys"):
+                for key in other.keys():
+                    self[key] = other[key]
+            else:
+                for key, value in other:
+                    self[key] = value
+        for key, value in kwds.items():
+            self[key] = value
+
+    def setdefault(self, key, default=None):
+        'D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D'
+        try:
+            return self[key]
+        except KeyError:
+            self[key] = default
+        return default
+
+MutableMapping.register(dict)
+
+
+### SEQUENCES ###
+
+
+class Sequence(Sized, Iterable, Container):
+    """All the operations on a read-only sequence.
+
+    Concrete subclasses must override __new__ or __init__,
+    __getitem__, and __len__.
+    """
+
+    @abstractmethod
+    def __getitem__(self, index):
+        raise IndexError
+
+    def __iter__(self):
+        # Walk upwards from index 0 until __getitem__ raises IndexError.
+        position = 0
+        try:
+            while True:
+                yield self[position]
+                position += 1
+        except IndexError:
+            return
+
+    def __contains__(self, value):
+        # Linear search based on equality; stops at the first match,
+        # exactly like an explicit loop would.
+        return any(item == value for item in self)
+
+    def __reversed__(self):
+        # Indexes are visited from len(self) - 1 down to 0.
+        for position in reversed(range(len(self))):
+            yield self[position]
+
+    def index(self, value):
+        '''S.index(value) -> integer -- return first index of value.
+           Raises ValueError if the value is not present.
+        '''
+        for position, item in enumerate(self):
+            if item == value:
+                return position
+        raise ValueError
+
+    def count(self, value):
+        'S.count(value) -> integer -- return number of occurrences of value'
+        return sum(1 for item in self if item == value)
+
+Sequence.register(tuple)
+Sequence.register(basestring)
+Sequence.register(buffer)
+Sequence.register(xrange)
+
+
+class MutableSequence(Sequence):
+    """All the operations on a read-write sequence.
+
+    Concrete subclasses must provide __new__ or __init__,
+    __getitem__, __setitem__, __delitem__, __len__, and insert().
+    """
+
+    @abstractmethod
+    def __setitem__(self, index, value):
+        raise IndexError
+
+    @abstractmethod
+    def __delitem__(self, index):
+        raise IndexError
+
+    @abstractmethod
+    def insert(self, index, value):
+        'S.insert(index, object) -- insert object before index'
+        raise IndexError
+
+    def append(self, value):
+        'S.append(object) -- append object to the end of the sequence'
+        self.insert(len(self), value)
+
+    def reverse(self):
+        'S.reverse() -- reverse *IN PLACE*'
+        size = len(self)
+        for left in range(size//2):
+            right = size - left - 1
+            self[left], self[right] = self[right], self[left]
+
+    def extend(self, values):
+        'S.extend(iterable) -- extend sequence by appending elements from the iterable'
+        for item in values:
+            self.append(item)
+
+    def pop(self, index=-1):
+        '''S.pop([index]) -> item -- remove and return item at index (default last).
+           Raise IndexError if list is empty or index is out of range.
+        '''
+        item = self[index]
+        del self[index]
+        return item
+
+    def remove(self, value):
+        '''S.remove(value) -- remove first occurrence of value.
+           Raise ValueError if the value is not present.
+        '''
+        position = self.index(value)
+        del self[position]
+
+    def __iadd__(self, values):
+        self.extend(values)
+        return self
+
+MutableSequence.register(list)
diff --git a/cashew/Lib/_osx_support.py b/cashew/Lib/_osx_support.py
new file mode 100644
index 0000000..d2aaae7
--- /dev/null
+++ b/cashew/Lib/_osx_support.py
@@ -0,0 +1,502 @@
+"""Shared OS X support functions."""
+
+import os
+import re
+import sys
+
+__all__ = [
+ 'compiler_fixup',
+ 'customize_config_vars',
+ 'customize_compiler',
+ 'get_platform_osx',
+]
+
+# configuration variables that may contain universal build flags,
+# like "-arch" or "-isysroot", that may need customization for
+# the user environment
+_UNIVERSAL_CONFIG_VARS = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS', 'BASECFLAGS',
+ 'BLDSHARED', 'LDSHARED', 'CC', 'CXX',
+ 'PY_CFLAGS', 'PY_LDFLAGS', 'PY_CPPFLAGS',
+ 'PY_CORE_CFLAGS')
+
+# configuration variables that may contain compiler calls
+_COMPILER_CONFIG_VARS = ('BLDSHARED', 'LDSHARED', 'CC', 'CXX')
+
+# prefix added to original configuration variable names
+_INITPRE = '_OSX_SUPPORT_INITIAL_'
+
+
+def _find_executable(executable, path=None):
+    """Locate 'executable' in the directories listed in 'path'.
+
+    'path' is a string of directories separated by 'os.pathsep'; it defaults
+    to os.environ['PATH'].  Returns the complete filename or None if not found.
+    """
+    if path is None:
+        path = os.environ['PATH']
+    search_dirs = path.split(os.pathsep)
+
+    extension = os.path.splitext(executable)[1]
+    dos_like = sys.platform == 'win32' or os.name == 'os2'
+    if dos_like and extension != '.exe':
+        executable += '.exe'
+
+    # A name that already refers to a file is returned as given.
+    if os.path.isfile(executable):
+        return executable
+    for directory in search_dirs:
+        candidate = os.path.join(directory, executable)
+        if os.path.isfile(candidate):
+            # the file exists, we have a shot at spawn working
+            return candidate
+    return None
+
+
+def _read_output(commandstring):
+    """Output from successful command execution or None"""
+    # Similar to os.popen(commandstring, "r").read(), but os.popen is not
+    # usable during python bootstrap, and tempfile may be missing as well.
+    import contextlib
+    try:
+        import tempfile
+        fp = tempfile.NamedTemporaryFile()
+    except ImportError:
+        fallback_name = "/tmp/_osx_support.%s" % (os.getpid(),)
+        fp = open(fallback_name, "w+b")
+
+    with contextlib.closing(fp) as fp:
+        cmd = "%s 2>/dev/null >'%s'" % (commandstring, fp.name)
+        if os.system(cmd):
+            return None
+        return fp.read().strip()
+
+
+def _find_build_tool(toolname):
+    """Find a build tool on current path or using xcrun"""
+    located = _find_executable(toolname)
+    if not located:
+        located = _read_output("/usr/bin/xcrun -find %s" % (toolname,))
+    return located or ''
+
+_SYSTEM_VERSION = None
+
+def _get_system_version():
+    """Return the OS X system version as a 'major.minor' string, or ''."""
+    # Reading this plist is a documented way to get the system
+    # version (see the documentation for the Gestalt Manager)
+    # We avoid using platform.mac_ver to avoid possible bootstrap issues during
+    # the build of Python itself (distutils is used to build standard library
+    # extensions).  The result is cached in _SYSTEM_VERSION.
+
+    global _SYSTEM_VERSION
+
+    if _SYSTEM_VERSION is None:
+        _SYSTEM_VERSION = ''
+        try:
+            f = open('/System/Library/CoreServices/SystemVersion.plist')
+        except IOError:
+            # Plain darwin box without the plist: keep the '' default.
+            pass
+        else:
+            try:
+                # The plist is XML; match the <string> after the version key.
+                m = re.search(r'<key>ProductUserVisibleVersion</key>\s*'
+                              r'<string>(.*?)</string>', f.read())
+            finally:
+                f.close()
+            if m is not None:
+                _SYSTEM_VERSION = '.'.join(m.group(1).split('.')[:2])
+            # else: fall back to the default behaviour
+
+    return _SYSTEM_VERSION
+
+def _remove_original_values(_config_vars):
+    """Remove saved original values (needed by get_platform tests)"""
+    for name in list(_config_vars):
+        if not name.startswith(_INITPRE):
+            continue
+        del _config_vars[name]
+
+def _save_modified_value(_config_vars, cv, newvalue):
+    """Save modified and original unmodified value of configuration var"""
+    previous = _config_vars.get(cv, '')
+    if previous != newvalue:
+        if _INITPRE + cv not in _config_vars:
+            _config_vars[_INITPRE + cv] = previous
+    _config_vars[cv] = newvalue
+
+def _supports_universal_builds():
+    """Returns True if universal builds are supported on this system"""
+    # Approximation: running on 10.4 or above is taken to mean that the
+    # Xcode environment supports universal builds, in particular the
+    # -isysroot and -arch compiler arguments.  This is in support of
+    # allowing 10.4 universal builds to run on 10.3.x systems.
+    version_string = _get_system_version()
+    if not version_string:
+        return False
+    try:
+        version = tuple(int(part) for part in version_string.split('.'))
+    except ValueError:
+        return False
+    return bool(version >= (10, 4)) if version else False
+
+
+def _find_appropriate_compiler(_config_vars):
+    """Find appropriate C compiler for extension module builds"""
+
+    # Issue #13590:
+    #    The OSX location for the compiler varies between OSX
+    #    (or rather Xcode) releases.  With older releases (up-to 10.5)
+    #    the compiler is in /usr/bin, with newer releases the compiler
+    #    can only be found inside Xcode.app if the "Command Line Tools"
+    #    are not installed.
+    #
+    #    Furthermore, the compiler that can be used varies between
+    #    Xcode releases. Up to Xcode 4 it was possible to use 'gcc-4.2'
+    #    as the compiler, after that 'clang' should be used because
+    #    gcc-4.2 is either not present, or a copy of 'llvm-gcc' that
+    #    miscompiles Python.
+
+    # skip checks if the compiler was overridden with a CC env variable
+    if 'CC' in os.environ:
+        return _config_vars
+
+    # The CC config var might contain additional arguments.
+    # Ignore them while searching.
+    configured = _config_vars['CC'].split()[0]
+    compiler = configured
+    if not _find_executable(configured):
+        # Compiler is not found on the shell search PATH.
+        # Now search for clang, first on PATH (if the Command Line
+        # Tools have been installed in / or if the user has provided
+        # another location via CC).  If not found, try using xcrun
+        # to find an uninstalled clang (within a selected Xcode).
+        #
+        # NOTE: Cannot use subprocess here because of bootstrap
+        # issues when building Python itself (and os.popen is
+        # implemented on top of subprocess and is therefore not
+        # usable as well)
+        compiler = _find_build_tool('clang')
+
+    elif os.path.basename(configured).startswith('gcc'):
+        # Compiler is GCC, check if it is LLVM-GCC
+        quoted = configured.replace("'", "'\"'\"'")
+        version_text = _read_output("'%s' --version" % (quoted,))
+        if version_text and 'llvm-gcc' in version_text:
+            # Found LLVM-GCC, fall back to clang
+            compiler = _find_build_tool('clang')
+
+    if not compiler:
+        raise SystemError(
+            "Cannot locate working compiler")
+
+    if compiler != configured:
+        # Found a replacement compiler.
+        # Modify config vars using new compiler, if not already explicitly
+        # overridden by an env variable, preserving additional arguments.
+        for cv in _COMPILER_CONFIG_VARS:
+            if cv in _config_vars and cv not in os.environ:
+                words = _config_vars[cv].split()
+                words[0] = compiler + '++' if cv == 'CXX' else compiler
+                _save_modified_value(_config_vars, cv, ' '.join(words))
+
+    return _config_vars
+
+
+def _remove_universal_flags(_config_vars):
+    """Remove all universal build arguments from config vars"""
+
+    for cv in _UNIVERSAL_CONFIG_VARS:
+        # Do not alter a config var explicitly overridden by env var
+        if cv not in _config_vars or cv in os.environ:
+            continue
+        stripped = re.sub('-arch\s+\w+\s', ' ', _config_vars[cv])
+        stripped = re.sub('-isysroot [^ \t]*', ' ', stripped)
+        _save_modified_value(_config_vars, cv, stripped)
+
+    return _config_vars
+
+
+def _remove_unsupported_archs(_config_vars):
+    """Remove any unsupported archs from config vars"""
+    # Different Xcode releases support different sets for '-arch'
+    # flags. In particular, Xcode 4.x no longer supports the
+    # PPC architectures.
+    #
+    # This code automatically removes '-arch ppc' and '-arch ppc64'
+    # when these are not supported. That makes it possible to
+    # build extensions on OSX 10.7 and later with the prebuilt
+    # 32-bit installer on the python.org website.
+
+    # skip checks if the compiler was overridden with a CC env variable
+    if 'CC' in os.environ:
+        return _config_vars
+
+    if re.search('-arch\s+ppc', _config_vars['CFLAGS']) is None:
+        return _config_vars
+    # NOTE: Cannot use subprocess here because of bootstrap
+    # issues when building Python itself
+    quoted_cc = _config_vars['CC'].replace("'", "'\"'\"'")
+    probe = ("""echo 'int main{};' | """
+             """'%s' -c -arch ppc -x c -o /dev/null /dev/null 2>/dev/null"""
+             % (quoted_cc,))
+    if os.system(probe):
+        # The compile failed for some reason.  Because of differences
+        # across Xcode and compiler versions, there is no reliable way
+        # to be sure why it failed.  Assume here it was due to lack of
+        # PPC support and remove the related '-arch' flags from each
+        # config variables not explicitly overridden by an environment
+        # variable.  If the error was for some other reason, we hope the
+        # failure will show up again when trying to compile an extension
+        # module.
+        for cv in _UNIVERSAL_CONFIG_VARS:
+            if cv in _config_vars and cv not in os.environ:
+                flags = re.sub('-arch\s+ppc\w*\s', ' ', _config_vars[cv])
+                _save_modified_value(_config_vars, cv, flags)
+
+    return _config_vars
+
+
+def _override_all_archs(_config_vars):
+    """Allow override of all archs with ARCHFLAGS env var"""
+    # NOTE: This name was introduced by Apple in OSX 10.5 and
+    # is used by several scripting languages distributed with
+    # that OS release.
+    if 'ARCHFLAGS' not in os.environ:
+        return _config_vars
+
+    arch = os.environ['ARCHFLAGS']
+    for cv in _UNIVERSAL_CONFIG_VARS:
+        if cv in _config_vars and '-arch' in _config_vars[cv]:
+            without_arch = re.sub('-arch\s+\w+\s', ' ', _config_vars[cv])
+            _save_modified_value(_config_vars, cv, without_arch + ' ' + arch)
+
+    return _config_vars
+
+
+def _check_for_unavailable_sdk(_config_vars):
+    """Remove references to any SDKs not available"""
+    # If we're on OSX 10.5 or later and the user tries to
+    # compile an extension using an SDK that is not present
+    # on the current machine it is better to not use an SDK
+    # than to fail.  This is particularly important with
+    # the standalone Command Line Tools alternative to a
+    # full-blown Xcode install since the CLT packages do not
+    # provide SDKs.  If the SDK is not present, it is assumed
+    # that the header files and dev libs have been installed
+    # to /usr and /System/Library by either a standalone CLT
+    # package or the CLT component within Xcode.
+    cflags = _config_vars.get('CFLAGS', '')
+    found = re.search(r'-isysroot\s+(\S+)', cflags)
+    if found is None or os.path.exists(found.group(1)):
+        return _config_vars
+
+    # The SDK named in CFLAGS does not exist: drop every -isysroot.
+    for cv in _UNIVERSAL_CONFIG_VARS:
+        # Do not alter a config var explicitly overridden by env var
+        if cv in _config_vars and cv not in os.environ:
+            cleaned = re.sub(r'-isysroot\s+\S+(?:\s|$)', ' ', _config_vars[cv])
+            _save_modified_value(_config_vars, cv, cleaned)
+
+    return _config_vars
+
+
+def compiler_fixup(compiler_so, cc_args):
+    """
+    Strip '-isysroot PATH' and '-arch ARCH' from the compile flags when
+    the user has specified one of them in cc_args.
+
+    This is needed because '-arch ARCH' adds another architecture to the
+    build, without a way to remove an architecture. Furthermore GCC will
+    barf if multiple '-isysroot' arguments are present.
+
+    compiler_so -- sequence of compiler command words; it is copied,
+                   not modified in place.
+    cc_args     -- additional compiler arguments to inspect.
+
+    Returns the adjusted command as a new list.  A warning is logged
+    when the selected SDK directory does not exist.
+    """
+    # Work on a copy so that the caller's sequence is left alone.
+    compiler_so = list(compiler_so)
+
+    if _supports_universal_builds():
+        strip_arch = '-arch' in cc_args
+        strip_sysroot = '-isysroot' in cc_args
+    else:
+        # OSX before 10.4.0, these don't support -arch and -isysroot at
+        # all.
+        strip_arch = strip_sysroot = True
+
+    if strip_arch or 'ARCHFLAGS' in os.environ:
+        # Strip each '-arch' together with the argument following it.
+        while '-arch' in compiler_so:
+            index = compiler_so.index('-arch')
+            del compiler_so[index:index+2]
+
+    if 'ARCHFLAGS' in os.environ and not strip_arch:
+        # User specified different -arch flags in the environ,
+        # see also distutils.sysconfig
+        compiler_so = compiler_so + os.environ['ARCHFLAGS'].split()
+
+    if strip_sysroot:
+        # Strip each '-isysroot' together with the argument following it.
+        while '-isysroot' in compiler_so:
+            index = compiler_so.index('-isysroot')
+            del compiler_so[index:index+2]
+
+    # Check if the SDK that is used during compilation actually exists,
+    # the universal build requires the usage of a universal SDK and not all
+    # users have that installed by default.
+    sysroot = None
+    # cc_args is consulted before the configured compiler command.
+    if '-isysroot' in cc_args:
+        sysroot = cc_args[cc_args.index('-isysroot') + 1]
+    elif '-isysroot' in compiler_so:
+        sysroot = compiler_so[compiler_so.index('-isysroot') + 1]
+
+    if sysroot and not os.path.isdir(sysroot):
+        # distutils is only imported when a warning has to be emitted.
+        from distutils import log
+        log.warn("Compiling with an SDK that doesn't seem to exist: %s",
+                 sysroot)
+        log.warn("Please check your Xcode installation")
+
+    return compiler_so
+
+
+def customize_config_vars(_config_vars):
+    """Customize Python build configuration variables.
+
+    Called internally from sysconfig with a mutable mapping
+    containing name/value pairs parsed from the configured
+    makefile used to build this interpreter.  Returns
+    the mapping updated as needed to reflect the environment
+    in which the interpreter is running; in the case of
+    a Python from a binary installer, the installed
+    environment may be very different from the build
+    environment, i.e. different OS levels, different
+    build tools, different available CPU architectures.
+
+    This customization is performed whenever
+    distutils.sysconfig.get_config_vars() is first
+    called.  It may be used in environments where no
+    compilers are present, i.e. when installing pure
+    Python dists.  Customization of compiler paths
+    and detection of unavailable archs is deferred
+    until the first extension module build is
+    requested (in distutils.sysconfig.customize_compiler).
+
+    Currently called from distutils.sysconfig
+    """
+
+    # Steps applied on every system, in this order:
+    #   1. let the ARCHFLAGS env var override all archs
+    #   2. remove references to SDKs that are not found
+    steps = [_override_all_archs, _check_for_unavailable_sdk]
+    if not _supports_universal_builds():
+        # On Mac OS X before 10.4, check if -arch and -isysroot
+        # are in CFLAGS or LDFLAGS and remove them if they are.
+        # This is needed when building extensions on a 10.3 system
+        # using a universal build of python.
+        steps.insert(0, _remove_universal_flags)
+
+    for step in steps:
+        step(_config_vars)
+    return _config_vars
+
+
+def customize_compiler(_config_vars):
+    """Customize compiler path and configuration variables.
+
+    This customization is performed when the first
+    extension module build is requested
+    (in distutils.sysconfig.customize_compiler).
+    """
+    steps = (
+        # Find a compiler to use for extension module builds
+        _find_appropriate_compiler,
+        # Remove ppc arch flags if not supported here
+        _remove_unsupported_archs,
+        # Allow user to override all archs with ARCHFLAGS env var
+        _override_all_archs,
+    )
+    for step in steps:
+        step(_config_vars)
+    return _config_vars
+
+
+def get_platform_osx(_config_vars, osname, release, machine):
+    """Filter values for get_platform()
+
+    _config_vars -- mapping of build configuration variables; only
+                    MACOSX_DEPLOYMENT_TARGET and CFLAGS values are read.
+    osname, release, machine -- the values to be filtered.
+
+    Returns an (osname, release, machine) tuple.  Raises ValueError
+    when the '-arch' flags in CFLAGS form an unknown combination.
+    """
+    # called from get_platform() in sysconfig and distutils.util
+    #
+    # For our purposes, we'll assume that the system version from
+    # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
+    # to. This makes the compatibility story a bit more sane because the
+    # machine is going to compile and link as if it were
+    # MACOSX_DEPLOYMENT_TARGET.
+
+    macver = _config_vars.get('MACOSX_DEPLOYMENT_TARGET', '')
+    macrelease = _get_system_version() or macver
+    macver = macver or macrelease
+    if not macver:
+        # Neither a deployment target nor a system version is known.
+        return (osname, release, machine)
+
+    release = macver
+    osname = "macosx"
+
+    # Use the original CFLAGS value, if available, so that we
+    # return the same machine type for the platform string.
+    # Otherwise, distutils may consider this a cross-compiling
+    # case and disallow installs.
+    cflags = _config_vars.get(_INITPRE+'CFLAGS',
+                              _config_vars.get('CFLAGS', ''))
+    version = (10, 0)  # assume no universal support
+    if macrelease:
+        try:
+            version = tuple(int(i) for i in macrelease.split('.')[0:2])
+        except ValueError:
+            pass
+
+    if (version >= (10, 4)) and '-arch' in cflags.strip():
+        # The universal build will build fat binaries, but not on
+        # systems before 10.4
+        archs = tuple(sorted(set(re.findall('-arch\s+(\S+)', cflags))))
+        # Names used for the known multi-architecture combinations.
+        fat_names = {
+            ('i386', 'ppc'): 'fat',
+            ('i386', 'x86_64'): 'intel',
+            ('i386', 'ppc', 'x86_64'): 'fat3',
+            ('ppc64', 'x86_64'): 'fat64',
+            ('i386', 'ppc', 'ppc64', 'x86_64'): 'universal',
+        }
+        if len(archs) == 1:
+            machine = archs[0]
+        elif archs in fat_names:
+            machine = fat_names[archs]
+        else:
+            raise ValueError(
+                "Don't know machine value for archs=%r" % (archs,))
+
+    elif machine == 'i386':
+        # On OSX the machine type returned by uname is always the
+        # 32-bit variant, even if the executable architecture is
+        # the 64-bit variant
+        if sys.maxint >= 2**32:
+            machine = 'x86_64'
+
+    elif machine in ('PowerPC', 'Power_Macintosh'):
+        # Pick a sane name for the PPC architecture.
+        # See 'i386' case
+        machine = 'ppc64' if sys.maxint >= 2**32 else 'ppc'
+
+    return (osname, release, machine)
diff --git a/cashew/Lib/_pyio.py b/cashew/Lib/_pyio.py
new file mode 100644
index 0000000..f022a4e
--- /dev/null
+++ b/cashew/Lib/_pyio.py
@@ -0,0 +1,2037 @@
+"""
+Python implementation of the io module.
+"""
+
+from __future__ import (print_function, unicode_literals)
+
+import os
+import abc
+import codecs
+import sys
+import warnings
+import errno
+# Import thread instead of threading to reduce startup cost
+try:
+ from thread import allocate_lock as Lock
+except ImportError:
+ from dummy_thread import allocate_lock as Lock
+
+import io
+from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
+from errno import EINTR
+
+# Python 2: make every class statement in this module that lists no base
+# class create a new-style class.
+__metaclass__ = type
+
+# Fallback buffer size; open() prefers the file's st_blksize whenever
+# fstat() reports a usable value.
+DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
+
+# NOTE: Base classes defined here are registered with the "official" ABCs
+# defined in io.py. We don't use real inheritance though, because we don't want
+# to inherit the C implementations.
+
+
+class BlockingIOError(IOError):
+
+ """Exception raised when I/O would block on a non-blocking I/O stream."""
+
+ def __init__(self, errno, strerror, characters_written=0):
+ super(IOError, self).__init__(errno, strerror)
+ if not isinstance(characters_written, (int, long)):
+ raise TypeError("characters_written must be a integer")
+ self.characters_written = characters_written
+
+
+def open(file, mode="r", buffering=-1,
+ encoding=None, errors=None,
+ newline=None, closefd=True):
+
+ r"""Open file and return a stream. Raise IOError upon failure.
+
+ file is either a text or byte string giving the name (and the path
+ if the file isn't in the current working directory) of the file to
+ be opened or an integer file descriptor of the file to be
+ wrapped. (If a file descriptor is given, it is closed when the
+ returned I/O object is closed, unless closefd is set to False.)
+
+ mode is an optional string that specifies the mode in which the file
+ is opened. It defaults to 'r' which means open for reading in text
+ mode. Other common values are 'w' for writing (truncating the file if
+ it already exists), and 'a' for appending (which on some Unix systems,
+ means that all writes append to the end of the file regardless of the
+ current seek position). In text mode, if encoding is not specified the
+ encoding used is platform dependent. (For reading and writing raw
+ bytes use binary mode and leave encoding unspecified.) The available
+ modes are:
+
+ ========= ===============================================================
+ Character Meaning
+ --------- ---------------------------------------------------------------
+ 'r' open for reading (default)
+ 'w' open for writing, truncating the file first
+ 'a' open for writing, appending to the end of the file if it exists
+ 'b' binary mode
+ 't' text mode (default)
+ '+' open a disk file for updating (reading and writing)
+ 'U' universal newline mode (for backwards compatibility; unneeded
+ for new code)
+ ========= ===============================================================
+
+ The default mode is 'rt' (open for reading text). For binary random
+ access, the mode 'w+b' opens and truncates the file to 0 bytes, while
+ 'r+b' opens the file without truncation.
+
+ Python distinguishes between files opened in binary and text modes,
+ even when the underlying operating system doesn't. Files opened in
+ binary mode (appending 'b' to the mode argument) return contents as
+ bytes objects without any decoding. In text mode (the default, or when
+ 't' is appended to the mode argument), the contents of the file are
+ returned as strings, the bytes having been first decoded using a
+ platform-dependent encoding or using the specified encoding if given.
+
+ buffering is an optional integer used to set the buffering policy.
+ Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
+ line buffering (only usable in text mode), and an integer > 1 to indicate
+ the size of a fixed-size chunk buffer. When no buffering argument is
+ given, the default buffering policy works as follows:
+
+ * Binary files are buffered in fixed-size chunks; the size of the buffer
+ is chosen using a heuristic trying to determine the underlying device's
+ "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
+ On many systems, the buffer will typically be 4096 or 8192 bytes long.
+
+ * "Interactive" text files (files for which isatty() returns True)
+ use line buffering. Other text files use the policy described above
+ for binary files.
+
+ encoding is the name of the encoding used to decode or encode the
+ file. This should only be used in text mode. The default encoding is
+ platform dependent, but any encoding supported by Python can be
+ passed. See the codecs module for the list of supported encodings.
+
+ errors is an optional string that specifies how encoding errors are to
+ be handled---this argument should not be used in binary mode. Pass
+ 'strict' to raise a ValueError exception if there is an encoding error
+ (the default of None has the same effect), or pass 'ignore' to ignore
+ errors. (Note that ignoring encoding errors can lead to data loss.)
+ See the documentation for codecs.register for a list of the permitted
+ encoding error strings.
+
+ newline controls how universal newlines works (it only applies to text
+ mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
+ follows:
+
+ * On input, if newline is None, universal newlines mode is
+ enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
+ these are translated into '\n' before being returned to the
+ caller. If it is '', universal newline mode is enabled, but line
+ endings are returned to the caller untranslated. If it has any of
+ the other legal values, input lines are only terminated by the given
+ string, and the line ending is returned to the caller untranslated.
+
+ * On output, if newline is None, any '\n' characters written are
+ translated to the system default line separator, os.linesep. If
+ newline is '', no translation takes place. If newline is any of the
+ other legal values, any '\n' characters written are translated to
+ the given string.
+
+ If closefd is False, the underlying file descriptor will be kept open
+ when the file is closed. This does not work when a file name is given
+ and must be True in that case.
+
+ open() returns a file object whose type depends on the mode, and
+ through which the standard file operations such as reading and writing
+ are performed. When open() is used to open a file in a text mode ('w',
+ 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
+ a file in a binary mode, the returned class varies: in read binary
+ mode, it returns a BufferedReader; in write binary and append binary
+ modes, it returns a BufferedWriter, and in read/write mode, it returns
+ a BufferedRandom.
+
+ It is also possible to use a string or bytearray as a file for both
+ reading and writing. For strings StringIO can be used like a file
+ opened in a text mode, and for bytes a BytesIO can be used like a file
+ opened in a binary mode.
+ """
+ if not isinstance(file, (basestring, int, long)):
+ raise TypeError("invalid file: %r" % file)
+ if not isinstance(mode, basestring):
+ raise TypeError("invalid mode: %r" % mode)
+ if not isinstance(buffering, (int, long)):
+ raise TypeError("invalid buffering: %r" % buffering)
+ if encoding is not None and not isinstance(encoding, basestring):
+ raise TypeError("invalid encoding: %r" % encoding)
+ if errors is not None and not isinstance(errors, basestring):
+ raise TypeError("invalid errors: %r" % errors)
+ modes = set(mode)
+ if modes - set("arwb+tU") or len(mode) > len(modes):
+ raise ValueError("invalid mode: %r" % mode)
+ reading = "r" in modes
+ writing = "w" in modes
+ appending = "a" in modes
+ updating = "+" in modes
+ text = "t" in modes
+ binary = "b" in modes
+ if "U" in modes:
+ if writing or appending:
+ raise ValueError("can't use U and writing mode at once")
+ reading = True
+ if text and binary:
+ raise ValueError("can't have text and binary mode at once")
+ if reading + writing + appending > 1:
+ raise ValueError("can't have read/write/append mode at once")
+ if not (reading or writing or appending):
+ raise ValueError("must have exactly one of read/write/append mode")
+ if binary and encoding is not None:
+ raise ValueError("binary mode doesn't take an encoding argument")
+ if binary and errors is not None:
+ raise ValueError("binary mode doesn't take an errors argument")
+ if binary and newline is not None:
+ raise ValueError("binary mode doesn't take a newline argument")
+ raw = FileIO(file,
+ (reading and "r" or "") +
+ (writing and "w" or "") +
+ (appending and "a" or "") +
+ (updating and "+" or ""),
+ closefd)
+ result = raw
+ try:
+ line_buffering = False
+ if buffering == 1 or buffering < 0 and raw.isatty():
+ buffering = -1
+ line_buffering = True
+ if buffering < 0:
+ buffering = DEFAULT_BUFFER_SIZE
+ try:
+ bs = os.fstat(raw.fileno()).st_blksize
+ except (os.error, AttributeError):
+ pass
+ else:
+ if bs > 1:
+ buffering = bs
+ if buffering < 0:
+ raise ValueError("invalid buffering size")
+ if buffering == 0:
+ if binary:
+ return result
+ raise ValueError("can't have unbuffered text I/O")
+ if updating:
+ buffer = BufferedRandom(raw, buffering)
+ elif writing or appending:
+ buffer = BufferedWriter(raw, buffering)
+ elif reading:
+ buffer = BufferedReader(raw, buffering)
+ else:
+ raise ValueError("unknown mode: %r" % mode)
+ result = buffer
+ if binary:
+ return result
+ text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
+ result = text
+ text.mode = mode
+ return result
+ except:
+ result.close()
+ raise
+
+
+class DocDescriptor:
+ """Helper for builtins.open.__doc__
+ """
+ def __get__(self, obj, typ):
+ return (
+ "open(file, mode='r', buffering=-1, encoding=None, "
+ "errors=None, newline=None, closefd=True)\n\n" +
+ open.__doc__)
+
+class OpenWrapper:
+    """Wrapper for builtins.open
+
+    Trick so that open won't become a bound method when stored
+    as a class variable (as dbm.dumb does).
+
+    See initstdio() in Python/pythonrun.c.
+    """
+    # Replaces the literal docstring above: __doc__ is computed on access
+    # by DocDescriptor from open.__doc__.
+    __doc__ = DocDescriptor()
+
+    def __new__(cls, *args, **kwargs):
+        # "Instantiating" the class simply calls open() and hands back its
+        # result; no OpenWrapper instance is created here.
+        return open(*args, **kwargs)
+
+
+class UnsupportedOperation(ValueError, IOError):
+    # Raised by IOBase._unsupported(); having both bases lets callers catch
+    # it as either ValueError or IOError.
+    pass
+
+
+class IOBase:
+ __metaclass__ = abc.ABCMeta
+
+ """The abstract base class for all I/O classes, acting on streams of
+ bytes. There is no public constructor.
+
+ This class provides dummy implementations for many methods that
+ derived classes can override selectively; the default implementations
+ represent a file that cannot be read, written or seeked.
+
+ Even though IOBase does not declare read, readinto, or write because
+ their signatures will vary, implementations and clients should
+ consider those methods part of the interface. Also, implementations
+ may raise an IOError when operations they do not support are called.
+
+ The basic type used for binary data read from or written to a file is
+ the bytes type. Method arguments may also be bytearray or memoryview of
+ arrays of bytes. In some cases, such as readinto, a writable object such
+ as bytearray is required. Text I/O classes work with unicode data.
+
+ Note that calling any method (even inquiries) on a closed stream is
+ undefined. Implementations may raise IOError in this case.
+
+ IOBase (and its subclasses) support the iterator protocol, meaning
+ that an IOBase object can be iterated over yielding the lines in a
+ stream.
+
+ IOBase also supports the :keyword:`with` statement. In this example,
+ fp is closed after the suite of the with statement is complete:
+
+ with open('spam.txt', 'r') as fp:
+ fp.write('Spam and eggs!')
+ """
+
+ ### Internal ###
+
+ def _unsupported(self, name):
+ """Internal: raise an exception for unsupported operations."""
+ raise UnsupportedOperation("%s.%s() not supported" %
+ (self.__class__.__name__, name))
+
+ ### Positioning ###
+
+ def seek(self, pos, whence=0):
+ """Change stream position.
+
+ Change the stream position to byte offset pos. Argument pos is
+ interpreted relative to the position indicated by whence. Values
+ for whence are:
+
+ * 0 -- start of stream (the default); offset should be zero or positive
+ * 1 -- current stream position; offset may be negative
+ * 2 -- end of stream; offset is usually negative
+
+ Return the new absolute position.
+ """
+ self._unsupported("seek")
+
+ def tell(self):
+ """Return current stream position."""
+ return self.seek(0, 1)
+
+ def truncate(self, pos=None):
+ """Truncate file to size bytes.
+
+ Size defaults to the current IO position as reported by tell(). Return
+ the new size.
+ """
+ self._unsupported("truncate")
+
+ ### Flush and close ###
+
+ def flush(self):
+ """Flush write buffers, if applicable.
+
+ This is not implemented for read-only and non-blocking streams.
+ """
+ self._checkClosed()
+ # XXX Should this return the number of bytes written???
+
+ __closed = False
+
+ def close(self):
+ """Flush and close the IO object.
+
+ This method has no effect if the file is already closed.
+ """
+ if not self.__closed:
+ try:
+ self.flush()
+ finally:
+ self.__closed = True
+
+ def __del__(self):
+ """Destructor. Calls close()."""
+ # The try/except block is in case this is called at program
+ # exit time, when it's possible that globals have already been
+ # deleted, and then the close() call might fail. Since
+ # there's nothing we can do about such failures and they annoy
+ # the end users, we suppress the traceback.
+ try:
+ self.close()
+ except:
+ pass
+
+ ### Inquiries ###
+
+ def seekable(self):
+ """Return whether object supports random access.
+
+ If False, seek(), tell() and truncate() will raise IOError.
+ This method may need to do a test seek().
+ """
+ return False
+
+ def _checkSeekable(self, msg=None):
+ """Internal: raise an IOError if file is not seekable
+ """
+ if not self.seekable():
+ raise IOError("File or stream is not seekable."
+ if msg is None else msg)
+
+
+ def readable(self):
+ """Return whether object was opened for reading.
+
+ If False, read() will raise IOError.
+ """
+ return False
+
+ def _checkReadable(self, msg=None):
+ """Internal: raise an IOError if file is not readable
+ """
+ if not self.readable():
+ raise IOError("File or stream is not readable."
+ if msg is None else msg)
+
+ def writable(self):
+ """Return whether object was opened for writing.
+
+ If False, write() and truncate() will raise IOError.
+ """
+ return False
+
+ def _checkWritable(self, msg=None):
+ """Internal: raise an IOError if file is not writable
+ """
+ if not self.writable():
+ raise IOError("File or stream is not writable."
+ if msg is None else msg)
+
+ @property
+ def closed(self):
+ """closed: bool. True iff the file has been closed.
+
+ For backwards compatibility, this is a property, not a predicate.
+ """
+ return self.__closed
+
+ def _checkClosed(self, msg=None):
+ """Internal: raise a ValueError if file is closed
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed file."
+ if msg is None else msg)
+
+ ### Context manager ###
+
+ def __enter__(self):
+ """Context management protocol. Returns self."""
+ self._checkClosed()
+ return self
+
+ def __exit__(self, *args):
+ """Context management protocol. Calls close()"""
+ self.close()
+
+ ### Lower-level APIs ###
+
+ # XXX Should these be present even if unimplemented?
+
+ def fileno(self):
+ """Returns underlying file descriptor if one exists.
+
+ An IOError is raised if the IO object does not use a file descriptor.
+ """
+ self._unsupported("fileno")
+
+ def isatty(self):
+ """Return whether this is an 'interactive' stream.
+
+ Return False if it can't be determined.
+ """
+ self._checkClosed()
+ return False
+
+ ### Readline[s] and writelines ###
+
+ def readline(self, limit=-1):
+ r"""Read and return a line from the stream.
+
+ If limit is specified, at most limit bytes will be read.
+
+ The line terminator is always b'\n' for binary files; for text
+ files, the newlines argument to open can be used to select the line
+ terminator(s) recognized.
+ """
+ # For backwards compatibility, a (slowish) readline().
+ if hasattr(self, "peek"):
+ def nreadahead():
+ readahead = self.peek(1)
+ if not readahead:
+ return 1
+ n = (readahead.find(b"\n") + 1) or len(readahead)
+ if limit >= 0:
+ n = min(n, limit)
+ return n
+ else:
+ def nreadahead():
+ return 1
+ if limit is None:
+ limit = -1
+ elif not isinstance(limit, (int, long)):
+ raise TypeError("limit must be an integer")
+ res = bytearray()
+ while limit < 0 or len(res) < limit:
+ b = self.read(nreadahead())
+ if not b:
+ break
+ res += b
+ if res.endswith(b"\n"):
+ break
+ return bytes(res)
+
+ def __iter__(self):
+ self._checkClosed()
+ return self
+
+ def next(self):
+ line = self.readline()
+ if not line:
+ raise StopIteration
+ return line
+
+ def readlines(self, hint=None):
+ """Return a list of lines from the stream.
+
+ hint can be specified to control the number of lines read: no more
+ lines will be read if the total size (in bytes/characters) of all
+ lines so far exceeds hint.
+ """
+ if hint is not None and not isinstance(hint, (int, long)):
+ raise TypeError("integer or None expected")
+ if hint is None or hint <= 0:
+ return list(self)
+ n = 0
+ lines = []
+ for line in self:
+ lines.append(line)
+ n += len(line)
+ if n >= hint:
+ break
+ return lines
+
+ def writelines(self, lines):
+ self._checkClosed()
+ for line in lines:
+ self.write(line)
+
+io.IOBase.register(IOBase)
+
+
+class RawIOBase(IOBase):
+
+ """Base class for raw binary I/O."""
+
+ # The read() method is implemented by calling readinto(); derived
+ # classes that want to support read() only need to implement
+ # readinto() as a primitive operation. In general, readinto() can be
+ # more efficient than read().
+
+ # (It would be tempting to also provide an implementation of
+ # readinto() in terms of read(), in case the latter is a more suitable
+ # primitive operation, but that would lead to nasty recursion in case
+ # a subclass doesn't implement either.)
+
+ def read(self, n=-1):
+ """Read and return up to n bytes.
+
+ Returns an empty bytes object on EOF, or None if the object is
+ set not to block and has no data to read.
+ """
+ if n is None:
+ n = -1
+ if n < 0:
+ return self.readall()
+ b = bytearray(n.__index__())
+ n = self.readinto(b)
+ if n is None:
+ return None
+ del b[n:]
+ return bytes(b)
+
+ def readall(self):
+ """Read until EOF, using multiple read() call."""
+ res = bytearray()
+ while True:
+ data = self.read(DEFAULT_BUFFER_SIZE)
+ if not data:
+ break
+ res += data
+ if res:
+ return bytes(res)
+ else:
+ # b'' or None
+ return data
+
+ def readinto(self, b):
+ """Read up to len(b) bytes into b.
+
+ Returns number of bytes read (0 for EOF), or None if the object
+ is set not to block and has no data to read.
+ """
+ self._unsupported("readinto")
+
+ def write(self, b):
+ """Write the given buffer to the IO stream.
+
+ Returns the number of bytes written, which may be less than len(b).
+ """
+ self._unsupported("write")
+
+io.RawIOBase.register(RawIOBase)
+from _io import FileIO
+RawIOBase.register(FileIO)
+
+
+class BufferedIOBase(IOBase):
+
+ """Base class for buffered IO objects.
+
+ The main difference with RawIOBase is that the read() method
+ supports omitting the size argument, and does not have a default
+ implementation that defers to readinto().
+
+ In addition, read(), readinto() and write() may raise
+ BlockingIOError if the underlying raw stream is in non-blocking
+ mode and not ready; unlike their raw counterparts, they will never
+ return None.
+
+ A typical implementation should not inherit from a RawIOBase
+ implementation, but wrap one.
+ """
+
+ def read(self, n=None):
+ """Read and return up to n bytes.
+
+ If the argument is omitted, None, or negative, reads and
+ returns all data until EOF.
+
+ If the argument is positive, and the underlying raw stream is
+ not 'interactive', multiple raw reads may be issued to satisfy
+ the byte count (unless EOF is reached first). But for
+ interactive raw streams (XXX and for pipes?), at most one raw
+ read will be issued, and a short result does not imply that
+ EOF is imminent.
+
+ Returns an empty bytes array on EOF.
+
+ Raises BlockingIOError if the underlying raw stream has no
+ data at the moment.
+ """
+ self._unsupported("read")
+
+ def read1(self, n=None):
+ """Read up to n bytes with at most one read() system call."""
+ self._unsupported("read1")
+
+ def readinto(self, b):
+ """Read up to len(b) bytes into b.
+
+ Like read(), this may issue multiple reads to the underlying raw
+ stream, unless the latter is 'interactive'.
+
+ Returns the number of bytes read (0 for EOF).
+
+ Raises BlockingIOError if the underlying raw stream has no
+ data at the moment.
+ """
+ data = self.read(len(b))
+ n = len(data)
+ try:
+ b[:n] = data
+ except TypeError as err:
+ import array
+ if not isinstance(b, array.array):
+ raise err
+ b[:n] = array.array(b'b', data)
+ return n
+
+ def write(self, b):
+ """Write the given buffer to the IO stream.
+
+ Return the number of bytes written, which is always len(b).
+
+ Raises BlockingIOError if the buffer is full and the
+ underlying raw stream cannot accept more data at the moment.
+ """
+ self._unsupported("write")
+
+ def detach(self):
+ """
+ Separate the underlying raw stream from the buffer and return it.
+
+ After the raw stream has been detached, the buffer is in an unusable
+ state.
+ """
+ self._unsupported("detach")
+
+io.BufferedIOBase.register(BufferedIOBase)
+
+
+class _BufferedIOMixin(BufferedIOBase):
+
+ """A mixin implementation of BufferedIOBase with an underlying raw stream.
+
+ This passes most requests on to the underlying raw stream. It
+ does *not* provide implementations of read(), readinto() or
+ write().
+ """
+
+ def __init__(self, raw):
+ self._raw = raw
+
+ ### Positioning ###
+
+ def seek(self, pos, whence=0):
+ new_position = self.raw.seek(pos, whence)
+ if new_position < 0:
+ raise IOError("seek() returned an invalid position")
+ return new_position
+
+ def tell(self):
+ pos = self.raw.tell()
+ if pos < 0:
+ raise IOError("tell() returned an invalid position")
+ return pos
+
+ def truncate(self, pos=None):
+ # Flush the stream. We're mixing buffered I/O with lower-level I/O,
+ # and a flush may be necessary to synch both views of the current
+ # file state.
+ self.flush()
+
+ if pos is None:
+ pos = self.tell()
+ # XXX: Should seek() be used, instead of passing the position
+ # XXX directly to truncate?
+ return self.raw.truncate(pos)
+
+ ### Flush and close ###
+
+ def flush(self):
+ if self.closed:
+ raise ValueError("flush of closed file")
+ self.raw.flush()
+
+ def close(self):
+ if self.raw is not None and not self.closed:
+ try:
+ # may raise BlockingIOError or BrokenPipeError etc
+ self.flush()
+ finally:
+ self.raw.close()
+
+ def detach(self):
+ if self.raw is None:
+ raise ValueError("raw stream already detached")
+ self.flush()
+ raw = self._raw
+ self._raw = None
+ return raw
+
+ ### Inquiries ###
+
+ def seekable(self):
+ return self.raw.seekable()
+
+ def readable(self):
+ return self.raw.readable()
+
+ def writable(self):
+ return self.raw.writable()
+
+ @property
+ def raw(self):
+ return self._raw
+
+ @property
+ def closed(self):
+ return self.raw.closed
+
+ @property
+ def name(self):
+ return self.raw.name
+
+ @property
+ def mode(self):
+ return self.raw.mode
+
+ def __repr__(self):
+ clsname = self.__class__.__name__
+ try:
+ name = self.name
+ except Exception:
+ return "<_pyio.{0}>".format(clsname)
+ else:
+ return "<_pyio.{0} name={1!r}>".format(clsname, name)
+
+ ### Lower-level APIs ###
+
+ def fileno(self):
+ return self.raw.fileno()
+
+ def isatty(self):
+ return self.raw.isatty()
+
+
+class BytesIO(BufferedIOBase):
+
+ """Buffered I/O implementation using an in-memory bytes buffer."""
+
+ def __init__(self, initial_bytes=None):
+ buf = bytearray()
+ if initial_bytes is not None:
+ buf.extend(initial_bytes)
+ self._buffer = buf
+ self._pos = 0
+
+ def __getstate__(self):
+ if self.closed:
+ raise ValueError("__getstate__ on closed file")
+ return self.__dict__.copy()
+
+ def getvalue(self):
+ """Return the bytes value (contents) of the buffer
+ """
+ if self.closed:
+ raise ValueError("getvalue on closed file")
+ return bytes(self._buffer)
+
+ def read(self, n=None):
+ if self.closed:
+ raise ValueError("read from closed file")
+ if n is None:
+ n = -1
+ if not isinstance(n, (int, long)):
+ raise TypeError("integer argument expected, got {0!r}".format(
+ type(n)))
+ if n < 0:
+ n = len(self._buffer)
+ if len(self._buffer) <= self._pos:
+ return b""
+ newpos = min(len(self._buffer), self._pos + n)
+ b = self._buffer[self._pos : newpos]
+ self._pos = newpos
+ return bytes(b)
+
+ def read1(self, n):
+ """This is the same as read.
+ """
+ return self.read(n)
+
+ def write(self, b):
+ if self.closed:
+ raise ValueError("write to closed file")
+ if isinstance(b, unicode):
+ raise TypeError("can't write unicode to binary stream")
+ n = len(b)
+ if n == 0:
+ return 0
+ pos = self._pos
+ if pos > len(self._buffer):
+ # Inserts null bytes between the current end of the file
+ # and the new write position.
+ padding = b'\x00' * (pos - len(self._buffer))
+ self._buffer += padding
+ self._buffer[pos:pos + n] = b
+ self._pos += n
+ return n
+
+ def seek(self, pos, whence=0):
+ if self.closed:
+ raise ValueError("seek on closed file")
+ try:
+ pos.__index__
+ except AttributeError:
+ raise TypeError("an integer is required")
+ if whence == 0:
+ if pos < 0:
+ raise ValueError("negative seek position %r" % (pos,))
+ self._pos = pos
+ elif whence == 1:
+ self._pos = max(0, self._pos + pos)
+ elif whence == 2:
+ self._pos = max(0, len(self._buffer) + pos)
+ else:
+ raise ValueError("invalid whence value")
+ return self._pos
+
+ def tell(self):
+ if self.closed:
+ raise ValueError("tell on closed file")
+ return self._pos
+
+ def truncate(self, pos=None):
+ if self.closed:
+ raise ValueError("truncate on closed file")
+ if pos is None:
+ pos = self._pos
+ else:
+ try:
+ pos.__index__
+ except AttributeError:
+ raise TypeError("an integer is required")
+ if pos < 0:
+ raise ValueError("negative truncate position %r" % (pos,))
+ del self._buffer[pos:]
+ return pos
+
+ def readable(self):
+ if self.closed:
+ raise ValueError("I/O operation on closed file.")
+ return True
+
+ def writable(self):
+ if self.closed:
+ raise ValueError("I/O operation on closed file.")
+ return True
+
+ def seekable(self):
+ if self.closed:
+ raise ValueError("I/O operation on closed file.")
+ return True
+
+
+class BufferedReader(_BufferedIOMixin):
+
+ """BufferedReader(raw[, buffer_size])
+
+ A buffer for a readable, sequential BaseRawIO object.
+
+ The constructor creates a BufferedReader for the given readable raw
+ stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
+ is used.
+ """
+
+ def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
+ """Create a new buffered reader using the given readable raw IO object.
+ """
+ if not raw.readable():
+ raise IOError('"raw" argument must be readable.')
+
+ _BufferedIOMixin.__init__(self, raw)
+ if buffer_size <= 0:
+ raise ValueError("invalid buffer size")
+ self.buffer_size = buffer_size
+ self._reset_read_buf()
+ self._read_lock = Lock()
+
+ def _reset_read_buf(self):
+ self._read_buf = b""
+ self._read_pos = 0
+
+ def read(self, n=None):
+ """Read n bytes.
+
+ Returns exactly n bytes of data unless the underlying raw IO
+ stream reaches EOF or if the call would block in non-blocking
+ mode. If n is negative, read until EOF or until read() would
+ block.
+ """
+ if n is not None and n < -1:
+ raise ValueError("invalid number of bytes to read")
+ with self._read_lock:
+ return self._read_unlocked(n)
+
+ def _read_unlocked(self, n=None):
+ nodata_val = b""
+ empty_values = (b"", None)
+ buf = self._read_buf
+ pos = self._read_pos
+
+ # Special case for when the number of bytes to read is unspecified.
+ if n is None or n == -1:
+ self._reset_read_buf()
+ chunks = [buf[pos:]] # Strip the consumed bytes.
+ current_size = 0
+ while True:
+ # Read until EOF or until read() would block.
+ try:
+ chunk = self.raw.read()
+ except IOError as e:
+ if e.errno != EINTR:
+ raise
+ continue
+ if chunk in empty_values:
+ nodata_val = chunk
+ break
+ current_size += len(chunk)
+ chunks.append(chunk)
+ return b"".join(chunks) or nodata_val
+
+ # The number of bytes to read is specified, return at most n bytes.
+ avail = len(buf) - pos # Length of the available buffered data.
+ if n <= avail:
+ # Fast path: the data to read is fully buffered.
+ self._read_pos += n
+ return buf[pos:pos+n]
+ # Slow path: read from the stream until enough bytes are read,
+ # or until an EOF occurs or until read() would block.
+ chunks = [buf[pos:]]
+ wanted = max(self.buffer_size, n)
+ while avail < n:
+ try:
+ chunk = self.raw.read(wanted)
+ except IOError as e:
+ if e.errno != EINTR:
+ raise
+ continue
+ if chunk in empty_values:
+ nodata_val = chunk
+ break
+ avail += len(chunk)
+ chunks.append(chunk)
+ # n is more than avail only when an EOF occurred or when
+ # read() would have blocked.
+ n = min(n, avail)
+ out = b"".join(chunks)
+ self._read_buf = out[n:] # Save the extra data in the buffer.
+ self._read_pos = 0
+ return out[:n] if out else nodata_val
+
+ def peek(self, n=0):
+ """Returns buffered bytes without advancing the position.
+
+ The argument indicates a desired minimal number of bytes; we
+ do at most one raw read to satisfy it. We never return more
+ than self.buffer_size.
+ """
+ with self._read_lock:
+ return self._peek_unlocked(n)
+
+ def _peek_unlocked(self, n=0):
+ want = min(n, self.buffer_size)
+ have = len(self._read_buf) - self._read_pos
+ if have < want or have <= 0:
+ to_read = self.buffer_size - have
+ while True:
+ try:
+ current = self.raw.read(to_read)
+ except IOError as e:
+ if e.errno != EINTR:
+ raise
+ continue
+ break
+ if current:
+ self._read_buf = self._read_buf[self._read_pos:] + current
+ self._read_pos = 0
+ return self._read_buf[self._read_pos:]
+
+ def read1(self, n):
+ """Reads up to n bytes, with at most one read() system call."""
+ # Returns up to n bytes. If at least one byte is buffered, we
+ # only return buffered bytes. Otherwise, we do one raw read.
+ if n < 0:
+ raise ValueError("number of bytes to read must be positive")
+ if n == 0:
+ return b""
+ with self._read_lock:
+ self._peek_unlocked(1)
+ return self._read_unlocked(
+ min(n, len(self._read_buf) - self._read_pos))
+
+ def tell(self):
+ return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
+
+ def seek(self, pos, whence=0):
+ if not (0 <= whence <= 2):
+ raise ValueError("invalid whence value")
+ with self._read_lock:
+ if whence == 1:
+ pos -= len(self._read_buf) - self._read_pos
+ pos = _BufferedIOMixin.seek(self, pos, whence)
+ self._reset_read_buf()
+ return pos
+
+class BufferedWriter(_BufferedIOMixin):
+
+    """A buffer for a writeable sequential RawIO object.
+
+    The constructor creates a BufferedWriter for the given writeable raw
+    stream. If the buffer_size is not given, it defaults to
+    DEFAULT_BUFFER_SIZE.
+    """
+
+    # stacklevel passed to warnings.warn() for the max_buffer_size
+    # deprecation warning issued in __init__.
+    _warning_stack_offset = 2
+
+    def __init__(self, raw,
+                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
+        """Wrap the writable raw stream.
+
+        Raises IOError if raw is not writable and ValueError if buffer_size
+        is not positive. max_buffer_size is deprecated; passing it only
+        triggers a DeprecationWarning.
+        """
+        if not raw.writable():
+            raise IOError('"raw" argument must be writable.')
+
+        _BufferedIOMixin.__init__(self, raw)
+        if buffer_size <= 0:
+            raise ValueError("invalid buffer size")
+        if max_buffer_size is not None:
+            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
+                          self._warning_stack_offset)
+        self.buffer_size = buffer_size
+        self._write_buf = bytearray()
+        self._write_lock = Lock()
+
+    def write(self, b):
+        """Append b to the write buffer, flushing when it grows too large.
+
+        Returns the number of bytes accepted. Raises ValueError on a closed
+        file, TypeError for unicode input, and BlockingIOError (carrying
+        the partial count) when a flush would block and the buffer had to
+        be cut back.
+        """
+        if self.closed:
+            raise ValueError("write to closed file")
+        if isinstance(b, unicode):
+            raise TypeError("can't write unicode to binary stream")
+        with self._write_lock:
+            # XXX we can implement some more tricks to try and avoid
+            # partial writes
+            if len(self._write_buf) > self.buffer_size:
+                # We're full, so let's pre-flush the buffer. (This may
+                # raise BlockingIOError with characters_written == 0.)
+                self._flush_unlocked()
+            before = len(self._write_buf)
+            self._write_buf.extend(b)
+            written = len(self._write_buf) - before
+            if len(self._write_buf) > self.buffer_size:
+                try:
+                    self._flush_unlocked()
+                except BlockingIOError as e:
+                    if len(self._write_buf) > self.buffer_size:
+                        # We've hit the buffer_size. We have to accept a partial
+                        # write and cut back our buffer.
+                        overage = len(self._write_buf) - self.buffer_size
+                        written -= overage
+                        self._write_buf = self._write_buf[:self.buffer_size]
+                        raise BlockingIOError(e.errno, e.strerror, written)
+            return written
+
+    def truncate(self, pos=None):
+        """Flush pending data, then truncate the raw stream at pos.
+
+        pos defaults to the raw stream's position after the flush.
+        """
+        with self._write_lock:
+            self._flush_unlocked()
+            if pos is None:
+                pos = self.raw.tell()
+            return self.raw.truncate(pos)
+
+    def flush(self):
+        """Write all buffered data to the raw stream."""
+        with self._write_lock:
+            self._flush_unlocked()
+
+    def _flush_unlocked(self):
+        """Implementation of flush(); the caller holds self._write_lock."""
+        if self.closed:
+            raise ValueError("flush of closed file")
+        while self._write_buf:
+            try:
+                n = self.raw.write(self._write_buf)
+            except BlockingIOError:
+                raise RuntimeError("self.raw should implement RawIOBase: it "
+                                   "should not raise BlockingIOError")
+            except IOError as e:
+                # Retry writes interrupted by a signal.
+                if e.errno != EINTR:
+                    raise
+                continue
+            if n is None:
+                # The raw stream signals "would block" by returning None.
+                raise BlockingIOError(
+                    errno.EAGAIN,
+                    "write could not complete without blocking", 0)
+            if n > len(self._write_buf) or n < 0:
+                raise IOError("write() returned incorrect number of bytes")
+            # Drop what was written; the loop continues with the rest.
+            del self._write_buf[:n]
+
+    def tell(self):
+        """Return the raw position plus the bytes still waiting in the buffer."""
+        return _BufferedIOMixin.tell(self) + len(self._write_buf)
+
+    def seek(self, pos, whence=0):
+        """Flush pending data, then seek the raw stream."""
+        if not (0 <= whence <= 2):
+            raise ValueError("invalid whence")
+        with self._write_lock:
+            self._flush_unlocked()
+            return _BufferedIOMixin.seek(self, pos, whence)
+
+
+class BufferedRWPair(BufferedIOBase):
+
+ """A buffered reader and writer object together.
+
+ A buffered reader object and buffered writer object put together to
+ form a sequential IO object that can read and write. This is typically
+ used with a socket or two-way pipe.
+
+ reader and writer are RawIOBase objects that are readable and
+ writeable respectively. If the buffer_size is omitted it defaults to
+ DEFAULT_BUFFER_SIZE.
+ """
+
+ # XXX The usefulness of this (compared to having two separate IO
+ # objects) is questionable.
+
+ def __init__(self, reader, writer,
+ buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
+ """Constructor.
+
+ The arguments are two RawIO instances.
+ """
+ if max_buffer_size is not None:
+ warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
+
+ if not reader.readable():
+ raise IOError('"reader" argument must be readable.')
+
+ if not writer.writable():
+ raise IOError('"writer" argument must be writable.')
+
+ self.reader = BufferedReader(reader, buffer_size)
+ self.writer = BufferedWriter(writer, buffer_size)
+
+ def read(self, n=None):
+ if n is None:
+ n = -1
+ return self.reader.read(n)
+
+ def readinto(self, b):
+ return self.reader.readinto(b)
+
+ def write(self, b):
+ return self.writer.write(b)
+
+ def peek(self, n=0):
+ return self.reader.peek(n)
+
+ def read1(self, n):
+ return self.reader.read1(n)
+
+ def readable(self):
+ return self.reader.readable()
+
+ def writable(self):
+ return self.writer.writable()
+
+ def flush(self):
+ return self.writer.flush()
+
+ def close(self):
+ try:
+ self.writer.close()
+ finally:
+ self.reader.close()
+
+ def isatty(self):
+ return self.reader.isatty() or self.writer.isatty()
+
+ @property
+ def closed(self):
+ return self.writer.closed
+
+
+class BufferedRandom(BufferedWriter, BufferedReader):
+
+ """A buffered interface to random access streams.
+
+ The constructor creates a reader and writer for a seekable stream,
+ raw, given in the first argument. If the buffer_size is omitted it
+ defaults to DEFAULT_BUFFER_SIZE.
+ """
+
+ _warning_stack_offset = 3
+
+ def __init__(self, raw,
+ buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
+ raw._checkSeekable()
+ BufferedReader.__init__(self, raw, buffer_size)
+ BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
+
+ def seek(self, pos, whence=0):
+ if not (0 <= whence <= 2):
+ raise ValueError("invalid whence")
+ self.flush()
+ if self._read_buf:
+ # Undo read ahead.
+ with self._read_lock:
+ self.raw.seek(self._read_pos - len(self._read_buf), 1)
+ # First do the raw seek, then empty the read buffer, so that
+ # if the raw seek fails, we don't lose buffered data forever.
+ pos = self.raw.seek(pos, whence)
+ with self._read_lock:
+ self._reset_read_buf()
+ if pos < 0:
+ raise IOError("seek() returned invalid position")
+ return pos
+
+ def tell(self):
+ if self._write_buf:
+ return BufferedWriter.tell(self)
+ else:
+ return BufferedReader.tell(self)
+
+ def truncate(self, pos=None):
+ if pos is None:
+ pos = self.tell()
+ # Use seek to flush the read buffer.
+ return BufferedWriter.truncate(self, pos)
+
+ def read(self, n=None):
+ if n is None:
+ n = -1
+ self.flush()
+ return BufferedReader.read(self, n)
+
+ def readinto(self, b):
+ self.flush()
+ return BufferedReader.readinto(self, b)
+
+ def peek(self, n=0):
+ self.flush()
+ return BufferedReader.peek(self, n)
+
+ def read1(self, n):
+ self.flush()
+ return BufferedReader.read1(self, n)
+
+ def write(self, b):
+ if self._read_buf:
+ # Undo readahead
+ with self._read_lock:
+ self.raw.seek(self._read_pos - len(self._read_buf), 1)
+ self._reset_read_buf()
+ return BufferedWriter.write(self, b)
+
+
+class TextIOBase(IOBase):
+
+ """Base class for text I/O.
+
+ This class provides a character and line based interface to stream
+ I/O. There is no readinto method because Python's character strings
+ are immutable. There is no public constructor.
+ """
+
+ def read(self, n=-1):
+ """Read at most n characters from stream.
+
+ Read from underlying buffer until we have n characters or we hit EOF.
+ If n is negative or omitted, read until EOF.
+ """
+ self._unsupported("read")
+
+ def write(self, s):
+ """Write string s to stream."""
+ self._unsupported("write")
+
+ def truncate(self, pos=None):
+ """Truncate size to pos."""
+ self._unsupported("truncate")
+
+ def readline(self):
+ """Read until newline or EOF.
+
+ Returns an empty string if EOF is hit immediately.
+ """
+ self._unsupported("readline")
+
+ def detach(self):
+ """
+ Separate the underlying buffer from the TextIOBase and return it.
+
+ After the underlying buffer has been detached, the TextIO is in an
+ unusable state.
+ """
+ self._unsupported("detach")
+
+ @property
+ def encoding(self):
+ """Subclasses should override."""
+ return None
+
+ @property
+ def newlines(self):
+ """Line endings translated so far.
+
+ Only line endings translated during reading are considered.
+
+ Subclasses should override.
+ """
+ return None
+
+ @property
+ def errors(self):
+ """Error setting of the decoder or encoder.
+
+ Subclasses should override."""
+ return None
+
+io.TextIOBase.register(TextIOBase)
+
+
+class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
+ r"""Codec used when reading a file in universal newlines mode. It wraps
+ another incremental decoder, translating \r\n and \r into \n. It also
+ records the types of newlines encountered. When used with
+ translate=False, it ensures that the newline sequence is returned in
+ one piece.
+ """
+ def __init__(self, decoder, translate, errors='strict'):
+ codecs.IncrementalDecoder.__init__(self, errors=errors)
+ self.translate = translate
+ self.decoder = decoder
+ self.seennl = 0
+ self.pendingcr = False
+
+ def decode(self, input, final=False):
+ # decode input (with the eventual \r from a previous pass)
+ if self.decoder is None:
+ output = input
+ else:
+ output = self.decoder.decode(input, final=final)
+ if self.pendingcr and (output or final):
+ output = "\r" + output
+ self.pendingcr = False
+
+ # retain last \r even when not translating data:
+ # then readline() is sure to get \r\n in one pass
+ if output.endswith("\r") and not final:
+ output = output[:-1]
+ self.pendingcr = True
+
+ # Record which newlines are read
+ crlf = output.count('\r\n')
+ cr = output.count('\r') - crlf
+ lf = output.count('\n') - crlf
+ self.seennl |= (lf and self._LF) | (cr and self._CR) \
+ | (crlf and self._CRLF)
+
+ if self.translate:
+ if crlf:
+ output = output.replace("\r\n", "\n")
+ if cr:
+ output = output.replace("\r", "\n")
+
+ return output
+
+    def getstate(self):
+        """Return (buffer, flags) with the pending-CR flag in bit 0.
+
+        The wrapped decoder's flags occupy the bits above it.
+        """
+        if self.decoder is None:
+            pending, inner_flags = b"", 0
+        else:
+            pending, inner_flags = self.decoder.getstate()
+        return pending, (inner_flags << 1) | (1 if self.pendingcr else 0)
+
+ def setstate(self, state):
+ buf, flag = state
+ self.pendingcr = bool(flag & 1)
+ if self.decoder is not None:
+ self.decoder.setstate((buf, flag >> 1))
+
+ def reset(self):
+ self.seennl = 0
+ self.pendingcr = False
+ if self.decoder is not None:
+ self.decoder.reset()
+
+ _LF = 1
+ _CR = 2
+ _CRLF = 4
+
+ @property
+ def newlines(self):
+ return (None,
+ "\n",
+ "\r",
+ ("\r", "\n"),
+ "\r\n",
+ ("\n", "\r\n"),
+ ("\r", "\r\n"),
+ ("\r", "\n", "\r\n")
+ )[self.seennl]
+
+
+class TextIOWrapper(TextIOBase):
+
+ r"""Character and line based layer over a BufferedIOBase object, buffer.
+
+ encoding gives the name of the encoding that the stream will be
+ decoded or encoded with. It defaults to locale.getpreferredencoding.
+
+ errors determines the strictness of encoding and decoding (see the
+ codecs.register) and defaults to "strict".
+
+ newline can be None, '', '\n', '\r', or '\r\n'. It controls the
+ handling of line endings. If it is None, universal newlines is
+ enabled. With this enabled, on input, the lines endings '\n', '\r',
+ or '\r\n' are translated to '\n' before being returned to the
+ caller. Conversely, on output, '\n' is translated to the system
+ default line separator, os.linesep. If newline is any other of its
+ legal values, that newline becomes the newline when the file is read
+ and it is returned untranslated. On output, '\n' is converted to the
+ newline.
+
+ If line_buffering is True, a call to flush is implied when a call to
+ write contains a newline character.
+ """
+
+ _CHUNK_SIZE = 2048
+
+ def __init__(self, buffer, encoding=None, errors=None, newline=None,
+ line_buffering=False):
+ if newline is not None and not isinstance(newline, basestring):
+ raise TypeError("illegal newline type: %r" % (type(newline),))
+ if newline not in (None, "", "\n", "\r", "\r\n"):
+ raise ValueError("illegal newline value: %r" % (newline,))
+ if encoding is None:
+ try:
+ import locale
+ except ImportError:
+ # Importing locale may fail if Python is being built
+ encoding = "ascii"
+ else:
+ encoding = locale.getpreferredencoding()
+
+ if not isinstance(encoding, basestring):
+ raise ValueError("invalid encoding: %r" % encoding)
+
+ if sys.py3kwarning and not codecs.lookup(encoding)._is_text_encoding:
+ msg = ("%r is not a text encoding; "
+ "use codecs.open() to handle arbitrary codecs")
+ warnings.warnpy3k(msg % encoding, stacklevel=2)
+
+ if errors is None:
+ errors = "strict"
+ else:
+ if not isinstance(errors, basestring):
+ raise ValueError("invalid errors: %r" % errors)
+
+ self._buffer = buffer
+ self._line_buffering = line_buffering
+ self._encoding = encoding
+ self._errors = errors
+ self._readuniversal = not newline
+ self._readtranslate = newline is None
+ self._readnl = newline
+ self._writetranslate = newline != ''
+ self._writenl = newline or os.linesep
+ self._encoder = None
+ self._decoder = None
+ self._decoded_chars = '' # buffer for text returned from decoder
+ self._decoded_chars_used = 0 # offset into _decoded_chars for read()
+ self._snapshot = None # info for reconstructing decoder state
+ self._seekable = self._telling = self.buffer.seekable()
+
+ if self._seekable and self.writable():
+ position = self.buffer.tell()
+ if position != 0:
+ try:
+ self._get_encoder().setstate(0)
+ except LookupError:
+ # Sometimes the encoder doesn't exist
+ pass
+
+ # self._snapshot is either None, or a tuple (dec_flags, next_input)
+ # where dec_flags is the second (integer) item of the decoder state
+ # and next_input is the chunk of input bytes that comes next after the
+ # snapshot point. We use this to reconstruct decoder states in tell().
+
+ # Naming convention:
+ # - "bytes_..." for integer variables that count input bytes
+ # - "chars_..." for integer variables that count decoded characters
+
+ def __repr__(self):
+ try:
+ name = self.name
+ except Exception:
+ return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
+ else:
+ return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
+ name, self.encoding)
+
+ @property
+ def encoding(self):
+ return self._encoding
+
+ @property
+ def errors(self):
+ return self._errors
+
+ @property
+ def line_buffering(self):
+ return self._line_buffering
+
+ @property
+ def buffer(self):
+ return self._buffer
+
+ def seekable(self):
+ if self.closed:
+ raise ValueError("I/O operation on closed file.")
+ return self._seekable
+
+ def readable(self):
+ return self.buffer.readable()
+
+ def writable(self):
+ return self.buffer.writable()
+
+ def flush(self):
+ self.buffer.flush()
+ self._telling = self._seekable
+
+ def close(self):
+ if self.buffer is not None and not self.closed:
+ try:
+ self.flush()
+ finally:
+ self.buffer.close()
+
+ @property
+ def closed(self):
+ return self.buffer.closed
+
+ @property
+ def name(self):
+ return self.buffer.name
+
+ def fileno(self):
+ return self.buffer.fileno()
+
+ def isatty(self):
+ return self.buffer.isatty()
+
+ def write(self, s):
+ if self.closed:
+ raise ValueError("write to closed file")
+ if not isinstance(s, unicode):
+ raise TypeError("can't write %s to text stream" %
+ s.__class__.__name__)
+ length = len(s)
+ haslf = (self._writetranslate or self._line_buffering) and "\n" in s
+ if haslf and self._writetranslate and self._writenl != "\n":
+ s = s.replace("\n", self._writenl)
+ encoder = self._encoder or self._get_encoder()
+ # XXX What if we were just reading?
+ b = encoder.encode(s)
+ self.buffer.write(b)
+ if self._line_buffering and (haslf or "\r" in s):
+ self.flush()
+ self._snapshot = None
+ if self._decoder:
+ self._decoder.reset()
+ return length
+
+ def _get_encoder(self):
+ make_encoder = codecs.getincrementalencoder(self._encoding)
+ self._encoder = make_encoder(self._errors)
+ return self._encoder
+
+ def _get_decoder(self):
+ make_decoder = codecs.getincrementaldecoder(self._encoding)
+ decoder = make_decoder(self._errors)
+ if self._readuniversal:
+ decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
+ self._decoder = decoder
+ return decoder
+
+ # The following three methods implement an ADT for _decoded_chars.
+ # Text returned from the decoder is buffered here until the client
+ # requests it by calling our read() or readline() method.
+ def _set_decoded_chars(self, chars):
+ """Set the _decoded_chars buffer."""
+ self._decoded_chars = chars
+ self._decoded_chars_used = 0
+
+    def _get_decoded_chars(self, n=None):
+        """Return up to n pending decoded chars (all if n is None).
+
+        The read offset into _decoded_chars is advanced past them.
+        """
+        begin = self._decoded_chars_used
+        taken = self._decoded_chars[begin:None if n is None else begin + n]
+        self._decoded_chars_used = begin + len(taken)
+        return taken
+
+ def _rewind_decoded_chars(self, n):
+ """Rewind the _decoded_chars buffer."""
+ if self._decoded_chars_used < n:
+ raise AssertionError("rewind decoded_chars out of bounds")
+ self._decoded_chars_used -= n
+
+ def _read_chunk(self):
+ """
+ Read and decode the next chunk of data from the BufferedReader.
+ """
+
+ # The return value is True unless EOF was reached. The decoded
+ # string is placed in self._decoded_chars (replacing its previous
+ # value). The entire input chunk is sent to the decoder, though
+ # some of it may remain buffered in the decoder, yet to be
+ # converted.
+
+ if self._decoder is None:
+ raise ValueError("no decoder")
+
+ if self._telling:
+ # To prepare for tell(), we need to snapshot a point in the
+ # file where the decoder's input buffer is empty.
+
+ dec_buffer, dec_flags = self._decoder.getstate()
+ # Given this, we know there was a valid snapshot point
+ # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
+
+ # Read a chunk, decode it, and put the result in self._decoded_chars.
+ input_chunk = self.buffer.read1(self._CHUNK_SIZE)
+ eof = not input_chunk
+ self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
+
+ if self._telling:
+ # At the snapshot point, len(dec_buffer) bytes before the read,
+ # the next input to be decoded is dec_buffer + input_chunk.
+ self._snapshot = (dec_flags, dec_buffer + input_chunk)
+
+ return not eof
+
+    def _pack_cookie(self, position, dec_flags=0,
+                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
+        # A tell() cookie means: seek to position, set the decoder flags to
+        # dec_flags, read bytes_to_feed bytes, feed them into the decoder with
+        # need_eof as the EOF flag, then skip chars_to_skip decoded characters.
+        # Fields are packed 64 bits apart (need_eof is the single bit 256), so
+        # for simple decoders the cookie is often just the byte offset.
+        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
+                (chars_to_skip<<192) | bool(need_eof)<<256)
+
+    def _unpack_cookie(self, bigint):
+        """Split a tell() cookie into its fields (inverse of _pack_cookie)."""
+        mask = (1 << 64) - 1
+        position, dec_flags, bytes_to_feed, chars_to_skip = [
+            (bigint >> shift) & mask for shift in (0, 64, 128, 192)]
+        return position, dec_flags, bytes_to_feed, bigint >> 256, chars_to_skip
+
+ def tell(self):
+ if not self._seekable:
+ raise IOError("underlying stream is not seekable")
+ if not self._telling:
+ raise IOError("telling position disabled by next() call")
+ self.flush()
+ position = self.buffer.tell()
+ decoder = self._decoder
+ if decoder is None or self._snapshot is None:
+ if self._decoded_chars:
+ # This should never happen.
+ raise AssertionError("pending decoded text")
+ return position
+
+ # Skip backward to the snapshot point (see _read_chunk).
+ dec_flags, next_input = self._snapshot
+ position -= len(next_input)
+
+ # How many decoded characters have been used up since the snapshot?
+ chars_to_skip = self._decoded_chars_used
+ if chars_to_skip == 0:
+ # We haven't moved from the snapshot point.
+ return self._pack_cookie(position, dec_flags)
+
+ # Starting from the snapshot position, we will walk the decoder
+ # forward until it gives us enough decoded characters.
+ saved_state = decoder.getstate()
+ try:
+ # Note our initial start point.
+ decoder.setstate((b'', dec_flags))
+ start_pos = position
+ start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
+ need_eof = 0
+
+ # Feed the decoder one byte at a time. As we go, note the
+ # nearest "safe start point" before the current location
+ # (a point where the decoder has nothing buffered, so seek()
+ # can safely start from there and advance to this location).
+ for next_byte in next_input:
+ bytes_fed += 1
+ chars_decoded += len(decoder.decode(next_byte))
+ dec_buffer, dec_flags = decoder.getstate()
+ if not dec_buffer and chars_decoded <= chars_to_skip:
+ # Decoder buffer is empty, so this is a safe start point.
+ start_pos += bytes_fed
+ chars_to_skip -= chars_decoded
+ start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
+ if chars_decoded >= chars_to_skip:
+ break
+ else:
+ # We didn't get enough decoded data; signal EOF to get more.
+ chars_decoded += len(decoder.decode(b'', final=True))
+ need_eof = 1
+ if chars_decoded < chars_to_skip:
+ raise IOError("can't reconstruct logical file position")
+
+ # The returned cookie corresponds to the last safe start point.
+ return self._pack_cookie(
+ start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
+ finally:
+ decoder.setstate(saved_state)
+
+ def truncate(self, pos=None):
+ self.flush()
+ if pos is None:
+ pos = self.tell()
+ return self.buffer.truncate(pos)
+
+ def detach(self):
+ if self.buffer is None:
+ raise ValueError("buffer is already detached")
+ self.flush()
+ buffer = self._buffer
+ self._buffer = None
+ return buffer
+
+    def seek(self, cookie, whence=0):
+        """Seek to the position given by a tell() cookie and return it.
+
+        With whence 1 or 2 only a cookie of 0 is accepted (else IOError)."""
+        if self.closed:
+            raise ValueError("seek on closed file")
+        if not self._seekable:
+            raise IOError("underlying stream is not seekable")
+        if whence == 1:  # seek relative to current position
+            if cookie != 0:
+                raise IOError("can't do nonzero cur-relative seeks")
+            # Re-sync the underlying buffer with the current position.
+            whence = 0
+            cookie = self.tell()
+        if whence == 2:  # seek relative to end of file
+            if cookie != 0:
+                raise IOError("can't do nonzero end-relative seeks")
+            self.flush()
+            position = self.buffer.seek(0, 2)
+            self._set_decoded_chars('')
+            self._snapshot = None
+            if self._decoder:
+                self._decoder.reset()
+            return position
+        if whence != 0:
+            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
+                             (whence,))
+        if cookie < 0:
+            raise ValueError("negative seek position %r" % (cookie,))
+        self.flush()
+
+        # The strategy of seek() is to go back to the safe start point
+        # and replay the effect of read(chars_to_skip) from there.
+        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
+            self._unpack_cookie(cookie)
+        # Seek back to the safe start point.
+        self.buffer.seek(start_pos)
+        self._set_decoded_chars('')
+        self._snapshot = None
+
+        # Restore the decoder to its state from the safe start point.
+        if cookie == 0 and self._decoder:
+            self._decoder.reset()
+        elif self._decoder or dec_flags or chars_to_skip:
+            self._decoder = self._decoder or self._get_decoder()
+            self._decoder.setstate((b'', dec_flags))
+            self._snapshot = (dec_flags, b'')
+
+        if chars_to_skip:
+            # Just like _read_chunk, feed the decoder and save a snapshot.
+            input_chunk = self.buffer.read(bytes_to_feed)
+            self._set_decoded_chars(
+                self._decoder.decode(input_chunk, need_eof))
+            self._snapshot = (dec_flags, input_chunk)
+            # Skip chars_to_skip of the decoded characters.
+            if len(self._decoded_chars) < chars_to_skip:
+                raise IOError("can't restore logical file position")
+            self._decoded_chars_used = chars_to_skip
+
+        # Finally, reset the encoder (merely useful for proper BOM handling)
+        try:
+            encoder = self._encoder or self._get_encoder()
+        except LookupError:
+            # Sometimes the encoder doesn't exist
+            pass
+        else:
+            if cookie != 0:
+                encoder.setstate(0)
+            else:
+                encoder.reset()
+        return cookie
+
+ def read(self, n=None):
+ self._checkReadable()
+ if n is None:
+ n = -1
+ decoder = self._decoder or self._get_decoder()
+ try:
+ n.__index__
+ except AttributeError:
+ raise TypeError("an integer is required")
+ if n < 0:
+ # Read everything.
+ result = (self._get_decoded_chars() +
+ decoder.decode(self.buffer.read(), final=True))
+ self._set_decoded_chars('')
+ self._snapshot = None
+ return result
+ else:
+ # Keep reading chunks until we have n characters to return.
+ eof = False
+ result = self._get_decoded_chars(n)
+ while len(result) < n and not eof:
+ eof = not self._read_chunk()
+ result += self._get_decoded_chars(n - len(result))
+ return result
+
+ def next(self):
+ self._telling = False
+ line = self.readline()
+ if not line:
+ self._snapshot = None
+ self._telling = self._seekable
+ raise StopIteration
+ return line
+
+ def readline(self, limit=None):
+ if self.closed:
+ raise ValueError("read from closed file")
+ if limit is None:
+ limit = -1
+ elif not isinstance(limit, (int, long)):
+ raise TypeError("limit must be an integer")
+
+ # Grab all the decoded text (we will rewind any extra bits later).
+ line = self._get_decoded_chars()
+
+ start = 0
+ # Make the decoder if it doesn't already exist.
+ if not self._decoder:
+ self._get_decoder()
+
+ pos = endpos = None
+ while True:
+ if self._readtranslate:
+ # Newlines are already translated, only search for \n
+ pos = line.find('\n', start)
+ if pos >= 0:
+ endpos = pos + 1
+ break
+ else:
+ start = len(line)
+
+ elif self._readuniversal:
+ # Universal newline search. Find any of \r, \r\n, \n
+ # The decoder ensures that \r\n are not split in two pieces
+
+ # In C we'd look for these in parallel of course.
+ nlpos = line.find("\n", start)
+ crpos = line.find("\r", start)
+ if crpos == -1:
+ if nlpos == -1:
+ # Nothing found
+ start = len(line)
+ else:
+ # Found \n
+ endpos = nlpos + 1
+ break
+ elif nlpos == -1:
+ # Found lone \r
+ endpos = crpos + 1
+ break
+ elif nlpos < crpos:
+ # Found \n
+ endpos = nlpos + 1
+ break
+ elif nlpos == crpos + 1:
+ # Found \r\n
+ endpos = crpos + 2
+ break
+ else:
+ # Found \r
+ endpos = crpos + 1
+ break
+ else:
+ # non-universal
+ pos = line.find(self._readnl)
+ if pos >= 0:
+ endpos = pos + len(self._readnl)
+ break
+
+ if limit >= 0 and len(line) >= limit:
+ endpos = limit # reached length limit
+ break
+
+ # No line ending seen yet - get more data.
+ while self._read_chunk():
+ if self._decoded_chars:
+ break
+ if self._decoded_chars:
+ line += self._get_decoded_chars()
+ else:
+ # end of file
+ self._set_decoded_chars('')
+ self._snapshot = None
+ return line
+
+ if limit >= 0 and endpos > limit:
+ endpos = limit # don't exceed limit
+
+ # Rewind _decoded_chars to just after the line ending we found.
+ self._rewind_decoded_chars(len(line) - endpos)
+ return line[:endpos]
+
+ @property
+ def newlines(self):
+ return self._decoder.newlines if self._decoder else None
+
+
+class StringIO(TextIOWrapper):
+ """Text I/O implementation using an in-memory buffer.
+
+ The initial_value argument sets the value of object. The newline
+ argument is like the one of TextIOWrapper's constructor.
+ """
+
+ def __init__(self, initial_value="", newline="\n"):
+ super(StringIO, self).__init__(BytesIO(),
+ encoding="utf-8",
+ errors="strict",
+ newline=newline)
+ # Issue #5645: make universal newlines semantics the same as in the
+ # C version, even under Windows.
+ if newline is None:
+ self._writetranslate = False
+ if initial_value:
+ if not isinstance(initial_value, unicode):
+ initial_value = unicode(initial_value)
+ self.write(initial_value)
+ self.seek(0)
+
+ def getvalue(self):
+ self.flush()
+ decoder = self._decoder or self._get_decoder()
+ old_state = decoder.getstate()
+ decoder.reset()
+ try:
+ return decoder.decode(self.buffer.getvalue(), final=True)
+ finally:
+ decoder.setstate(old_state)
+
+ def __repr__(self):
+ # TextIOWrapper tells the encoding in its repr. In StringIO,
+ # that's an implementation detail.
+ return object.__repr__(self)
+
+ @property
+ def errors(self):
+ return None
+
+ @property
+ def encoding(self):
+ return None
+
+ def detach(self):
+ # This doesn't make sense on StringIO.
+ self._unsupported("detach")
diff --git a/cashew/Lib/_strptime.py b/cashew/Lib/_strptime.py
new file mode 100644
index 0000000..8eb2718
--- /dev/null
+++ b/cashew/Lib/_strptime.py
@@ -0,0 +1,478 @@
+"""Strptime-related classes and functions.
+
+CLASSES:
+ LocaleTime -- Discovers and stores locale-specific time information
+ TimeRE -- Creates regexes for pattern matching a string of text containing
+ time information
+
+FUNCTIONS:
+ _getlang -- Figure out what language is being used for the locale
+ strptime -- Calculates the time struct represented by the passed-in string
+
+"""
+import time
+import locale
+import calendar
+from re import compile as re_compile
+from re import IGNORECASE
+from re import escape as re_escape
+from datetime import date as datetime_date
+try:
+ from thread import allocate_lock as _thread_allocate_lock
+except:
+ from dummy_thread import allocate_lock as _thread_allocate_lock
+
+__all__ = []
+
+def _getlang():
+    """Return what locale.getlocale() reports for the LC_TIME category."""
+    lc_time_setting = locale.getlocale(locale.LC_TIME)
+    return lc_time_setting
+
+class LocaleTime(object):
+    """Stores and handles locale-specific information related to time.
+
+    ATTRIBUTES:
+        f_weekday -- full weekday names (7-item list)
+        a_weekday -- abbreviated weekday names (7-item list)
+        f_month -- full month names (13-item list; index 0 is the empty
+                    entry provided by the calendar module)
+        a_month -- abbreviated month names (13-item list; index 0 is the
+                    empty entry provided by the calendar module)
+        am_pm -- AM/PM representation (2-item list)
+        LC_date_time -- format string for date/time representation (string)
+        LC_date -- format string for date representation (string)
+        LC_time -- format string for time representation (string)
+        timezone -- daylight- and non-daylight-savings timezone representation
+                    (2-item tuple of frozensets)
+        tzname -- snapshot of time.tzname taken during initialization
+        daylight -- snapshot of time.daylight taken during initialization
+        lang -- Language used by instance (2-item tuple)
+
+    All names are stored lower-cased.
+    """
+
+    def __init__(self):
+        """Set all attributes.
+
+        Order of methods called matters for dependency reasons
+        (__calc_date_time reads the attributes set by the other helpers).
+
+        The locale language is recorded at the outset and then checked again
+        before exiting.  This is to make sure that the attributes were not
+        set with a mix of information from more than one locale, which would
+        most likely happen when another thread changes the locale while this
+        constructor is still running.  The same check is applied to the
+        timezone values.
+
+        Raises ValueError if the locale or the timezone changed while the
+        attributes were being computed.
+        """
+        self.lang = _getlang()
+        self.__calc_weekday()
+        self.__calc_month()
+        self.__calc_am_pm()
+        self.__calc_timezone()
+        self.__calc_date_time()
+        if _getlang() != self.lang:
+            raise ValueError("locale changed during initialization")
+        if time.tzname != self.tzname or time.daylight != self.daylight:
+            raise ValueError("timezone changed during initialization")
+
+    def __pad(self, seq, front):
+        """Return a list copy of seq with '' added at the front or the back."""
+        seq = list(seq)
+        if front:
+            seq.insert(0, '')
+        else:
+            seq.append('')
+        return seq
+
+    def __calc_weekday(self):
+        """Set self.a_weekday and self.f_weekday using the calendar module."""
+        self.a_weekday = [calendar.day_abbr[i].lower() for i in range(7)]
+        self.f_weekday = [calendar.day_name[i].lower() for i in range(7)]
+
+    def __calc_month(self):
+        """Set self.a_month and self.f_month using the calendar module."""
+        self.a_month = [calendar.month_abbr[i].lower() for i in range(13)]
+        self.f_month = [calendar.month_name[i].lower() for i in range(13)]
+
+    def __calc_am_pm(self):
+        """Set self.am_pm by formatting a morning and an evening hour."""
+        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
+        # magical; it is simply the static date used everywhere else.
+        # Hours are written as plain decimals: the former `01` spelling is an
+        # octal literal in Python 2 and a syntax error in Python 3.
+        am_pm = []
+        for hour in (1, 22):
+            time_tuple = time.struct_time((1999, 3, 17, hour, 44, 55, 2, 76, 0))
+            am_pm.append(time.strftime("%p", time_tuple).lower())
+        self.am_pm = am_pm
+
+    def __calc_date_time(self):
+        """Set self.LC_date_time, self.LC_date and self.LC_time.
+
+        Each is derived by formatting a known date with %c, %x and %X and
+        then substituting the recognisable pieces of the output with the
+        directives that produced them.
+        """
+        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
+        # overloaded numbers is minimized.  The order in which searches for
+        # values within the format string is very important; it eliminates
+        # possible ambiguity for what something represents.
+        time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
+        date_time = [None, None, None]
+        date_time[0] = time.strftime("%c", time_tuple).lower()
+        date_time[1] = time.strftime("%x", time_tuple).lower()
+        date_time[2] = time.strftime("%X", time_tuple).lower()
+        replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'),
+                    (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
+                    (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
+                    ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
+                    ('44', '%M'), ('55', '%S'), ('76', '%j'),
+                    ('17', '%d'), ('03', '%m'), ('3', '%m'),
+                    # '3' needed for when no leading zero.
+                    ('2', '%w'), ('10', '%I')]
+        replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone
+                                                for tz in tz_values])
+        # Sunday, 1999-01-03 falls in week 0 under %W (it precedes the first
+        # Monday of the year) but not under %U, so formatting this probe date
+        # tells the two week-number directives apart.  It does not depend on
+        # the loop variable, so it is built once.
+        week_probe = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))
+        for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')):
+            current_format = date_time[offset]
+            for old, new in replacement_pairs:
+                # Must deal with possible lack of locale info
+                # manifesting itself as the empty string (e.g., Swedish's
+                # lack of AM/PM info) or a platform returning a tuple of empty
+                # strings (e.g., MacOS 9 having timezone as ('','')).
+                if old:
+                    current_format = current_format.replace(old, new)
+            if '00' in time.strftime(directive, week_probe):
+                U_W = '%W'
+            else:
+                U_W = '%U'
+            date_time[offset] = current_format.replace('11', U_W)
+        self.LC_date_time = date_time[0]
+        self.LC_date = date_time[1]
+        self.LC_time = date_time[2]
+
+    def __calc_timezone(self):
+        """Set self.timezone, self.tzname and self.daylight from the time module."""
+        # Do not worry about possibility of time.tzname[0] == time.tzname[1]
+        # and time.daylight; handle that in strptime.
+        try:
+            time.tzset()
+        except AttributeError:
+            # time.tzset() is not available on every platform.
+            pass
+        self.tzname = time.tzname
+        self.daylight = time.daylight
+        no_saving = frozenset(["utc", "gmt", self.tzname[0].lower()])
+        if self.daylight:
+            has_saving = frozenset([self.tzname[1].lower()])
+        else:
+            has_saving = frozenset()
+        self.timezone = (no_saving, has_saving)
+
+
+class TimeRE(dict):
+    """Handle conversion from format directives to regexes.
+
+    Keys are single directive characters ('d', 'H', 'Y', ...); values are
+    regex fragments.  Every fragment wraps its alternatives in a group
+    named after the directive, which is how the parser retrieves the
+    matched text by directive letter.
+    """
+
+    def __init__(self, locale_time=None):
+        """Create keys/values.
+
+        locale_time -- object providing the locale data (weekday and month
+                       names, AM/PM strings, timezone names, LC_* formats);
+                       a fresh LocaleTime() is created when omitted or falsy.
+
+        Order of execution is important for dependency reasons: 'W' is
+        derived from 'U', and 'c', 'x', 'X' are built from the entries
+        already stored.
+        """
+        if locale_time:
+            self.locale_time = locale_time
+        else:
+            self.locale_time = LocaleTime()
+        base = super(TimeRE, self)
+        base.__init__({
+            # The " \d" part of the regex is to make %c from ANSI C work
+            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
+            'f': r"(?P<f>[0-9]{1,6})",
+            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
+            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
+            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
+            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
+            'M': r"(?P<M>[0-5]\d|\d)",
+            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
+            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
+            'w': r"(?P<w>[0-6])",
+            # W is set below by using 'U'
+            'y': r"(?P<y>\d\d)",
+            #XXX: Does 'Y' need to worry about having less or more than
+            #     4 digits?
+            'Y': r"(?P<Y>\d\d\d\d)",
+            'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
+            'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
+            'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
+            'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
+            'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
+            'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
+                                        for tz in tz_names),
+                                'Z'),
+            '%': '%'})
+        # The 'U' fragment contains no other upper-case U than its group
+        # name, so the replace() only renames the group to 'W'.
+        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
+        base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
+        base.__setitem__('x', self.pattern(self.locale_time.LC_date))
+        base.__setitem__('X', self.pattern(self.locale_time.LC_time))
+
+    def __seqToRE(self, to_convert, directive):
+        """Convert a list to a regex string for matching a directive.
+
+        Want possible matching values to be from longest to shortest.  This
+        prevents the possibility of a match occurring for a value that is
+        also a substring of a larger value that should have matched (e.g.,
+        'abc' matching when 'abcdef' should have been the match).
+
+        Returns '' when every value is the empty string (or there are no
+        values), otherwise a group named after the directive.
+        """
+        to_convert = sorted(to_convert, key=len, reverse=True)
+        for value in to_convert:
+            if value != '':
+                break
+        else:
+            return ''
+        regex = '|'.join(re_escape(stuff) for stuff in to_convert)
+        regex = '(?P<%s>%s' % (directive, regex)
+        return '%s)' % regex
+
+    def pattern(self, format):
+        """Return regex pattern for the format string.
+
+        Need to make sure that any characters that might be interpreted as
+        regex syntax are escaped.
+
+        Raises KeyError for a directive that has no entry, and IndexError
+        when the format ends with a lone '%'.
+        """
+        processed_format = ''
+        # The sub() call escapes all characters that might be misconstrued
+        # as regex syntax.  Cannot use re.escape since we have to deal with
+        # format directives (%m, etc.).
+        regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
+        format = regex_chars.sub(r"\\\1", format)
+        # Any run of whitespace in the format matches any run in the data.
+        whitespace_replacement = re_compile(r'\s+')
+        format = whitespace_replacement.sub(r'\\s+', format)
+        while '%' in format:
+            directive_index = format.index('%')+1
+            processed_format = "%s%s%s" % (processed_format,
+                                           format[:directive_index-1],
+                                           self[format[directive_index]])
+            format = format[directive_index+1:]
+        return "%s%s" % (processed_format, format)
+
+    def compile(self, format):
+        """Return a compiled, case-insensitive re object for the format string."""
+        return re_compile(self.pattern(format), IGNORECASE)
+
+# Lock held by _strptime() while it inspects or replaces the two caches below.
+_cache_lock = _thread_allocate_lock()
+# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
+# first!
+# Directive-to-regex table; constructing it with no argument also builds a
+# LocaleTime for the locale and timezone in effect at import time.
+_TimeRE_cache = TimeRE()
+_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
+# Maps a format string to the compiled regex produced for it.
+_regex_cache = {}
+
+def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
+    """Return the day of the year for a week number and a weekday.
+
+    year -- calendar year the week number refers to
+    week_of_year -- week number as given by %U or %W (0 denotes the partial
+                    week before the first full week)
+    day_of_week -- weekday with Monday == 0 ... Sunday == 6
+    week_starts_Mon -- true for %W semantics (weeks start on Monday), false
+                       for %U semantics (weeks start on Sunday)
+
+    The result can be zero or negative when the requested weekday of week 0
+    falls before January 1st.
+    """
+    jan1_weekday = datetime_date(year, 1, 1).weekday()
+    if not week_starts_Mon:
+        # For %U, rotate both weekdays so that Sunday becomes day 0; the
+        # arithmetic below can then treat both directives alike.
+        jan1_weekday = (jan1_weekday + 1) % 7
+        day_of_week = (day_of_week + 1) % 7
+    if week_of_year == 0:
+        # Week 0 is the partial week containing January 1st.
+        return 1 + day_of_week - jan1_weekday
+    # Days taken up by the partial week 0, then by the full weeks before
+    # the requested one.
+    partial_week_days = (7 - jan1_weekday) % 7
+    full_week_days = 7 * (week_of_year - 1)
+    return 1 + (partial_week_days + full_week_days) + day_of_week
+
+
+def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
+    """Return a time struct based on the input string and the format string.
+
+    Returns a 2-tuple ``(time.struct_time, fraction)`` where fraction is the
+    value of a %f directive padded to six digits (0 when %f is absent).
+
+    Raises ValueError when the format contains a bad or stray directive,
+    when data_string does not match the format, or when unconverted data
+    remains after the match.
+    """
+    global _TimeRE_cache, _regex_cache
+    with _cache_lock:
+        locale_time = _TimeRE_cache.locale_time
+        # Throw both caches away when the locale or timezone differs from
+        # the one the cached TimeRE was built for.
+        if (_getlang() != locale_time.lang or
+            time.tzname != locale_time.tzname or
+            time.daylight != locale_time.daylight):
+            _TimeRE_cache = TimeRE()
+            _regex_cache.clear()
+            locale_time = _TimeRE_cache.locale_time
+        if len(_regex_cache) > _CACHE_MAX_SIZE:
+            _regex_cache.clear()
+        format_regex = _regex_cache.get(format)
+        if not format_regex:
+            try:
+                format_regex = _TimeRE_cache.compile(format)
+            # KeyError raised when a bad format is found; can be specified as
+            # \\, in which case it was a stray % but with a space after it
+            except KeyError as err:
+                bad_directive = err.args[0]
+                if bad_directive == "\\":
+                    bad_directive = "%"
+                del err
+                raise ValueError("'%s' is a bad directive in format '%s'" %
+                                    (bad_directive, format))
+            # IndexError only occurs when the format string is "%"
+            except IndexError:
+                raise ValueError("stray %% in format '%s'" % format)
+            _regex_cache[format] = format_regex
+    found = format_regex.match(data_string)
+    if not found:
+        raise ValueError("time data %r does not match format %r" %
+                         (data_string, format))
+    if len(data_string) != found.end():
+        raise ValueError("unconverted data remains: %s" %
+                          data_string[found.end():])
+
+    year = None
+    month = day = 1
+    hour = minute = second = fraction = 0
+    tz = -1
+    # Default to -1 to signify that values not known; not critical to have,
+    # though
+    week_of_year = -1
+    week_of_year_start = -1
+    # weekday and julian defaulted to None so as to signal need to calculate
+    # values
+    weekday = julian = None
+    found_dict = found.groupdict()
+    for group_key in found_dict:
+        # Directives not explicitly handled below:
+        #   c, x, X
+        #      handled by making out of other directives
+        #   U, W
+        #      worthless without day of the week
+        if group_key == 'y':
+            year = int(found_dict['y'])
+            # Open Group specification for strptime() states that a %y
+            # value in the range of [00, 68] is in the century 2000, while
+            # [69,99] is in the century 1900
+            if year <= 68:
+                year += 2000
+            else:
+                year += 1900
+        elif group_key == 'Y':
+            year = int(found_dict['Y'])
+        elif group_key == 'm':
+            month = int(found_dict['m'])
+        elif group_key == 'B':
+            month = locale_time.f_month.index(found_dict['B'].lower())
+        elif group_key == 'b':
+            month = locale_time.a_month.index(found_dict['b'].lower())
+        elif group_key == 'd':
+            day = int(found_dict['d'])
+        elif group_key == 'H':
+            hour = int(found_dict['H'])
+        elif group_key == 'I':
+            hour = int(found_dict['I'])
+            ampm = found_dict.get('p', '').lower()
+            # If there was no AM/PM indicator, we'll treat this like AM
+            if ampm in ('', locale_time.am_pm[0]):
+                # We're in AM so the hour is correct unless we're
+                # looking at 12 midnight.
+                # 12 midnight == 12 AM == hour 0
+                if hour == 12:
+                    hour = 0
+            elif ampm == locale_time.am_pm[1]:
+                # We're in PM so we need to add 12 to the hour unless
+                # we're looking at 12 noon.
+                # 12 noon == 12 PM == hour 12
+                if hour != 12:
+                    hour += 12
+        elif group_key == 'M':
+            minute = int(found_dict['M'])
+        elif group_key == 'S':
+            second = int(found_dict['S'])
+        elif group_key == 'f':
+            s = found_dict['f']
+            # Pad to always return microseconds.
+            s += "0" * (6 - len(s))
+            fraction = int(s)
+        elif group_key == 'A':
+            weekday = locale_time.f_weekday.index(found_dict['A'].lower())
+        elif group_key == 'a':
+            weekday = locale_time.a_weekday.index(found_dict['a'].lower())
+        elif group_key == 'w':
+            # %w counts from Sunday == 0; convert to Monday == 0.
+            weekday = int(found_dict['w'])
+            if weekday == 0:
+                weekday = 6
+            else:
+                weekday -= 1
+        elif group_key == 'j':
+            julian = int(found_dict['j'])
+        elif group_key in ('U', 'W'):
+            week_of_year = int(found_dict[group_key])
+            if group_key == 'U':
+                # U starts week on Sunday.
+                week_of_year_start = 6
+            else:
+                # W starts week on Monday.
+                week_of_year_start = 0
+        elif group_key == 'Z':
+            # Since -1 is default value only need to worry about setting tz if
+            # it can be something other than -1.
+            found_zone = found_dict['Z'].lower()
+            for value, tz_values in enumerate(locale_time.timezone):
+                if found_zone in tz_values:
+                    # Deal with bad locale setup where timezone names are the
+                    # same and yet time.daylight is true; too ambiguous to
+                    # be able to tell what timezone has daylight savings
+                    if (time.tzname[0] == time.tzname[1] and
+                       time.daylight and found_zone not in ("utc", "gmt")):
+                        break
+                    else:
+                        tz = value
+                        break
+    leap_year_fix = False
+    if year is None and month == 2 and day == 29:
+        year = 1904  # 1904 is first leap year of 20th century
+        leap_year_fix = True
+    elif year is None:
+        year = 1900
+    # If we know the week of the year and what day of that week, we can figure
+    # out the Julian day of the year.
+    if julian is None and week_of_year != -1 and weekday is not None:
+        week_starts_Mon = week_of_year_start == 0
+        julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
+                                            week_starts_Mon)
+        if julian <= 0:
+            # The requested day lies in the previous year; express it as a
+            # day-of-year of that year instead.
+            year -= 1
+            yday = 366 if calendar.isleap(year) else 365
+            julian += yday
+    # Cannot pre-calculate datetime_date() since can change in Julian
+    # calculation and thus could have different value for the day of the week
+    # calculation.
+    if julian is None:
+        # Need to add 1 to result since first day of the year is 1, not 0.
+        julian = datetime_date(year, month, day).toordinal() - \
+                  datetime_date(year, 1, 1).toordinal() + 1
+    else:  # Assume that if they bothered to include Julian day it will
+           # be accurate.
+        datetime_result = datetime_date.fromordinal((julian - 1) + datetime_date(year, 1, 1).toordinal())
+        year = datetime_result.year
+        month = datetime_result.month
+        day = datetime_result.day
+    if weekday is None:
+        weekday = datetime_date(year, month, day).weekday()
+    if leap_year_fix:
+        # the caller didn't supply a year but asked for Feb 29th. We couldn't
+        # use the default of 1900 for computations. We set it back to ensure
+        # that February 29th is smaller than March 1st.
+        year = 1900
+
+    return (time.struct_time((year, month, day,
+                              hour, minute, second,
+                              weekday, julian, tz)), fraction)
+
+def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"):
+    """Parse data_string with _strptime() and return only its first result.
+
+    The second element returned by _strptime() (the fraction) is discarded.
+    """
+    parsed = _strptime(data_string, format)
+    return parsed[0]
diff --git a/cashew/Lib/_threading_local.py b/cashew/Lib/_threading_local.py
new file mode 100644
index 0000000..1480329
--- /dev/null
+++ b/cashew/Lib/_threading_local.py
@@ -0,0 +1,247 @@
+"""Thread-local objects.
+
+(Note that this module provides a Python version of the threading.local
+ class. Depending on the version of Python you're using, there may be a
+ faster one available. You should always import the `local` class from
+ `threading`.)
+
+Thread-local objects support the management of thread-local data.
+If you have data that you want to be local to a thread, simply create
+a thread-local object and use its attributes:
+
+ >>> mydata = local()
+ >>> mydata.number = 42
+ >>> mydata.number
+ 42
+
+You can also access the local-object's dictionary:
+
+ >>> mydata.__dict__
+ {'number': 42}
+ >>> mydata.__dict__.setdefault('widgets', [])
+ []
+ >>> mydata.widgets
+ []
+
+What's important about thread-local objects is that their data are
+local to a thread. If we access the data in a different thread:
+
+ >>> log = []
+ >>> def f():
+ ... items = mydata.__dict__.items()
+ ... items.sort()
+ ... log.append(items)
+ ... mydata.number = 11
+ ... log.append(mydata.number)
+
+ >>> import threading
+ >>> thread = threading.Thread(target=f)
+ >>> thread.start()
+ >>> thread.join()
+ >>> log
+ [[], 11]
+
+we get different data. Furthermore, changes made in the other thread
+don't affect data seen in this thread:
+
+ >>> mydata.number
+ 42
+
+Of course, values you get from a local object, including a __dict__
+attribute, are for whatever thread was current at the time the
+attribute was read. For that reason, you generally don't want to save
+these values across threads, as they apply only to the thread they
+came from.
+
+You can create custom local objects by subclassing the local class:
+
+ >>> class MyLocal(local):
+ ... number = 2
+ ... def __init__(self, **kw):
+ ... self.__dict__.update(kw)
+ ... def squared(self):
+ ... return self.number ** 2
+
+This can be useful to support default values, methods and
+initialization. Note that if you define an __init__ method, it will be
+called each time the local object is used in a separate thread. This
+is necessary to initialize each thread's dictionary.
+
+Now if we create a local object:
+
+ >>> mydata = MyLocal(color='red')
+
+Now we have a default number:
+
+ >>> mydata.number
+ 2
+
+an initial color:
+
+ >>> mydata.color
+ 'red'
+ >>> del mydata.color
+
+And a method that operates on the data:
+
+ >>> mydata.squared()
+ 4
+
+As before, we can access the data in a separate thread:
+
+ >>> log = []
+ >>> thread = threading.Thread(target=f)
+ >>> thread.start()
+ >>> thread.join()
+ >>> log
+ [[('color', 'red')], 11]
+
+without affecting this thread's data:
+
+ >>> mydata.number
+ 2
+ >>> mydata.color
+ Traceback (most recent call last):
+ ...
+ AttributeError: 'MyLocal' object has no attribute 'color'
+
+Note that subclasses can define slots, but they are not thread
+local. They are shared across threads:
+
+ >>> class MyLocal(local):
+ ... __slots__ = 'number'
+
+ >>> mydata = MyLocal()
+ >>> mydata.number = 42
+ >>> mydata.color = 'red'
+
+So, the separate thread:
+
+ >>> thread = threading.Thread(target=f)
+ >>> thread.start()
+ >>> thread.join()
+
+affects what we see:
+
+ >>> mydata.number
+ 11
+
+>>> del mydata
+"""
+
+__all__ = ["local"]
+
+# We need to use objects from the threading module, but the threading
+# module may also want to use our `local` class, if support for locals
+# isn't compiled in to the `thread` module. This creates potential problems
+# with circular imports. For that reason, we don't import `threading`
+# until the bottom of this file (a hack sufficient to worm around the
+# potential problems). Note that almost all platforms do have support for
+# locals in the `thread` module, and there is no circular import problem
+# then, so problems introduced by fiddling the order of imports here won't
+# manifest on most boxes.
+
+class _localbase(object):
+    """Base class that sets up the bookkeeping slots of a local object."""
+
+    __slots__ = '_local__key', '_local__args', '_local__lock'
+
+    def __new__(cls, *args, **kw):
+        """Allocate the instance and register its dict for the current thread.
+
+        Raises TypeError when arguments are given but the class does not
+        define its own __init__ to receive them.
+        """
+        self = object.__new__(cls)
+        store = object.__setattr__
+        # Key under which each thread's __dict__ holds this object's data.
+        key = '_local__key', 'thread.local.' + str(id(self))
+        store(self, '_local__key', key)
+        store(self, '_local__args', (args, kw))
+        store(self, '_local__lock', RLock())
+
+        has_arguments = args or kw
+        if has_arguments and (cls.__init__ is object.__init__):
+            raise TypeError("Initialization arguments are not supported")
+
+        # Register the creating thread's dict up front, in anticipation of
+        # __init__ being called, so that we do not call it again ourselves.
+        instance_dict = object.__getattribute__(self, '__dict__')
+        current_thread().__dict__[key] = instance_dict
+
+        return self
+
+def _patch(self):
+    """Install the current thread's dict as the __dict__ of *self*.
+
+    On first use from a thread an empty dict is created, stored in that
+    thread's __dict__, and the class's __init__ (if it defines one) is
+    re-run with the originally saved arguments.
+    """
+    key = object.__getattribute__(self, '_local__key')
+    thread_dict = current_thread().__dict__
+    per_thread = thread_dict.get(key)
+    if per_thread is not None:
+        object.__setattr__(self, '__dict__', per_thread)
+        return
+
+    per_thread = {}
+    thread_dict[key] = per_thread
+    object.__setattr__(self, '__dict__', per_thread)
+
+    # We have a new instance dict, so call our __init__ if we have one.
+    klass = type(self)
+    if klass.__init__ is not object.__init__:
+        args, kw = object.__getattribute__(self, '_local__args')
+        klass.__init__(self, *args, **kw)
+
+class local(_localbase):
+    """Object whose attribute access goes through _patch() under a lock."""
+
+    def __getattribute__(self, name):
+        """Read *name* after installing the current thread's dict."""
+        with object.__getattribute__(self, '_local__lock'):
+            _patch(self)
+            return object.__getattribute__(self, name)
+
+    def __setattr__(self, name, value):
+        """Set *name* for the current thread; '__dict__' is read-only."""
+        if name == '__dict__':
+            raise AttributeError(
+                "%r object attribute '__dict__' is read-only"
+                % self.__class__.__name__)
+        with object.__getattribute__(self, '_local__lock'):
+            _patch(self)
+            return object.__setattr__(self, name, value)
+
+    def __delattr__(self, name):
+        """Delete *name* for the current thread; '__dict__' is read-only."""
+        if name == '__dict__':
+            raise AttributeError(
+                "%r object attribute '__dict__' is read-only"
+                % self.__class__.__name__)
+        with object.__getattribute__(self, '_local__lock'):
+            _patch(self)
+            return object.__delattr__(self, name)
+
+    def __del__(self):
+        """Remove this object's entry from the __dict__ of every thread."""
+        import threading
+
+        key = object.__getattribute__(self, '_local__key')
+
+        try:
+            # We use the non-locking API since we might already hold the lock
+            # (__del__ can be called at any point by the cyclic GC).
+            live_threads = threading._enumerate()
+        except:
+            # If enumerating the current threads fails, as it seems to do
+            # during shutdown, we'll skip cleanup under the assumption
+            # that there is nothing to clean up.
+            return
+
+        for thread in live_threads:
+            try:
+                thread_state = thread.__dict__
+            except AttributeError:
+                # Thread is dying, rest in peace.
+                continue
+
+            if key not in thread_state:
+                continue
+            try:
+                del thread_state[key]
+            except KeyError:
+                pass # didn't have anything in this thread
+
+from threading import current_thread, RLock
diff --git a/cashew/Lib/_weakrefset.py b/cashew/Lib/_weakrefset.py
new file mode 100644
index 0000000..be1b00c
--- /dev/null
+++ b/cashew/Lib/_weakrefset.py
@@ -0,0 +1,205 @@
+# Access WeakSet through the weakref module.
+# This code is separated-out because it is needed
+# by abc.py to load everything else at startup.
+
+from _weakref import ref
+
+__all__ = ['WeakSet']
+
+
+class _IterationGuard(object):
+    """Context manager that marks a weak container as being iterated.
+
+    While at least one guard is registered in the container's ``_iterating``
+    set, removals are meant to be delayed; when the last guard exits, the
+    container's ``_commit_removals()`` is called.
+    This technique should be relatively thread-safe (since sets are).
+    """
+
+    def __init__(self, weakcontainer):
+        # Hold the container weakly so the guard does not create a cycle.
+        self.weakcontainer = ref(weakcontainer)
+
+    def __enter__(self):
+        container = self.weakcontainer()
+        if container is not None:
+            container._iterating.add(self)
+        return self
+
+    def __exit__(self, e, t, b):
+        container = self.weakcontainer()
+        if container is None:
+            # The container is already gone; nothing to unregister.
+            return
+        active_guards = container._iterating
+        active_guards.remove(self)
+        if not active_guards:
+            container._commit_removals()
+
+
+class WeakSet(object):
+    """Set-like container that holds its elements through weak references.
+
+    ``data`` is a plain set of weakref objects.  Each reference is created
+    with the ``_remove`` callback, which drops a dead reference from
+    ``data`` immediately, or queues it in ``_pending_removals`` while an
+    iteration is in progress.
+    """
+
+    def __init__(self, data=None):
+        self.data = set()
+
+        def _remove(item, selfref=ref(self)):
+            # Weakref callback; reaches the set weakly via selfref.
+            owner = selfref()
+            if owner is None:
+                return
+            if owner._iterating:
+                owner._pending_removals.append(item)
+            else:
+                owner.data.discard(item)
+
+        self._remove = _remove
+        # A list of keys to be removed
+        self._pending_removals = []
+        self._iterating = set()
+        if data is not None:
+            self.update(data)
+
+    def _commit_removals(self):
+        """Discard every queued dead reference from ``data``."""
+        pending = self._pending_removals
+        drop = self.data.discard
+        while pending:
+            drop(pending.pop())
+
+    def __iter__(self):
+        with _IterationGuard(self):
+            for itemref in self.data:
+                referent = itemref()
+                if referent is None:
+                    continue
+                # Caveat: the iterator will keep a strong reference to
+                # `referent` until it is resumed or closed.
+                yield referent
+
+    def __len__(self):
+        # References queued for removal are still in `data`; do not count them.
+        return len(self.data) - len(self._pending_removals)
+
+    def __contains__(self, item):
+        try:
+            probe = ref(item)
+            # Issue #266 - somehow item was freed before wr hash was calculated
+            return probe in self.data
+        except TypeError:
+            # Objects that cannot be weakly referenced are never members.
+            return False
+
+    def __reduce__(self):
+        state = getattr(self, '__dict__', None)
+        return (self.__class__, (list(self),), state)
+
+    __hash__ = None
+
+    def add(self, item):
+        """Add *item*, held through a weak reference."""
+        if self._pending_removals:
+            self._commit_removals()
+        self.data.add(ref(item, self._remove))
+
+    def clear(self):
+        """Remove every element."""
+        if self._pending_removals:
+            self._commit_removals()
+        self.data.clear()
+
+    def copy(self):
+        """Return a new set of the same class with the same live elements."""
+        return self.__class__(self)
+
+    def pop(self):
+        """Remove and return a live element; KeyError when none is left."""
+        if self._pending_removals:
+            self._commit_removals()
+        while True:
+            try:
+                itemref = self.data.pop()
+            except KeyError:
+                raise KeyError('pop from empty WeakSet')
+            referent = itemref()
+            if referent is not None:
+                return referent
+
+    def remove(self, item):
+        """Remove *item*; KeyError if it is not present."""
+        if self._pending_removals:
+            self._commit_removals()
+        self.data.remove(ref(item))
+
+    def discard(self, item):
+        """Remove *item* if it is present."""
+        if self._pending_removals:
+            self._commit_removals()
+        self.data.discard(ref(item))
+
+    def update(self, other):
+        """Add every element of the iterable *other*."""
+        if self._pending_removals:
+            self._commit_removals()
+        for element in other:
+            self.add(element)
+
+    def __ior__(self, other):
+        self.update(other)
+        return self
+
+    def difference(self, other):
+        """Return a new set with the elements of self that are not in *other*."""
+        result = self.copy()
+        result.difference_update(other)
+        return result
+    __sub__ = difference
+
+    def difference_update(self, other):
+        """Remove every element of *other* from this set, in place."""
+        self.__isub__(other)
+
+    def __isub__(self, other):
+        if self._pending_removals:
+            self._commit_removals()
+        if self is other:
+            self.data.clear()
+        else:
+            self.data.difference_update(ref(element) for element in other)
+        return self
+
+    def intersection(self, other):
+        """Return a new set with the elements of *other* also found in self."""
+        return self.__class__(element for element in other if element in self)
+    __and__ = intersection
+
+    def intersection_update(self, other):
+        """Keep only the elements also found in *other*, in place."""
+        self.__iand__(other)
+
+    def __iand__(self, other):
+        if self._pending_removals:
+            self._commit_removals()
+        self.data.intersection_update(ref(element) for element in other)
+        return self
+
+    def issubset(self, other):
+        """Return whether every reference in ``data`` refers into *other*."""
+        return self.data.issubset(ref(element) for element in other)
+    __le__ = issubset
+
+    def __lt__(self, other):
+        other_refs = set(ref(element) for element in other)
+        return self.data < other_refs
+
+    def issuperset(self, other):
+        """Return whether ``data`` holds a reference to each element of *other*."""
+        return self.data.issuperset(ref(element) for element in other)
+    __ge__ = issuperset
+
+    def __gt__(self, other):
+        other_refs = set(ref(element) for element in other)
+        return self.data > other_refs
+
+    def __eq__(self, other):
+        if not isinstance(other, self.__class__):
+            return NotImplemented
+        other_refs = set(ref(element) for element in other)
+        return self.data == other_refs
+
+    def __ne__(self, other):
+        equal = self.__eq__(other)
+        if equal is NotImplemented:
+            return NotImplemented
+        return not equal
+
+    def symmetric_difference(self, other):
+        """Return a new set with the elements found in exactly one operand."""
+        result = self.copy()
+        result.symmetric_difference_update(other)
+        return result
+    __xor__ = symmetric_difference
+
+    def symmetric_difference_update(self, other):
+        """Keep only the elements found in exactly one operand, in place."""
+        self.__ixor__(other)
+
+    def __ixor__(self, other):
+        if self._pending_removals:
+            self._commit_removals()
+        if self is other:
+            self.data.clear()
+        else:
+            # New references may end up stored in `data`, so they carry the
+            # removal callback.
+            self.data.symmetric_difference_update(ref(element, self._remove) for element in other)
+        return self
+
+    def union(self, other):
+        """Return a new set with the elements of both operands."""
+        return self.__class__(e for s in (self, other) for e in s)
+    __or__ = union
+
+    def isdisjoint(self, other):
+        """Return whether self and *other* have no element in common."""
+        common = self.intersection(other)
+        return len(common) == 0
diff --git a/cashew/Lib/abc.py b/cashew/Lib/abc.py
new file mode 100644
index 0000000..02e48a1
--- /dev/null
+++ b/cashew/Lib/abc.py
@@ -0,0 +1,185 @@
+# Copyright 2007 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Abstract Base Classes (ABCs) according to PEP 3119."""
+
+import types
+
+from _weakrefset import WeakSet
+
+# Instance of old-style class
+class _C: pass  # no base class given, so this is an old-style class under Python 2
+_InstanceType = type(_C())  # type shared by old-style instances; compared against type(instance) in ABCMeta.__instancecheck__
+
+
+def abstractmethod(funcobj):
+    """A decorator indicating abstract methods.
+
+    Requires that the metaclass is ABCMeta or derived from it. A
+    class that has a metaclass derived from ABCMeta cannot be
+    instantiated unless all of its abstract methods are overridden.
+    The abstract methods can be called using any of the normal
+    'super' call mechanisms.
+
+    Usage:
+
+        class C:
+            __metaclass__ = ABCMeta
+            @abstractmethod
+            def my_abstract_method(self, ...):
+                ...
+    """
+    funcobj.__isabstractmethod__ = True  # the marker ABCMeta.__new__ looks for with getattr()
+    return funcobj  # the same function object is returned, only tagged
+
+
+class abstractproperty(property):
+    """A decorator indicating abstract properties.
+
+    Requires that the metaclass is ABCMeta or derived from it. A
+    class that has a metaclass derived from ABCMeta cannot be
+    instantiated unless all of its abstract properties are overridden.
+    The abstract properties can be called using any of the normal
+    'super' call mechanisms.
+
+    Usage:
+
+        class C:
+            __metaclass__ = ABCMeta
+            @abstractproperty
+            def my_abstract_property(self):
+                ...
+
+    This defines a read-only property; you can also define a read-write
+    abstract property using the 'long' form of property declaration:
+
+        class C:
+            __metaclass__ = ABCMeta
+            def getx(self): ...
+            def setx(self, value): ...
+            x = abstractproperty(getx, setx)
+    """
+    __isabstractmethod__ = True  # class attribute, so every instance carries the marker ABCMeta.__new__ looks for
+
+
+class ABCMeta(type):
+
+    """Metaclass for defining Abstract Base Classes (ABCs).
+
+    Use this metaclass to create an ABC. An ABC can be subclassed
+    directly, and then acts as a mix-in class. You can also register
+    unrelated concrete classes (even built-in classes) and unrelated
+    ABCs as 'virtual subclasses' -- these and their descendants will
+    be considered subclasses of the registering ABC by the built-in
+    issubclass() function, but the registering ABC won't show up in
+    their MRO (Method Resolution Order) nor will method
+    implementations defined by the registering ABC be callable (not
+    even via super()).
+
+    """
+
+    # A global counter that is incremented each time a class is
+    # registered as a virtual subclass of anything. It forces the
+    # negative cache to be cleared before its next use.
+    _abc_invalidation_counter = 0
+
+    def __new__(mcls, name, bases, namespace):  # builds the class and attaches its ABC bookkeeping
+        cls = super(ABCMeta, mcls).__new__(mcls, name, bases, namespace)
+        # Compute set of abstract method names
+        abstracts = set(name
+                        for name, value in namespace.items()
+                        if getattr(value, "__isabstractmethod__", False))
+        for base in bases:  # a base's abstract name stays abstract unless cls resolves it to an unmarked attribute
+            for name in getattr(base, "__abstractmethods__", set()):
+                value = getattr(cls, name, None)
+                if getattr(value, "__isabstractmethod__", False):
+                    abstracts.add(name)
+        cls.__abstractmethods__ = frozenset(abstracts)
+        # Set up inheritance registry
+        cls._abc_registry = WeakSet()  # classes passed to register()
+        cls._abc_cache = WeakSet()  # classes already found to be subclasses
+        cls._abc_negative_cache = WeakSet()  # classes already found not to be subclasses
+        cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter  # counter value the negative cache is valid for
+        return cls
+
+    def register(cls, subclass):
+        """Register a virtual subclass of an ABC."""
+        if not isinstance(subclass, (type, types.ClassType)):  # new-style or old-style classes only
+            raise TypeError("Can only register classes")
+        if issubclass(subclass, cls):
+            return # Already a subclass
+        # Subtle: test for cycles *after* testing for "already a subclass";
+        # this means we allow X.register(X) and interpret it as a no-op.
+        if issubclass(cls, subclass):
+            # This would create a cycle, which is bad for the algorithm below
+            raise RuntimeError("Refusing to create an inheritance cycle")
+        cls._abc_registry.add(subclass)
+        ABCMeta._abc_invalidation_counter += 1 # Invalidate negative cache
+
+    def _dump_registry(cls, file=None):
+        """Debug helper to print the ABC registry."""
+        print >> file, "Class: %s.%s" % (cls.__module__, cls.__name__)  # with file=None, "print >>" writes to sys.stdout
+        print >> file, "Inv.counter: %s" % ABCMeta._abc_invalidation_counter
+        for name in sorted(cls.__dict__.keys()):
+            if name.startswith("_abc_"):  # only the bookkeeping attributes set in __new__
+                value = getattr(cls, name)
+                print >> file, "%s: %r" % (name, value)
+
+    def __instancecheck__(cls, instance):
+        """Override for isinstance(instance, cls)."""
+        # Inline the cache checking when it's simple.
+        subclass = getattr(instance, '__class__', None)
+        if subclass is not None and subclass in cls._abc_cache:
+            return True
+        subtype = type(instance)
+        # Old-style instances
+        if subtype is _InstanceType:
+            subtype = subclass  # use __class__, since type() is the same for all old-style instances
+        if subtype is subclass or subclass is None:
+            if (cls._abc_negative_cache_version ==
+                ABCMeta._abc_invalidation_counter and
+                subtype in cls._abc_negative_cache):
+                return False  # known negative, and the cache has not been invalidated since
+            # Fall back to the subclass check.
+            return cls.__subclasscheck__(subtype)
+        return (cls.__subclasscheck__(subclass) or
+                cls.__subclasscheck__(subtype))  # __class__ and type() disagree: accept if either qualifies
+
+    def __subclasscheck__(cls, subclass):
+        """Override for issubclass(subclass, cls)."""
+        # Check cache
+        if subclass in cls._abc_cache:
+            return True
+        # Check negative cache; may have to invalidate
+        if cls._abc_negative_cache_version < ABCMeta._abc_invalidation_counter:
+            # Invalidate the negative cache
+            cls._abc_negative_cache = WeakSet()
+            cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter
+        elif subclass in cls._abc_negative_cache:
+            return False
+        # Check the subclass hook
+        ok = cls.__subclasshook__(subclass)  # NOTE(review): not defined in this class; presumably supplied by the ABC itself or inherited from object
+        if ok is not NotImplemented:
+            assert isinstance(ok, bool)  # a hook may only answer True, False or NotImplemented
+            if ok:
+                cls._abc_cache.add(subclass)
+            else:
+                cls._abc_negative_cache.add(subclass)
+            return ok
+        # Check if it's a direct subclass
+        if cls in getattr(subclass, '__mro__', ()):  # the () default covers objects without an __mro__
+            cls._abc_cache.add(subclass)
+            return True
+        # Check if it's a subclass of a registered class (recursive)
+        for rcls in cls._abc_registry:
+            if issubclass(subclass, rcls):
+                cls._abc_cache.add(subclass)
+                return True
+        # Check if it's a subclass of a subclass (recursive)
+        for scls in cls.__subclasses__():
+            if issubclass(subclass, scls):
+                cls._abc_cache.add(subclass)
+                return True
+        # No dice; update negative cache
+        cls._abc_negative_cache.add(subclass)
+        return False
diff --git a/cashew/Lib/aifc.py b/cashew/Lib/aifc.py
new file mode 100644
index 0000000..981f801
--- /dev/null
+++ b/cashew/Lib/aifc.py
@@ -0,0 +1,1000 @@
+"""Stuff to parse AIFF-C and AIFF files.
+
+Unless explicitly stated otherwise, the description below is true
+both for AIFF-C files and AIFF files.
+
+An AIFF-C file has the following structure.
+
+  +-----------------+
+  | FORM            |
+  +-----------------+
+  | <size>          |
+  +----+------------+
+  |    | AIFC       |
+  |    +------------+
+  |    | <chunks>   |
+  |    |    .       |
+  |    |    .       |
+  |    |    .       |
+  +----+------------+
+
+An AIFF file has the string "AIFF" instead of "AIFC".
+
+A chunk consists of an identifier (4 bytes) followed by a size (4 bytes,
+big endian order), followed by the data. The size field does not include
+the size of the 8 byte header.
+
+The following chunk types are recognized.
+
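+  FVER
+      <version number of AIFF-C defining document> (AIFF-C only).
+  MARK
+      <# of markers> (2 bytes)
+      list of markers:
+          <marker ID> (2 bytes, must be > 0)
+          <position> (4 bytes)
+          <marker name> ("pstring")
+  COMM
+      <# of channels> (2 bytes)
+      <# of sound frames> (4 bytes)
+      <sample size in bits> (2 bytes)
+      <sampling frequency> (10 bytes, IEEE 80-bit extended
+          floating point)
+      in AIFF-C files only:
+      <compression type> (4 bytes)
+      <human-readable version of compression type> ("pstring")
+  SSND
+      <offset> (4 bytes, not used by this program)
+      <blocksize> (4 bytes, not used by this program)
+      <sound data>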
+
+A pstring consists of 1 byte length, a string of characters, and 0 or 1
+byte pad to make the total length even.
+
+Usage.
+
+Reading AIFF files:
+ f = aifc.open(file, 'r')
+where file is either the name of a file or an open file pointer.
+The open file pointer must have methods read(), seek(), and close().
+In some types of audio files, if the setpos() method is not used,
+the seek() method is not necessary.
+
+This returns an instance of a class with the following public methods:
+ getnchannels() -- returns number of audio channels (1 for
+ mono, 2 for stereo)
+ getsampwidth() -- returns sample width in bytes
+ getframerate() -- returns sampling frequency
+ getnframes() -- returns number of audio frames
+ getcomptype() -- returns compression type ('NONE' for AIFF files)
+ getcompname() -- returns human-readable version of
+ compression type ('not compressed' for AIFF files)
+ getparams() -- returns a tuple consisting of all of the
+ above in the above order
+ getmarkers() -- get the list of marks in the audio file or None
+ if there are no marks
+ getmark(id) -- get mark with the specified id (raises an error
+ if the mark does not exist)
+ readframes(n) -- returns at most n frames of audio
+ rewind() -- rewind to the beginning of the audio stream
+ setpos(pos) -- seek to the specified position
+ tell() -- return the current position
+ close() -- close the instance (make it unusable)
+The position returned by tell(), the position given to setpos() and
+the position of marks are all compatible and have nothing to do with
+the actual position in the file.
+The close() method is called automatically when the class instance
+is destroyed.
+
+Writing AIFF files:
+ f = aifc.open(file, 'w')
+where file is either the name of a file or an open file pointer.
+The open file pointer must have methods write(), tell(), seek(), and
+close().
+
+This returns an instance of a class with the following public methods:
+ aiff() -- create an AIFF file (AIFF-C default)
+ aifc() -- create an AIFF-C file
+ setnchannels(n) -- set the number of channels
+ setsampwidth(n) -- set the sample width
+ setframerate(n) -- set the frame rate
+ setnframes(n) -- set the number of frames
+ setcomptype(type, name)
+ -- set the compression type and the
+ human-readable compression type
+ setparams(tuple)
+ -- set all parameters at once
+ setmark(id, pos, name)
+ -- add specified mark to the list of marks
+ tell() -- return current position in output file (useful
+ in combination with setmark())
+ writeframesraw(data)
+ -- write audio frames without patching up the
+ file header
+ writeframes(data)
+ -- write audio frames and patch up the file header
+ close() -- patch up the file header and close the
+ output file
+You should set the parameters before the first writeframesraw or
+writeframes. The total number of frames does not need to be set,
+but when it is set to the correct value, the header does not have to
+be patched up.
+It is best to first set all parameters, possibly including the
+compression type, and then write audio frames using writeframesraw.
+When all frames have been written, either call writeframes('') or
+close() to patch up the sizes in the header.
+Marks can be added anytime. If there are any marks, you must call
+close() after all frames have been written.
+The close() method is called automatically when the class instance
+is destroyed.
+
+When a file is opened with the extension '.aiff', an AIFF file is
+written, otherwise an AIFF-C file is written. This default can be
+changed by calling aiff() or aifc() before the first writeframes or
+writeframesraw.
+"""
+
+import struct
+import __builtin__
+
+__all__ = ["Error","open","openfp"]
+
+class Error(Exception):  # raised throughout this module for malformed files and invalid parameter use
+    pass
+
+_AIFC_version = 0xA2805140L # Version 1 of AIFF-C
+
+def _read_long(file):  # read a signed 32-bit big-endian integer
+    try:
+        return struct.unpack('>l', file.read(4))[0]
+    except struct.error:
+        raise EOFError  # a short read makes unpack() fail; report it as end of data
+
+def _read_ulong(file):  # read an unsigned 32-bit big-endian integer
+    try:
+        return struct.unpack('>L', file.read(4))[0]
+    except struct.error:
+        raise EOFError  # a short read makes unpack() fail; report it as end of data
+
+def _read_short(file):  # read a signed 16-bit big-endian integer
+    try:
+        return struct.unpack('>h', file.read(2))[0]
+    except struct.error:
+        raise EOFError  # a short read makes unpack() fail; report it as end of data
+
+def _read_ushort(file):  # read an unsigned 16-bit big-endian integer
+    try:
+        return struct.unpack('>H', file.read(2))[0]
+    except struct.error:
+        raise EOFError  # a short read makes unpack() fail; report it as end of data
+
+def _read_string(file):  # read a pstring (length byte, data, optional pad byte); raises EOFError if no length byte is left
+    prefix = file.read(1)
+    if not prefix:
+        raise EOFError  # ord('') would raise TypeError, which _readmark() does not catch when coping with truncated chunks
+    length = ord(prefix)
+    data = file.read(length) if length else ''
+    if length & 1 == 0:
+        file.read(1)  # skip the pad byte that makes length byte + data even in total
+    return data
+
+_HUGE_VAL = 1.79769313486231e+308 # just under the largest finite IEEE double; _read_float() returns it for an exponent field of 0x7FFF
+
+def _read_float(f): # 10 bytes: sign + exponent (2), high mantissa (4), low mantissa (4)
+    expon = _read_short(f) # 2 bytes
+    sign = 1
+    if expon < 0:  # read as signed, so a set top bit (the sign bit) shows up as a negative value
+        sign = -1
+        expon = expon + 0x8000  # strip the sign bit, leaving the 15-bit exponent
+    himant = _read_ulong(f) # 4 bytes
+    lomant = _read_ulong(f) # 4 bytes
+    if expon == himant == lomant == 0:
+        f = 0.0  # from here on f is rebound from the file object to the result
+    elif expon == 0x7FFF:
+        f = _HUGE_VAL  # all-ones exponent (what _write_float emits for infinity or NaN)
+    else:
+        expon = expon - 16383  # remove the exponent bias
+        f = (himant * 0x100000000L + lomant) * pow(2.0, expon - 63)  # 64-bit mantissa scaled by 2**(expon - 63)
+    return sign * f
+
+def _write_short(f, x):  # write x as a signed 16-bit big-endian integer
+    f.write(struct.pack('>h', x))
+
+def _write_ushort(f, x):  # write x as an unsigned 16-bit big-endian integer
+    f.write(struct.pack('>H', x))
+
+def _write_long(f, x):  # write x as a signed 32-bit big-endian integer
+    f.write(struct.pack('>l', x))
+
+def _write_ulong(f, x):  # write x as an unsigned 32-bit big-endian integer
+    f.write(struct.pack('>L', x))
+
+def _write_string(f, s):  # write s as a pstring: length byte, data, pad byte if needed
+    if len(s) > 255:  # the length prefix is a single unsigned byte
+        raise ValueError("string exceeds maximum pstring length")
+    f.write(struct.pack('B', len(s)))
+    f.write(s)
+    if len(s) & 1 == 0:
+        f.write(chr(0))  # even-length data plus the length byte is odd, so pad to an even total
+
+def _write_float(f, x):  # write x in the 10-byte layout that _read_float() parses
+    import math
+    if x < 0:
+        sign = 0x8000  # sign bit, OR-ed into the 16-bit exponent field below
+        x = x * -1
+    else:
+        sign = 0
+    if x == 0:
+        expon = 0
+        himant = 0
+        lomant = 0
+    else:
+        fmant, expon = math.frexp(x)
+        if expon > 16384 or fmant >= 1 or fmant != fmant: # Infinity or NaN
+            expon = sign|0x7FFF
+            himant = 0
+            lomant = 0
+        else: # Finite
+            expon = expon + 16382
+            if expon < 0: # denormalized
+                fmant = math.ldexp(fmant, expon)
+                expon = 0
+            expon = expon | sign
+            fmant = math.ldexp(fmant, 32)  # shift the top 32 mantissa bits into the integer part
+            fsmant = math.floor(fmant)
+            himant = long(fsmant)
+            fmant = math.ldexp(fmant - fsmant, 32)  # same again for the next 32 bits
+            fsmant = math.floor(fmant)
+            lomant = long(fsmant)
+    _write_ushort(f, expon)
+    _write_ulong(f, himant)
+    _write_ulong(f, lomant)
+
+from chunk import Chunk
+
+class Aifc_read:
+    # Variables used in this class:
+    #
+    # These variables are available to the user through appropriate
+    # methods of this class:
+    # _file -- the open file with methods read(), close(), and seek()
+    #       set through the __init__() method
+    # _nchannels -- the number of audio channels
+    #       available through the getnchannels() method
+    # _nframes -- the number of audio frames
+    #       available through the getnframes() method
+    # _sampwidth -- the number of bytes per audio sample
+    #       available through the getsampwidth() method
+    # _framerate -- the sampling frequency
+    #       available through the getframerate() method
+    # _comptype -- the AIFF-C compression type ('NONE' if AIFF)
+    #       available through the getcomptype() method
+    # _compname -- the human-readable AIFF-C compression type
+    #       available through the getcompname() method
+    # _markers -- the marks in the audio file
+    #       available through the getmarkers() and getmark()
+    #       methods
+    # _soundpos -- the position in the audio stream
+    #       available through the tell() method, set through the
+    #       setpos() method
+    #
+    # These variables are used internally only:
+    # _version -- the AIFF-C version number
+    # _decomp -- the decompressor from builtin module cl
+    # _comm_chunk_read -- 1 iff the COMM chunk has been read
+    # _aifc -- 1 iff reading an AIFF-C file
+    # _ssnd_seek_needed -- 1 iff readframes() must reposition the
+    #       SSND chunk before it reads
+    # _ssnd_chunk -- instantiation of a chunk class for the SSND chunk
+    # _framesize -- size of one frame in the file
+
+    _file = None # class-level default. NOTE(review): this class defines no __del__, so the original "__del__ checks it" remark looks stale
+
+    def initfp(self, file):  # parse the FORM container and remember where the sound data lives
+        self._version = 0
+        self._decomp = None
+        self._convert = None
+        self._markers = []
+        self._soundpos = 0
+        self._file = file
+        chunk = Chunk(file)
+        if chunk.getname() != 'FORM':
+            raise Error, 'file does not start with FORM id'
+        formdata = chunk.read(4)
+        if formdata == 'AIFF':
+            self._aifc = 0
+        elif formdata == 'AIFC':
+            self._aifc = 1
+        else:
+            raise Error, 'not an AIFF or AIFF-C file'
+        self._comm_chunk_read = 0
+        self._ssnd_chunk = None
+        while 1:  # walk the chunks inside FORM until Chunk() hits end of file
+            self._ssnd_seek_needed = 1  # reading any further chunk moves the file away from the sound data
+            try:
+                chunk = Chunk(self._file)
+            except EOFError:
+                break
+            chunkname = chunk.getname()
+            if chunkname == 'COMM':
+                self._read_comm_chunk(chunk)
+                self._comm_chunk_read = 1
+            elif chunkname == 'SSND':
+                self._ssnd_chunk = chunk
+                dummy = chunk.read(8)  # skip the two 4-byte fields that precede the samples
+                self._ssnd_seek_needed = 0
+            elif chunkname == 'FVER':
+                self._version = _read_ulong(chunk)
+            elif chunkname == 'MARK':
+                self._readmark(chunk)
+            chunk.skip()  # chunks with other names are skipped without being interpreted
+        if not self._comm_chunk_read or not self._ssnd_chunk:
+            raise Error, 'COMM chunk and/or SSND chunk missing'
+        if self._aifc and self._decomp:  # _decomp is only set by _read_comm_chunk when the cl module handles the compression
+            import cl
+            params = [cl.ORIGINAL_FORMAT, 0,
+                      cl.BITS_PER_COMPONENT, self._sampwidth * 8,
+                      cl.FRAME_RATE, self._framerate]
+            if self._nchannels == 1:
+                params[1] = cl.MONO
+            elif self._nchannels == 2:
+                params[1] = cl.STEREO_INTERLEAVED
+            else:
+                raise Error, 'cannot compress more than 2 channels'
+            self._decomp.SetParams(params)
+
+    def __init__(self, f):  # f is a path name (opened here in 'rb' mode) or an already open file object
+        if isinstance(f, basestring):
+            f = __builtin__.open(f, 'rb')
+            try:
+                self.initfp(f)
+            except:
+                f.close()  # do not leak the file opened here when parsing fails
+                raise
+        else:
+            # assume it is an open file object already
+            self.initfp(f)
+
+    #
+    # User visible methods.
+    #
+    def getfp(self):  # the underlying file object
+        return self._file
+
+    def rewind(self):  # next readframes() starts again at frame 0
+        self._ssnd_seek_needed = 1
+        self._soundpos = 0
+
+    def close(self):  # close the decompressor, if any, and always the file
+        decomp = self._decomp
+        try:
+            if decomp:
+                self._decomp = None
+                decomp.CloseDecompressor()
+        finally:
+            self._file.close()
+
+    def tell(self):  # current position, in frames
+        return self._soundpos
+
+    def getnchannels(self):
+        return self._nchannels
+
+    def getnframes(self):
+        return self._nframes
+
+    def getsampwidth(self):  # in bytes
+        return self._sampwidth
+
+    def getframerate(self):
+        return self._framerate
+
+    def getcomptype(self):
+        return self._comptype
+
+    def getcompname(self):
+        return self._compname
+
+## def getversion(self):
+## return self._version
+
+    def getparams(self):  # tuple in the order Aifc_write.setparams() unpacks
+        return self.getnchannels(), self.getsampwidth(), \
+               self.getframerate(), self.getnframes(), \
+               self.getcomptype(), self.getcompname()
+
+    def getmarkers(self):  # None, not an empty list, when there are no markers
+        if len(self._markers) == 0:
+            return None
+        return self._markers
+
+    def getmark(self, id):  # the (id, pos, name) tuple with this id
+        for marker in self._markers:
+            if id == marker[0]:
+                return marker
+        raise Error, 'marker %r does not exist' % (id,)
+
+    def setpos(self, pos):  # pos is a frame number; pos == number of frames is allowed
+        if pos < 0 or pos > self._nframes:
+            raise Error, 'position not in range'
+        self._soundpos = pos
+        self._ssnd_seek_needed = 1
+
+    def readframes(self, nframes):  # returns at most nframes frames as a string, '' at the end
+        if self._ssnd_seek_needed:
+            self._ssnd_chunk.seek(0)
+            dummy = self._ssnd_chunk.read(8)  # skip the two 4-byte fields that precede the samples
+            pos = self._soundpos * self._framesize
+            if pos:
+                self._ssnd_chunk.seek(pos + 8)
+            self._ssnd_seek_needed = 0
+        if nframes == 0:
+            return ''
+        data = self._ssnd_chunk.read(nframes * self._framesize)
+        if self._convert and data:
+            data = self._convert(data)  # decompress to linear samples
+        self._soundpos = self._soundpos + len(data) // (self._nchannels * self._sampwidth)  # advance by the number of frames actually returned
+        return data
+
+    #
+    # Internal methods.
+    #
+
+    def _decomp_data(self, data):  # _convert hook used when the cl module decompresses
+        import cl
+        dummy = self._decomp.SetParam(cl.FRAME_BUFFER_SIZE,
+                                      len(data) * 2)
+        return self._decomp.Decompress(len(data) // self._nchannels,
+                                       data)
+
+    def _ulaw2lin(self, data):  # _convert hook: u-law to 2-byte linear samples via audioop
+        import audioop
+        return audioop.ulaw2lin(data, 2)
+
+    def _adpcm2lin(self, data):  # _convert hook for 'G722'; the decoder state is carried between calls
+        import audioop
+        if not hasattr(self, '_adpcmstate'):
+            # first time
+            self._adpcmstate = None
+        data, self._adpcmstate = audioop.adpcm2lin(data, 2,
+                                                   self._adpcmstate)
+        return data
+
+    def _read_comm_chunk(self, chunk):  # fill in the audio parameters and pick a decompression hook
+        self._nchannels = _read_short(chunk)
+        self._nframes = _read_long(chunk)
+        self._sampwidth = (_read_short(chunk) + 7) // 8  # the file stores bits per sample; round up to whole bytes
+        self._framerate = int(_read_float(chunk))
+        self._framesize = self._nchannels * self._sampwidth
+        if self._aifc:
+            #DEBUG: SGI's soundeditor produces a bad size :-(
+            kludge = 0
+            if chunk.chunksize == 18:
+                kludge = 1
+                print 'Warning: bad COMM chunk size'
+                chunk.chunksize = 23
+            #DEBUG end
+            self._comptype = chunk.read(4)
+            #DEBUG start
+            if kludge:
+                length = ord(chunk.file.read(1))  # peek at the pstring length byte on the raw file
+                if length & 1 == 0:
+                    length = length + 1
+                chunk.chunksize = chunk.chunksize + length
+                chunk.file.seek(-1, 1)  # step back so _read_string() sees the length byte again
+            #DEBUG end
+            self._compname = _read_string(chunk)
+            if self._comptype != 'NONE':
+                if self._comptype == 'G722':
+                    try:
+                        import audioop
+                    except ImportError:
+                        pass  # fall through to the cl-based path below
+                    else:
+                        self._convert = self._adpcm2lin
+                        self._sampwidth = 2
+                        return
+                # for ULAW and ALAW try Compression Library
+                try:
+                    import cl
+                except ImportError:
+                    if self._comptype in ('ULAW', 'ulaw'):
+                        try:
+                            import audioop
+                            self._convert = self._ulaw2lin
+                            self._sampwidth = 2
+                            return
+                        except ImportError:
+                            pass
+                    raise Error, 'cannot read compressed AIFF-C files'
+                if self._comptype in ('ULAW', 'ulaw'):
+                    scheme = cl.G711_ULAW
+                elif self._comptype in ('ALAW', 'alaw'):
+                    scheme = cl.G711_ALAW
+                else:
+                    raise Error, 'unsupported compression type'
+                self._decomp = cl.OpenDecompressor(scheme)
+                self._convert = self._decomp_data
+                self._sampwidth = 2
+        else:
+            self._comptype = 'NONE'
+            self._compname = 'not compressed'
+
+    def _readmark(self, chunk):  # collect (id, pos, name) tuples from a MARK chunk
+        nmarkers = _read_short(chunk)
+        # Some files appear to contain invalid counts.
+        # Cope with this by testing for EOF.
+        try:
+            for i in range(nmarkers):
+                id = _read_short(chunk)
+                pos = _read_long(chunk)
+                name = _read_string(chunk)
+                if pos or name:
+                    # some files appear to have
+                    # dummy markers consisting of
+                    # a position 0 and name ''
+                    self._markers.append((id, pos, name))
+        except EOFError:
+            print 'Warning: MARK chunk contains only',
+            print len(self._markers),
+            if len(self._markers) == 1: print 'marker',
+            else: print 'markers',
+            print 'instead of', nmarkers
+
+class Aifc_write:
+    # Variables used in this class:
+    #
+    # These variables are user settable through appropriate methods
+    # of this class:
+    # _file -- the open file with methods write(), close(), tell(), seek()
+    #       set through the __init__() method
+    # _comptype -- the AIFF-C compression type ('NONE' in AIFF)
+    #       set through the setcomptype() or setparams() method
+    # _compname -- the human-readable AIFF-C compression type
+    #       set through the setcomptype() or setparams() method
+    # _nchannels -- the number of audio channels
+    #       set through the setnchannels() or setparams() method
+    # _sampwidth -- the number of bytes per audio sample
+    #       set through the setsampwidth() or setparams() method
+    # _framerate -- the sampling frequency
+    #       set through the setframerate() or setparams() method
+    # _nframes -- the number of audio frames written to the header
+    #       set through the setnframes() or setparams() method
+    # _aifc -- whether we're writing an AIFF-C file or an AIFF file
+    #       set through the aifc() method, reset through the
+    #       aiff() method
+    #
+    # These variables are used internally only:
+    # _version -- the AIFF-C version number
+    # _comp -- the compressor from builtin module cl
+    # _nframeswritten -- the number of audio frames actually written
+    # _datalength -- the size of the audio samples written to the header
+    # _datawritten -- the size of the audio samples actually written
+
+    _file = None # Set here since __del__ checks it
+
+    def __init__(self, f):  # f is a path name (opened here in 'wb' mode) or an already open file object
+        if isinstance(f, basestring):
+            filename = f
+            f = __builtin__.open(f, 'wb')
+        else:
+            # else, assume it is an open file object already
+            filename = '???'
+        self.initfp(f)
+        if filename[-5:] == '.aiff':  # only a path ending in '.aiff' selects plain AIFF; initfp() defaulted to AIFF-C
+            self._aifc = 0
+        else:
+            self._aifc = 1
+
+    def initfp(self, file):  # reset all writer state; nothing is written to the file yet
+        self._file = file
+        self._version = _AIFC_version
+        self._comptype = 'NONE'
+        self._compname = 'not compressed'
+        self._comp = None
+        self._convert = None
+        self._nchannels = 0
+        self._sampwidth = 0
+        self._framerate = 0
+        self._nframes = 0
+        self._nframeswritten = 0
+        self._datawritten = 0
+        self._datalength = 0
+        self._markers = []
+        self._marklength = 0
+        self._aifc = 1 # AIFF-C is default
+
+    def __del__(self):
+        if self._file:  # close() sets _file to None, so an already closed writer is left alone
+            self.close()
+
+    #
+    # User visible methods.
+    #
+    def aiff(self):  # select plain AIFF output
+        if self._nframeswritten:
+            raise Error, 'cannot change parameters after starting to write'
+        self._aifc = 0
+
+    def aifc(self):  # select AIFF-C output
+        if self._nframeswritten:
+            raise Error, 'cannot change parameters after starting to write'
+        self._aifc = 1
+
+    def setnchannels(self, nchannels):
+        if self._nframeswritten:
+            raise Error, 'cannot change parameters after starting to write'
+        if nchannels < 1:
+            raise Error, 'bad # of channels'
+        self._nchannels = nchannels
+
+    def getnchannels(self):
+        if not self._nchannels:
+            raise Error, 'number of channels not set'
+        return self._nchannels
+
+    def setsampwidth(self, sampwidth):  # in bytes, 1 to 4
+        if self._nframeswritten:
+            raise Error, 'cannot change parameters after starting to write'
+        if sampwidth < 1 or sampwidth > 4:
+            raise Error, 'bad sample width'
+        self._sampwidth = sampwidth
+
+    def getsampwidth(self):
+        if not self._sampwidth:
+            raise Error, 'sample width not set'
+        return self._sampwidth
+
+    def setframerate(self, framerate):
+        if self._nframeswritten:
+            raise Error, 'cannot change parameters after starting to write'
+        if framerate <= 0:
+            raise Error, 'bad frame rate'
+        self._framerate = framerate
+
+    def getframerate(self):
+        if not self._framerate:
+            raise Error, 'frame rate not set'
+        return self._framerate
+
+    def setnframes(self, nframes):  # frame count announced in the header; corrected later if it turns out wrong
+        if self._nframeswritten:
+            raise Error, 'cannot change parameters after starting to write'
+        self._nframes = nframes
+
+    def getnframes(self):  # frames written so far, not the value given to setnframes()
+        return self._nframeswritten
+
+    def setcomptype(self, comptype, compname):
+        if self._nframeswritten:
+            raise Error, 'cannot change parameters after starting to write'
+        if comptype not in ('NONE', 'ULAW', 'ulaw', 'ALAW', 'alaw', 'G722'):
+            raise Error, 'unsupported compression type'
+        self._comptype = comptype
+        self._compname = compname
+
+    def getcomptype(self):
+        return self._comptype
+
+    def getcompname(self):
+        return self._compname
+
+## def setversion(self, version):
+## if self._nframeswritten:
+## raise Error, 'cannot change parameters after starting to write'
+## self._version = version
+
+    def setparams(self, info):  # info is a 6-tuple in the order getparams() returns
+        nchannels, sampwidth, framerate, nframes, comptype, compname = info
+        if self._nframeswritten:
+            raise Error, 'cannot change parameters after starting to write'
+        if comptype not in ('NONE', 'ULAW', 'ulaw', 'ALAW', 'alaw', 'G722'):  # checked up front so a bad type is rejected before any setter runs
+            raise Error, 'unsupported compression type'
+        self.setnchannels(nchannels)
+        self.setsampwidth(sampwidth)
+        self.setframerate(framerate)
+        self.setnframes(nframes)
+        self.setcomptype(comptype, compname)
+
+    def getparams(self):
+        if not self._nchannels or not self._sampwidth or not self._framerate:
+            raise Error, 'not all parameters set'
+        return self._nchannels, self._sampwidth, self._framerate, \
+               self._nframes, self._comptype, self._compname
+
+    def setmark(self, id, pos, name):  # add a marker, or replace the one that already has this id
+        if id <= 0:
+            raise Error, 'marker ID must be > 0'
+        if pos < 0:
+            raise Error, 'marker position must be >= 0'
+        if type(name) != type(''):
+            raise Error, 'marker name must be a string'
+        for i in range(len(self._markers)):
+            if id == self._markers[i][0]:
+                self._markers[i] = id, pos, name
+                return
+        self._markers.append((id, pos, name))
+
+    def getmark(self, id):  # the (id, pos, name) tuple with this id
+        for marker in self._markers:
+            if id == marker[0]:
+                return marker
+        raise Error, 'marker %r does not exist' % (id,)
+
+    def getmarkers(self):  # None, not an empty list, when there are no markers
+        if len(self._markers) == 0:
+            return None
+        return self._markers
+
+    def tell(self):  # current position, in frames written
+        return self._nframeswritten
+
+    def writeframesraw(self, data):  # write frames without revisiting the header afterwards
+        self._ensure_header_written(len(data))
+        nframes = len(data) // (self._sampwidth * self._nchannels)  # counted before any compression changes len(data)
+        if self._convert:
+            data = self._convert(data)
+        self._file.write(data)
+        self._nframeswritten = self._nframeswritten + nframes
+        self._datawritten = self._datawritten + len(data)
+
+    def writeframes(self, data):  # write frames, then fix the header if its counts no longer match
+        self.writeframesraw(data)
+        if self._nframeswritten != self._nframes or \
+              self._datalength != self._datawritten:
+            self._patchheader()
+
+    def close(self):  # finish the file: pad, write markers, fix the header, close; a no-op when already closed
+        if self._file is None:
+            return
+        try:
+            self._ensure_header_written(0)
+            if self._datawritten & 1:
+                # quick pad to even size
+                self._file.write(chr(0))
+                self._datawritten = self._datawritten + 1
+            self._writemarkers()
+            if self._nframeswritten != self._nframes or \
+                  self._datalength != self._datawritten or \
+                  self._marklength:
+                self._patchheader()
+            if self._comp:
+                self._comp.CloseCompressor()
+                self._comp = None
+        finally:
+            # Prevent ref cycles
+            self._convert = None
+            f = self._file
+            self._file = None
+            f.close()
+
+    #
+    # Internal methods.
+    #
+
+    def _comp_data(self, data):  # _convert hook used when the cl module compresses
+        import cl
+        dummy = self._comp.SetParam(cl.FRAME_BUFFER_SIZE, len(data))
+        dummy = self._comp.SetParam(cl.COMPRESSED_BUFFER_SIZE, len(data))
+        return self._comp.Compress(self._nframes, data)
+
+    def _lin2ulaw(self, data):  # _convert hook: 2-byte linear samples to u-law via audioop
+        import audioop
+        return audioop.lin2ulaw(data, 2)
+
+    def _lin2adpcm(self, data):  # _convert hook for 'G722'; the encoder state is carried between calls
+        import audioop
+        if not hasattr(self, '_adpcmstate'):
+            self._adpcmstate = None
+        data, self._adpcmstate = audioop.lin2adpcm(data, 2,
+                                                   self._adpcmstate)
+        return data
+
+    def _ensure_header_written(self, datasize):  # validate the parameters and write the header while no frames have been written
+        if not self._nframeswritten:
+            if self._comptype in ('ULAW', 'ulaw', 'ALAW', 'alaw'):
+                if not self._sampwidth:
+                    self._sampwidth = 2
+                if self._sampwidth != 2:
+                    raise Error, 'sample width must be 2 when compressing with ULAW or ALAW'
+            if self._comptype == 'G722':
+                if not self._sampwidth:
+                    self._sampwidth = 2
+                if self._sampwidth != 2:
+                    raise Error, 'sample width must be 2 when compressing with G7.22 (ADPCM)'
+            if not self._nchannels:
+                raise Error, '# channels not specified'
+            if not self._sampwidth:
+                raise Error, 'sample width not specified'
+            if not self._framerate:
+                raise Error, 'sampling rate not specified'
+            self._write_header(datasize)
+
+    def _init_compression(self):  # choose the _convert hook (and the cl compressor, if used) for the selected type
+        if self._comptype == 'G722':
+            self._convert = self._lin2adpcm
+            return
+        try:
+            import cl
+        except ImportError:
+            if self._comptype in ('ULAW', 'ulaw'):
+                try:
+                    import audioop
+                    self._convert = self._lin2ulaw
+                    return
+                except ImportError:
+                    pass
+            raise Error, 'cannot write compressed AIFF-C files'
+        if self._comptype in ('ULAW', 'ulaw'):
+            scheme = cl.G711_ULAW
+        elif self._comptype in ('ALAW', 'alaw'):
+            scheme = cl.G711_ALAW
+        else:
+            raise Error, 'unsupported compression type'
+        self._comp = cl.OpenCompressor(scheme)
+        params = [cl.ORIGINAL_FORMAT, 0,
+                  cl.BITS_PER_COMPONENT, self._sampwidth * 8,
+                  cl.FRAME_RATE, self._framerate,
+                  cl.FRAME_BUFFER_SIZE, 100,
+                  cl.COMPRESSED_BUFFER_SIZE, 100]
+        if self._nchannels == 1:
+            params[1] = cl.MONO
+        elif self._nchannels == 2:
+            params[1] = cl.STEREO_INTERLEAVED
+        else:
+            raise Error, 'cannot compress more than 2 channels'
+        self._comp.SetParams(params)
+        # the compressor produces a header which we ignore
+        dummy = self._comp.Compress(0, '')
+        self._convert = self._comp_data
+
+    def _write_header(self, initlength):  # write FORM, optional FVER, COMM and the start of SSND
+        if self._aifc and self._comptype != 'NONE':
+            self._init_compression()
+        self._file.write('FORM')
+        if not self._nframes:
+            self._nframes = initlength // (self._nchannels * self._sampwidth)  # no frame count announced: estimate it from the first write
+        self._datalength = self._nframes * self._nchannels * self._sampwidth
+        if self._datalength & 1:
+            self._datalength = self._datalength + 1  # keep the announced data size even
+        if self._aifc:
+            if self._comptype in ('ULAW', 'ulaw', 'ALAW', 'alaw'):
+                self._datalength = self._datalength // 2  # these types store one byte per 2-byte sample
+                if self._datalength & 1:
+                    self._datalength = self._datalength + 1
+            elif self._comptype == 'G722':
+                self._datalength = (self._datalength + 3) // 4  # a quarter of the linear size, rounded up
+                if self._datalength & 1:
+                    self._datalength = self._datalength + 1
+        try:
+            self._form_length_pos = self._file.tell()
+        except (AttributeError, IOError):
+            self._form_length_pos = None  # no usable tell(): the positions needed by _patchheader() are not recorded
+        commlength = self._write_form_length(self._datalength)
+        if self._aifc:
+            self._file.write('AIFC')
+            self._file.write('FVER')
+            _write_ulong(self._file, 4)
+            _write_ulong(self._file, self._version)
+        else:
+            self._file.write('AIFF')
+        self._file.write('COMM')
+        _write_ulong(self._file, commlength)
+        _write_short(self._file, self._nchannels)
+        if self._form_length_pos is not None:
+            self._nframes_pos = self._file.tell()
+        _write_ulong(self._file, self._nframes)
+        if self._comptype in ('ULAW', 'ulaw', 'ALAW', 'alaw', 'G722'):
+            _write_short(self._file, 8)  # compressed data is declared as 8 bits per sample
+        else:
+            _write_short(self._file, self._sampwidth * 8)
+        _write_float(self._file, self._framerate)
+        if self._aifc:
+            self._file.write(self._comptype)
+            _write_string(self._file, self._compname)
+        self._file.write('SSND')
+        if self._form_length_pos is not None:
+            self._ssnd_length_pos = self._file.tell()
+        _write_ulong(self._file, self._datalength + 8)  # + 8 for the two 4-byte fields written next
+        _write_ulong(self._file, 0)
+        _write_ulong(self._file, 0)
+
+    def _write_form_length(self, datalength):  # write the FORM size field and return the COMM chunk size
+        if self._aifc:
+            commlength = 18 + 5 + len(self._compname)  # 18 bytes of fixed fields + 4-byte type + pstring length byte + name
+            if commlength & 1:
+                commlength = commlength + 1
+            verslength = 12  # FVER id (4) + size (4) + version (4)
+        else:
+            commlength = 18
+            verslength = 0
+        _write_ulong(self._file, 4 + verslength + self._marklength + \
+                     8 + commlength + 16 + datalength)  # 4: form type; 8: COMM id + size; 16: SSND id, size and two 4-byte fields
+        return commlength
+
+    def _patchheader(self):  # rewrite the size fields recorded by _write_header() with the real values
+        curpos = self._file.tell()
+        if self._datawritten & 1:
+            datalength = self._datawritten + 1
+            self._file.write(chr(0))
+        else:
+            datalength = self._datawritten
+        if datalength == self._datalength and \
+              self._nframes == self._nframeswritten and \
+              self._marklength == 0:
+            self._file.seek(curpos, 0)
+            return
+        self._file.seek(self._form_length_pos, 0)
+        dummy = self._write_form_length(datalength)
+        self._file.seek(self._nframes_pos, 0)
+        _write_ulong(self._file, self._nframeswritten)
+        self._file.seek(self._ssnd_length_pos, 0)
+        _write_ulong(self._file, datalength + 8)
+        self._file.seek(curpos, 0)  # return to where writing left off
+        self._nframes = self._nframeswritten
+        self._datalength = datalength
+
+    def _writemarkers(self):  # append a MARK chunk, if there are any markers
+        if len(self._markers) == 0:
+            return
+        self._file.write('MARK')
+        length = 2  # the marker count field
+        for marker in self._markers:
+            id, pos, name = marker
+            length = length + len(name) + 1 + 6  # name + its length byte + id (2) + position (4)
+            if len(name) & 1 == 0:
+                length = length + 1  # pad byte written by _write_string()
+        _write_ulong(self._file, length)
+        self._marklength = length + 8  # + 8 for the MARK id and size fields
+        _write_short(self._file, len(self._markers))
+        for marker in self._markers:
+            id, pos, name = marker
+            _write_short(self._file, id)
+            _write_ulong(self._file, pos)
+            _write_string(self._file, name)
+
+def open(f, mode=None):  # factory: returns an Aifc_read or an Aifc_write for f
+    if mode is None:
+        # No explicit mode: use the file object's own mode attribute
+        # if it has one, and default to binary reading otherwise.
+        mode = f.mode if hasattr(f, 'mode') else 'rb'
+    if mode in ('r', 'rb'):
+        return Aifc_read(f)
+    if mode in ('w', 'wb'):
+        return Aifc_write(f)
+    # Every other mode string is rejected; the reader and writer
+    # classes themselves open path names in binary mode.
+    raise Error, "mode must be 'r', 'rb', 'w', or 'wb'"
+
+openfp = open # B/W compatibility
+
+if __name__ == '__main__':  # demo: print the parameters of argv[1] and optionally copy it to argv[2]
+    import sys
+    if not sys.argv[1:]:
+        sys.argv.append('/usr/demos/data/audio/bach.aiff')  # default input when no argument is given
+    fn = sys.argv[1]
+    f = open(fn, 'r')  # this module's open(), so f is an Aifc_read
+    try:
+        print "Reading", fn
+        print "nchannels =", f.getnchannels()
+        print "nframes =", f.getnframes()
+        print "sampwidth =", f.getsampwidth()
+        print "framerate =", f.getframerate()
+        print "comptype =", f.getcomptype()
+        print "compname =", f.getcompname()
+        if sys.argv[2:]:
+            gn = sys.argv[2]
+            print "Writing", gn
+            g = open(gn, 'w')
+            try:
+                g.setparams(f.getparams())
+                while 1:  # copy 1024 frames at a time until readframes() returns nothing
+                    data = f.readframes(1024)
+                    if not data:
+                        break
+                    g.writeframes(data)
+            finally:
+                g.close()
+            print "Done."
+    finally:
+        f.close()
diff --git a/cashew/Lib/antigravity.py b/cashew/Lib/antigravity.py
new file mode 100644
index 0000000..7fb7d82
--- /dev/null
+++ b/cashew/Lib/antigravity.py
@@ -0,0 +1,4 @@
+
+import webbrowser
+
+webbrowser.open("http://xkcd.com/353/")
diff --git a/cashew/Lib/anydbm.py b/cashew/Lib/anydbm.py
new file mode 100644
index 0000000..ba7e905
--- /dev/null
+++ b/cashew/Lib/anydbm.py
@@ -0,0 +1,85 @@
+"""Generic interface to all dbm clones.
+
+Instead of
+
+ import dbm
+ d = dbm.open(file, 'w', 0666)
+
+use
+
+ import anydbm
+ d = anydbm.open(file, 'w')
+
+The returned object is a dbhash, gdbm, dbm or dumbdbm object,
+dependent on the type of database being opened (determined by whichdb
+module) in the case of an existing dbm. If the dbm does not exist and
+the create or new flag ('c' or 'n') was specified, the dbm type will
+be determined by the availability of the modules (tested in the above
+order).
+
+It has the following interface (key and data are strings):
+
+ d[key] = data # store data at key (may override data at
+ # existing key)
+ data = d[key] # retrieve data at key (raise KeyError if no
+ # such key)
+ del d[key] # delete data stored at key (raises KeyError
+ # if no such key)
+ flag = key in d # true if the key exists
+ list = d.keys() # return a list of all existing keys (slow!)
+
+Future versions may change the order in which implementations are
+tested for existence, and add interfaces to other dbm-like
+implementations.
+"""
+
+class error(Exception):
+ pass
+
+_names = ['dbhash', 'gdbm', 'dbm', 'dumbdbm']
+_errors = [error]
+_defaultmod = None
+
+for _name in _names:
+ try:
+ _mod = __import__(_name)
+ except ImportError:
+ continue
+ if not _defaultmod:
+ _defaultmod = _mod
+ _errors.append(_mod.error)
+
+if not _defaultmod:
+ raise ImportError, "no dbm clone found; tried %s" % _names
+
+error = tuple(_errors)
+
+def open(file, flag='r', mode=0666):
+ """Open or create database at path given by *file*.
+
+ Optional argument *flag* can be 'r' (default) for read-only access, 'w'
+ for read-write access of an existing database, 'c' for read-write access
+ to a new or existing database, and 'n' for read-write access to a new
+ database.
+
+ Note: 'r' and 'w' fail if the database doesn't exist; 'c' creates it
+ only if it doesn't exist; and 'n' always creates a new database.
+ """
+
+ # guess the type of an existing database
+ from whichdb import whichdb
+ result=whichdb(file)
+ if result is None:
+ # db doesn't exist
+ if 'c' in flag or 'n' in flag:
+ # file doesn't exist and the new
+ # flag was used so use default type
+ mod = _defaultmod
+ else:
+ raise error, "need 'c' or 'n' flag to open new db"
+ elif result == "":
+ # db type cannot be determined
+ raise error, "db type could not be determined"
+ else:
+ mod = __import__(result)
+ return mod.open(file, flag, mode)
diff --git a/cashew/Lib/argparse.py b/cashew/Lib/argparse.py
new file mode 100644
index 0000000..1b233b8
--- /dev/null
+++ b/cashew/Lib/argparse.py
@@ -0,0 +1,2374 @@
+# Author: Steven J. Bethard .
+
+"""Command-line parsing library
+
+This module is an optparse-inspired command-line parsing library that:
+
+ - handles both optional and positional arguments
+ - produces highly informative usage messages
+ - supports parsers that dispatch to sub-parsers
+
+The following is a simple usage example that sums integers from the
+command-line and writes the result to a file::
+
+ parser = argparse.ArgumentParser(
+ description='sum the integers at the command line')
+ parser.add_argument(
+ 'integers', metavar='int', nargs='+', type=int,
+ help='an integer to be summed')
+ parser.add_argument(
+ '--log', default=sys.stdout, type=argparse.FileType('w'),
+ help='the file where the sum should be written')
+ args = parser.parse_args()
+ args.log.write('%s' % sum(args.integers))
+ args.log.close()
+
+The module contains the following public classes:
+
+ - ArgumentParser -- The main entry point for command-line parsing. As the
+ example above shows, the add_argument() method is used to populate
+ the parser with actions for optional and positional arguments. Then
+ the parse_args() method is invoked to convert the args at the
+ command-line into an object with attributes.
+
+ - ArgumentError -- The exception raised by ArgumentParser objects when
+ there are errors with the parser's actions. Errors raised while
+ parsing the command-line are caught by ArgumentParser and emitted
+ as command-line messages.
+
+ - FileType -- A factory for defining types of files to be created. As the
+ example above shows, instances of FileType are typically passed as
+ the type= argument of add_argument() calls.
+
+ - Action -- The base class for parser actions. Typically actions are
+ selected by passing strings like 'store_true' or 'append_const' to
+ the action= argument of add_argument(). However, for greater
+ customization of ArgumentParser actions, subclasses of Action may
+ be defined and passed as the action= argument.
+
+ - HelpFormatter, RawDescriptionHelpFormatter, RawTextHelpFormatter,
+ ArgumentDefaultsHelpFormatter -- Formatter classes which
+ may be passed as the formatter_class= argument to the
+ ArgumentParser constructor. HelpFormatter is the default,
+ RawDescriptionHelpFormatter and RawTextHelpFormatter tell the parser
+ not to change the formatting for help text, and
+ ArgumentDefaultsHelpFormatter adds information about argument defaults
+ to the help.
+
+All other classes in this module are considered implementation details.
+(Also note that HelpFormatter and RawDescriptionHelpFormatter are only
+considered public as object names -- the API of the formatter objects is
+still considered an implementation detail.)
+"""
+
+__version__ = '1.1'
+__all__ = [
+ 'ArgumentParser',
+ 'ArgumentError',
+ 'ArgumentTypeError',
+ 'FileType',
+ 'HelpFormatter',
+ 'ArgumentDefaultsHelpFormatter',
+ 'RawDescriptionHelpFormatter',
+ 'RawTextHelpFormatter',
+ 'Namespace',
+ 'Action',
+ 'ONE_OR_MORE',
+ 'OPTIONAL',
+ 'PARSER',
+ 'REMAINDER',
+ 'SUPPRESS',
+ 'ZERO_OR_MORE',
+]
+
+
+import collections as _collections
+import copy as _copy
+import os as _os
+import re as _re
+import sys as _sys
+import textwrap as _textwrap
+
+from gettext import gettext as _
+
+
+def _callable(obj):
+ return hasattr(obj, '__call__') or hasattr(obj, '__bases__')
+
+
+SUPPRESS = '==SUPPRESS=='
+
+OPTIONAL = '?'
+ZERO_OR_MORE = '*'
+ONE_OR_MORE = '+'
+PARSER = 'A...'
+REMAINDER = '...'
+_UNRECOGNIZED_ARGS_ATTR = '_unrecognized_args'
+
+# =============================
+# Utility functions and classes
+# =============================
+
+class _AttributeHolder(object):
+ """Abstract base class that provides __repr__.
+
+ The __repr__ method returns a string in the format::
+ ClassName(attr=name, attr=name, ...)
+ The attributes are determined by the _get_args() and _get_kwargs()
+ methods; by default _get_kwargs() inspects the instance __dict__.
+ """
+
+ def __repr__(self):
+ type_name = type(self).__name__
+ arg_strings = []
+ for arg in self._get_args():
+ arg_strings.append(repr(arg))
+ for name, value in self._get_kwargs():
+ arg_strings.append('%s=%r' % (name, value))
+ return '%s(%s)' % (type_name, ', '.join(arg_strings))
+
+ def _get_kwargs(self):
+ return sorted(self.__dict__.items())
+
+ def _get_args(self):
+ return []
+
+
+def _ensure_value(namespace, name, value):
+ if getattr(namespace, name, None) is None:
+ setattr(namespace, name, value)
+ return getattr(namespace, name)
+
+
+# ===============
+# Formatting Help
+# ===============
+
+class HelpFormatter(object):
+ """Formatter for generating usage messages and argument help strings.
+
+ Only the name of this class is considered a public API. All the methods
+ provided by the class are considered an implementation detail.
+ """
+
+ def __init__(self,
+ prog,
+ indent_increment=2,
+ max_help_position=24,
+ width=None):
+
+ # default setting for width
+ if width is None:
+ try:
+ width = int(_os.environ['COLUMNS'])
+ except (KeyError, ValueError):
+ width = 80
+ width -= 2
+
+ self._prog = prog
+ self._indent_increment = indent_increment
+ self._max_help_position = max_help_position
+ self._max_help_position = min(max_help_position,
+ max(width - 20, indent_increment * 2))
+ self._width = width
+
+ self._current_indent = 0
+ self._level = 0
+ self._action_max_length = 0
+
+ self._root_section = self._Section(self, None)
+ self._current_section = self._root_section
+
+ self._whitespace_matcher = _re.compile(r'\s+')
+ self._long_break_matcher = _re.compile(r'\n\n\n+')
+
+ # ===============================
+ # Section and indentation methods
+ # ===============================
+ def _indent(self):
+ self._current_indent += self._indent_increment
+ self._level += 1
+
+ def _dedent(self):
+ self._current_indent -= self._indent_increment
+ assert self._current_indent >= 0, 'Indent decreased below 0.'
+ self._level -= 1
+
+ class _Section(object):
+
+ def __init__(self, formatter, parent, heading=None):
+ self.formatter = formatter
+ self.parent = parent
+ self.heading = heading
+ self.items = []
+
+ def format_help(self):
+ # format the indented section
+ if self.parent is not None:
+ self.formatter._indent()
+ join = self.formatter._join_parts
+ for func, args in self.items:
+ func(*args)
+ item_help = join([func(*args) for func, args in self.items])
+ if self.parent is not None:
+ self.formatter._dedent()
+
+ # return nothing if the section was empty
+ if not item_help:
+ return ''
+
+ # add the heading if the section was non-empty
+ if self.heading is not SUPPRESS and self.heading is not None:
+ current_indent = self.formatter._current_indent
+ heading = '%*s%s:\n' % (current_indent, '', self.heading)
+ else:
+ heading = ''
+
+ # join the section-initial newline, the heading and the help
+ return join(['\n', heading, item_help, '\n'])
+
+ def _add_item(self, func, args):
+ self._current_section.items.append((func, args))
+
+ # ========================
+ # Message building methods
+ # ========================
+ def start_section(self, heading):
+ self._indent()
+ section = self._Section(self, self._current_section, heading)
+ self._add_item(section.format_help, [])
+ self._current_section = section
+
+ def end_section(self):
+ self._current_section = self._current_section.parent
+ self._dedent()
+
+ def add_text(self, text):
+ if text is not SUPPRESS and text is not None:
+ self._add_item(self._format_text, [text])
+
+ def add_usage(self, usage, actions, groups, prefix=None):
+ if usage is not SUPPRESS:
+ args = usage, actions, groups, prefix
+ self._add_item(self._format_usage, args)
+
+ def add_argument(self, action):
+ if action.help is not SUPPRESS:
+
+ # find all invocations
+ get_invocation = self._format_action_invocation
+ invocations = [get_invocation(action)]
+ for subaction in self._iter_indented_subactions(action):
+ invocations.append(get_invocation(subaction))
+
+ # update the maximum item length
+ invocation_length = max([len(s) for s in invocations])
+ action_length = invocation_length + self._current_indent
+ self._action_max_length = max(self._action_max_length,
+ action_length)
+
+ # add the item to the list
+ self._add_item(self._format_action, [action])
+
+ def add_arguments(self, actions):
+ for action in actions:
+ self.add_argument(action)
+
+ # =======================
+ # Help-formatting methods
+ # =======================
+ def format_help(self):
+ help = self._root_section.format_help()
+ if help:
+ help = self._long_break_matcher.sub('\n\n', help)
+ help = help.strip('\n') + '\n'
+ return help
+
+ def _join_parts(self, part_strings):
+ return ''.join([part
+ for part in part_strings
+ if part and part is not SUPPRESS])
+
+ def _format_usage(self, usage, actions, groups, prefix):
+ if prefix is None:
+ prefix = _('usage: ')
+
+ # if usage is specified, use that
+ if usage is not None:
+ usage = usage % dict(prog=self._prog)
+
+ # if no optionals or positionals are available, usage is just prog
+ elif usage is None and not actions:
+ usage = '%(prog)s' % dict(prog=self._prog)
+
+ # if optionals and positionals are available, calculate usage
+ elif usage is None:
+ prog = '%(prog)s' % dict(prog=self._prog)
+
+ # split optionals from positionals
+ optionals = []
+ positionals = []
+ for action in actions:
+ if action.option_strings:
+ optionals.append(action)
+ else:
+ positionals.append(action)
+
+ # build full usage string
+ format = self._format_actions_usage
+ action_usage = format(optionals + positionals, groups)
+ usage = ' '.join([s for s in [prog, action_usage] if s])
+
+ # wrap the usage parts if it's too long
+ text_width = self._width - self._current_indent
+ if len(prefix) + len(usage) > text_width:
+
+ # break usage into wrappable parts
+ part_regexp = r'\(.*?\)+|\[.*?\]+|\S+'
+ opt_usage = format(optionals, groups)
+ pos_usage = format(positionals, groups)
+ opt_parts = _re.findall(part_regexp, opt_usage)
+ pos_parts = _re.findall(part_regexp, pos_usage)
+ assert ' '.join(opt_parts) == opt_usage
+ assert ' '.join(pos_parts) == pos_usage
+
+ # helper for wrapping lines
+ def get_lines(parts, indent, prefix=None):
+ lines = []
+ line = []
+ if prefix is not None:
+ line_len = len(prefix) - 1
+ else:
+ line_len = len(indent) - 1
+ for part in parts:
+ if line_len + 1 + len(part) > text_width and line:
+ lines.append(indent + ' '.join(line))
+ line = []
+ line_len = len(indent) - 1
+ line.append(part)
+ line_len += len(part) + 1
+ if line:
+ lines.append(indent + ' '.join(line))
+ if prefix is not None:
+ lines[0] = lines[0][len(indent):]
+ return lines
+
+ # if prog is short, follow it with optionals or positionals
+ if len(prefix) + len(prog) <= 0.75 * text_width:
+ indent = ' ' * (len(prefix) + len(prog) + 1)
+ if opt_parts:
+ lines = get_lines([prog] + opt_parts, indent, prefix)
+ lines.extend(get_lines(pos_parts, indent))
+ elif pos_parts:
+ lines = get_lines([prog] + pos_parts, indent, prefix)
+ else:
+ lines = [prog]
+
+ # if prog is long, put it on its own line
+ else:
+ indent = ' ' * len(prefix)
+ parts = opt_parts + pos_parts
+ lines = get_lines(parts, indent)
+ if len(lines) > 1:
+ lines = []
+ lines.extend(get_lines(opt_parts, indent))
+ lines.extend(get_lines(pos_parts, indent))
+ lines = [prog] + lines
+
+ # join lines into usage
+ usage = '\n'.join(lines)
+
+ # prefix with 'usage:'
+ return '%s%s\n\n' % (prefix, usage)
+
+ def _format_actions_usage(self, actions, groups):
+ # find group indices and identify actions in groups
+ group_actions = set()
+ inserts = {}
+ for group in groups:
+ try:
+ start = actions.index(group._group_actions[0])
+ except ValueError:
+ continue
+ else:
+ end = start + len(group._group_actions)
+ if actions[start:end] == group._group_actions:
+ for action in group._group_actions:
+ group_actions.add(action)
+ if not group.required:
+ if start in inserts:
+ inserts[start] += ' ['
+ else:
+ inserts[start] = '['
+ inserts[end] = ']'
+ else:
+ if start in inserts:
+ inserts[start] += ' ('
+ else:
+ inserts[start] = '('
+ inserts[end] = ')'
+ for i in range(start + 1, end):
+ inserts[i] = '|'
+
+ # collect all actions format strings
+ parts = []
+ for i, action in enumerate(actions):
+
+ # suppressed arguments are marked with None
+ # remove | separators for suppressed arguments
+ if action.help is SUPPRESS:
+ parts.append(None)
+ if inserts.get(i) == '|':
+ inserts.pop(i)
+ elif inserts.get(i + 1) == '|':
+ inserts.pop(i + 1)
+
+ # produce all arg strings
+ elif not action.option_strings:
+ part = self._format_args(action, action.dest)
+
+ # if it's in a group, strip the outer []
+ if action in group_actions:
+ if part[0] == '[' and part[-1] == ']':
+ part = part[1:-1]
+
+ # add the action string to the list
+ parts.append(part)
+
+ # produce the first way to invoke the option in brackets
+ else:
+ option_string = action.option_strings[0]
+
+ # if the Optional doesn't take a value, format is:
+ # -s or --long
+ if action.nargs == 0:
+ part = '%s' % option_string
+
+ # if the Optional takes a value, format is:
+ # -s ARGS or --long ARGS
+ else:
+ default = action.dest.upper()
+ args_string = self._format_args(action, default)
+ part = '%s %s' % (option_string, args_string)
+
+ # make it look optional if it's not required or in a group
+ if not action.required and action not in group_actions:
+ part = '[%s]' % part
+
+ # add the action string to the list
+ parts.append(part)
+
+ # insert things at the necessary indices
+ for i in sorted(inserts, reverse=True):
+ parts[i:i] = [inserts[i]]
+
+ # join all the action items with spaces
+ text = ' '.join([item for item in parts if item is not None])
+
+ # clean up separators for mutually exclusive groups
+ open = r'[\[(]'
+ close = r'[\])]'
+ text = _re.sub(r'(%s) ' % open, r'\1', text)
+ text = _re.sub(r' (%s)' % close, r'\1', text)
+ text = _re.sub(r'%s *%s' % (open, close), r'', text)
+ text = _re.sub(r'\(([^|]*)\)', r'\1', text)
+ text = text.strip()
+
+ # return the text
+ return text
+
+ def _format_text(self, text):
+ if '%(prog)' in text:
+ text = text % dict(prog=self._prog)
+ text_width = max(self._width - self._current_indent, 11)
+ indent = ' ' * self._current_indent
+ return self._fill_text(text, text_width, indent) + '\n\n'
+
+ def _format_action(self, action):
+ # determine the required width and the entry label
+ help_position = min(self._action_max_length + 2,
+ self._max_help_position)
+ help_width = max(self._width - help_position, 11)
+ action_width = help_position - self._current_indent - 2
+ action_header = self._format_action_invocation(action)
+
+ # no help; start on same line and add a final newline
+ if not action.help:
+ tup = self._current_indent, '', action_header
+ action_header = '%*s%s\n' % tup
+
+ # short action name; start on the same line and pad two spaces
+ elif len(action_header) <= action_width:
+ tup = self._current_indent, '', action_width, action_header
+ action_header = '%*s%-*s ' % tup
+ indent_first = 0
+
+ # long action name; start on the next line
+ else:
+ tup = self._current_indent, '', action_header
+ action_header = '%*s%s\n' % tup
+ indent_first = help_position
+
+ # collect the pieces of the action help
+ parts = [action_header]
+
+ # if there was help for the action, add lines of help text
+ if action.help:
+ help_text = self._expand_help(action)
+ help_lines = self._split_lines(help_text, help_width)
+ parts.append('%*s%s\n' % (indent_first, '', help_lines[0]))
+ for line in help_lines[1:]:
+ parts.append('%*s%s\n' % (help_position, '', line))
+
+ # or add a newline if the description doesn't end with one
+ elif not action_header.endswith('\n'):
+ parts.append('\n')
+
+ # if there are any sub-actions, add their help as well
+ for subaction in self._iter_indented_subactions(action):
+ parts.append(self._format_action(subaction))
+
+ # return a single string
+ return self._join_parts(parts)
+
+ def _format_action_invocation(self, action):
+ if not action.option_strings:
+ metavar, = self._metavar_formatter(action, action.dest)(1)
+ return metavar
+
+ else:
+ parts = []
+
+ # if the Optional doesn't take a value, format is:
+ # -s, --long
+ if action.nargs == 0:
+ parts.extend(action.option_strings)
+
+ # if the Optional takes a value, format is:
+ # -s ARGS, --long ARGS
+ else:
+ default = action.dest.upper()
+ args_string = self._format_args(action, default)
+ for option_string in action.option_strings:
+ parts.append('%s %s' % (option_string, args_string))
+
+ return ', '.join(parts)
+
+ def _metavar_formatter(self, action, default_metavar):
+ if action.metavar is not None:
+ result = action.metavar
+ elif action.choices is not None:
+ choice_strs = [str(choice) for choice in action.choices]
+ result = '{%s}' % ','.join(choice_strs)
+ else:
+ result = default_metavar
+
+ def format(tuple_size):
+ if isinstance(result, tuple):
+ return result
+ else:
+ return (result, ) * tuple_size
+ return format
+
+ def _format_args(self, action, default_metavar):
+ get_metavar = self._metavar_formatter(action, default_metavar)
+ if action.nargs is None:
+ result = '%s' % get_metavar(1)
+ elif action.nargs == OPTIONAL:
+ result = '[%s]' % get_metavar(1)
+ elif action.nargs == ZERO_OR_MORE:
+ result = '[%s [%s ...]]' % get_metavar(2)
+ elif action.nargs == ONE_OR_MORE:
+ result = '%s [%s ...]' % get_metavar(2)
+ elif action.nargs == REMAINDER:
+ result = '...'
+ elif action.nargs == PARSER:
+ result = '%s ...' % get_metavar(1)
+ else:
+ formats = ['%s' for _ in range(action.nargs)]
+ result = ' '.join(formats) % get_metavar(action.nargs)
+ return result
+
+ def _expand_help(self, action):
+ params = dict(vars(action), prog=self._prog)
+ for name in list(params):
+ if params[name] is SUPPRESS:
+ del params[name]
+ for name in list(params):
+ if hasattr(params[name], '__name__'):
+ params[name] = params[name].__name__
+ if params.get('choices') is not None:
+ choices_str = ', '.join([str(c) for c in params['choices']])
+ params['choices'] = choices_str
+ return self._get_help_string(action) % params
+
+ def _iter_indented_subactions(self, action):
+ try:
+ get_subactions = action._get_subactions
+ except AttributeError:
+ pass
+ else:
+ self._indent()
+ for subaction in get_subactions():
+ yield subaction
+ self._dedent()
+
+ def _split_lines(self, text, width):
+ text = self._whitespace_matcher.sub(' ', text).strip()
+ return _textwrap.wrap(text, width)
+
+ def _fill_text(self, text, width, indent):
+ text = self._whitespace_matcher.sub(' ', text).strip()
+ return _textwrap.fill(text, width, initial_indent=indent,
+ subsequent_indent=indent)
+
+ def _get_help_string(self, action):
+ return action.help
+
+
+class RawDescriptionHelpFormatter(HelpFormatter):
+ """Help message formatter which retains any formatting in descriptions.
+
+ Only the name of this class is considered a public API. All the methods
+ provided by the class are considered an implementation detail.
+ """
+
+ def _fill_text(self, text, width, indent):
+ return ''.join([indent + line for line in text.splitlines(True)])
+
+
+class RawTextHelpFormatter(RawDescriptionHelpFormatter):
+ """Help message formatter which retains formatting of all help text.
+
+ Only the name of this class is considered a public API. All the methods
+ provided by the class are considered an implementation detail.
+ """
+
+ def _split_lines(self, text, width):
+ return text.splitlines()
+
+
+class ArgumentDefaultsHelpFormatter(HelpFormatter):
+ """Help message formatter which adds default values to argument help.
+
+ Only the name of this class is considered a public API. All the methods
+ provided by the class are considered an implementation detail.
+ """
+
+ def _get_help_string(self, action):
+ help = action.help
+ if '%(default)' not in action.help:
+ if action.default is not SUPPRESS:
+ defaulting_nargs = [OPTIONAL, ZERO_OR_MORE]
+ if action.option_strings or action.nargs in defaulting_nargs:
+ help += ' (default: %(default)s)'
+ return help
+
+
+# =====================
+# Options and Arguments
+# =====================
+
+def _get_action_name(argument):
+ if argument is None:
+ return None
+ elif argument.option_strings:
+ return '/'.join(argument.option_strings)
+ elif argument.metavar not in (None, SUPPRESS):
+ return argument.metavar
+ elif argument.dest not in (None, SUPPRESS):
+ return argument.dest
+ else:
+ return None
+
+
+class ArgumentError(Exception):
+ """An error from creating or using an argument (optional or positional).
+
+ The string value of this exception is the message, augmented with
+ information about the argument that caused it.
+ """
+
+ def __init__(self, argument, message):
+ self.argument_name = _get_action_name(argument)
+ self.message = message
+
+ def __str__(self):
+ if self.argument_name is None:
+ format = '%(message)s'
+ else:
+ format = 'argument %(argument_name)s: %(message)s'
+ return format % dict(message=self.message,
+ argument_name=self.argument_name)
+
+
+class ArgumentTypeError(Exception):
+ """An error from trying to convert a command line string to a type."""
+ pass
+
+
+# ==============
+# Action classes
+# ==============
+
+class Action(_AttributeHolder):
+ """Information about how to convert command line strings to Python objects.
+
+ Action objects are used by an ArgumentParser to represent the information
+ needed to parse a single argument from one or more strings from the
+ command line. The keyword arguments to the Action constructor are also
+ all attributes of Action instances.
+
+ Keyword Arguments:
+
+ - option_strings -- A list of command-line option strings which
+ should be associated with this action.
+
+ - dest -- The name of the attribute to hold the created object(s)
+
+ - nargs -- The number of command-line arguments that should be
+ consumed. By default, one argument will be consumed and a single
+ value will be produced. Other values include:
+ - N (an integer) consumes N arguments (and produces a list)
+ - '?' consumes zero or one arguments
+ - '*' consumes zero or more arguments (and produces a list)
+ - '+' consumes one or more arguments (and produces a list)
+ Note that the difference between the default and nargs=1 is that
+ with the default, a single value will be produced, while with
+ nargs=1, a list containing a single value will be produced.
+
+ - const -- The value to be produced if the option is specified and the
+ option uses an action that takes no values.
+
+ - default -- The value to be produced if the option is not specified.
+
+ - type -- A callable that accepts a single string argument, and
+ returns the converted value. The standard Python types str, int,
+ float, and complex are useful examples of such callables. If None,
+ str is used.
+
+ - choices -- A container of values that should be allowed. If not None,
+ after a command-line argument has been converted to the appropriate
+ type, an exception will be raised if it is not a member of this
+ collection.
+
+ - required -- True if the action must always be specified at the
+ command line. This is only meaningful for optional command-line
+ arguments.
+
+ - help -- The help string describing the argument.
+
+ - metavar -- The name to be used for the option's argument with the
+ help string. If None, the 'dest' value will be used as the name.
+ """
+
+ def __init__(self,
+ option_strings,
+ dest,
+ nargs=None,
+ const=None,
+ default=None,
+ type=None,
+ choices=None,
+ required=False,
+ help=None,
+ metavar=None):
+ self.option_strings = option_strings
+ self.dest = dest
+ self.nargs = nargs
+ self.const = const
+ self.default = default
+ self.type = type
+ self.choices = choices
+ self.required = required
+ self.help = help
+ self.metavar = metavar
+
+ def _get_kwargs(self):
+ names = [
+ 'option_strings',
+ 'dest',
+ 'nargs',
+ 'const',
+ 'default',
+ 'type',
+ 'choices',
+ 'help',
+ 'metavar',
+ ]
+ return [(name, getattr(self, name)) for name in names]
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ raise NotImplementedError(_('.__call__() not defined'))
+
+
+class _StoreAction(Action):
+
+ def __init__(self,
+ option_strings,
+ dest,
+ nargs=None,
+ const=None,
+ default=None,
+ type=None,
+ choices=None,
+ required=False,
+ help=None,
+ metavar=None):
+ if nargs == 0:
+ raise ValueError('nargs for store actions must be > 0; if you '
+ 'have nothing to store, actions such as store '
+ 'true or store const may be more appropriate')
+ if const is not None and nargs != OPTIONAL:
+ raise ValueError('nargs must be %r to supply const' % OPTIONAL)
+ super(_StoreAction, self).__init__(
+ option_strings=option_strings,
+ dest=dest,
+ nargs=nargs,
+ const=const,
+ default=default,
+ type=type,
+ choices=choices,
+ required=required,
+ help=help,
+ metavar=metavar)
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ setattr(namespace, self.dest, values)
+
+
+class _StoreConstAction(Action):
+
+ def __init__(self,
+ option_strings,
+ dest,
+ const,
+ default=None,
+ required=False,
+ help=None,
+ metavar=None):
+ super(_StoreConstAction, self).__init__(
+ option_strings=option_strings,
+ dest=dest,
+ nargs=0,
+ const=const,
+ default=default,
+ required=required,
+ help=help)
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ setattr(namespace, self.dest, self.const)
+
+
+class _StoreTrueAction(_StoreConstAction):
+
+ def __init__(self,
+ option_strings,
+ dest,
+ default=False,
+ required=False,
+ help=None):
+ super(_StoreTrueAction, self).__init__(
+ option_strings=option_strings,
+ dest=dest,
+ const=True,
+ default=default,
+ required=required,
+ help=help)
+
+
+class _StoreFalseAction(_StoreConstAction):
+
+ def __init__(self,
+ option_strings,
+ dest,
+ default=True,
+ required=False,
+ help=None):
+ super(_StoreFalseAction, self).__init__(
+ option_strings=option_strings,
+ dest=dest,
+ const=False,
+ default=default,
+ required=required,
+ help=help)
+
+
+class _AppendAction(Action):
+
+ def __init__(self,
+ option_strings,
+ dest,
+ nargs=None,
+ const=None,
+ default=None,
+ type=None,
+ choices=None,
+ required=False,
+ help=None,
+ metavar=None):
+ if nargs == 0:
+ raise ValueError('nargs for append actions must be > 0; if arg '
+ 'strings are not supplying the value to append, '
+ 'the append const action may be more appropriate')
+ if const is not None and nargs != OPTIONAL:
+ raise ValueError('nargs must be %r to supply const' % OPTIONAL)
+ super(_AppendAction, self).__init__(
+ option_strings=option_strings,
+ dest=dest,
+ nargs=nargs,
+ const=const,
+ default=default,
+ type=type,
+ choices=choices,
+ required=required,
+ help=help,
+ metavar=metavar)
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ items = _copy.copy(_ensure_value(namespace, self.dest, []))
+ items.append(values)
+ setattr(namespace, self.dest, items)
+
+
+class _AppendConstAction(Action):
+
+ def __init__(self,
+ option_strings,
+ dest,
+ const,
+ default=None,
+ required=False,
+ help=None,
+ metavar=None):
+ super(_AppendConstAction, self).__init__(
+ option_strings=option_strings,
+ dest=dest,
+ nargs=0,
+ const=const,
+ default=default,
+ required=required,
+ help=help,
+ metavar=metavar)
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ items = _copy.copy(_ensure_value(namespace, self.dest, []))
+ items.append(self.const)
+ setattr(namespace, self.dest, items)
+
+
+class _CountAction(Action):
+
+ def __init__(self,
+ option_strings,
+ dest,
+ default=None,
+ required=False,
+ help=None):
+ super(_CountAction, self).__init__(
+ option_strings=option_strings,
+ dest=dest,
+ nargs=0,
+ default=default,
+ required=required,
+ help=help)
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ new_count = _ensure_value(namespace, self.dest, 0) + 1
+ setattr(namespace, self.dest, new_count)
+
+
+class _HelpAction(Action):
+
+ def __init__(self,
+ option_strings,
+ dest=SUPPRESS,
+ default=SUPPRESS,
+ help=None):
+ super(_HelpAction, self).__init__(
+ option_strings=option_strings,
+ dest=dest,
+ default=default,
+ nargs=0,
+ help=help)
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ parser.print_help()
+ parser.exit()
+
+
+class _VersionAction(Action):
+
+ def __init__(self,
+ option_strings,
+ version=None,
+ dest=SUPPRESS,
+ default=SUPPRESS,
+ help="show program's version number and exit"):
+ super(_VersionAction, self).__init__(
+ option_strings=option_strings,
+ dest=dest,
+ default=default,
+ nargs=0,
+ help=help)
+ self.version = version
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ version = self.version
+ if version is None:
+ version = parser.version
+ formatter = parser._get_formatter()
+ formatter.add_text(version)
+ parser.exit(message=formatter.format_help())
+
+
+class _SubParsersAction(Action):
+
+ class _ChoicesPseudoAction(Action):
+
+ def __init__(self, name, help):
+ sup = super(_SubParsersAction._ChoicesPseudoAction, self)
+ sup.__init__(option_strings=[], dest=name, help=help)
+
+ def __init__(self,
+ option_strings,
+ prog,
+ parser_class,
+ dest=SUPPRESS,
+ help=None,
+ metavar=None):
+
+ self._prog_prefix = prog
+ self._parser_class = parser_class
+ self._name_parser_map = _collections.OrderedDict()
+ self._choices_actions = []
+
+ super(_SubParsersAction, self).__init__(
+ option_strings=option_strings,
+ dest=dest,
+ nargs=PARSER,
+ choices=self._name_parser_map,
+ help=help,
+ metavar=metavar)
+
+ def add_parser(self, name, **kwargs):
+ # set prog from the existing prefix
+ if kwargs.get('prog') is None:
+ kwargs['prog'] = '%s %s' % (self._prog_prefix, name)
+
+ # create a pseudo-action to hold the choice help
+ if 'help' in kwargs:
+ help = kwargs.pop('help')
+ choice_action = self._ChoicesPseudoAction(name, help)
+ self._choices_actions.append(choice_action)
+
+ # create the parser and add it to the map
+ parser = self._parser_class(**kwargs)
+ self._name_parser_map[name] = parser
+ return parser
+
+ def _get_subactions(self):
+ return self._choices_actions
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ parser_name = values[0]
+ arg_strings = values[1:]
+
+ # set the parser name if requested
+ if self.dest is not SUPPRESS:
+ setattr(namespace, self.dest, parser_name)
+
+ # select the parser
+ try:
+ parser = self._name_parser_map[parser_name]
+ except KeyError:
+ tup = parser_name, ', '.join(self._name_parser_map)
+ msg = _('unknown parser %r (choices: %s)') % tup
+ raise ArgumentError(self, msg)
+
+ # parse all the remaining options into the namespace
+ # store any unrecognized options on the object, so that the top
+ # level parser can decide what to do with them
+
+ # In case this subparser defines new defaults, we parse them
+ # in a new namespace object and then update the original
+ # namespace for the relevant parts.
+ subnamespace, arg_strings = parser.parse_known_args(arg_strings, None)
+ for key, value in vars(subnamespace).items():
+ setattr(namespace, key, value)
+
+ if arg_strings:
+ vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, [])
+ getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings)
+
+
+# ==============
+# Type classes
+# ==============
+
+class FileType(object):
+ """Factory for creating file object types
+
+ Instances of FileType are typically passed as type= arguments to the
+ ArgumentParser add_argument() method.
+
+ Keyword Arguments:
+ - mode -- A string indicating how the file is to be opened. Accepts the
+ same values as the builtin open() function.
+ - bufsize -- The file's desired buffer size. Accepts the same values as
+ the builtin open() function.
+ """
+
+ def __init__(self, mode='r', bufsize=-1):
+ self._mode = mode
+ self._bufsize = bufsize
+
+ def __call__(self, string):
+ # the special argument "-" means sys.std{in,out}
+ if string == '-':
+ if 'r' in self._mode:
+ return _sys.stdin
+ elif 'w' in self._mode:
+ return _sys.stdout
+ else:
+ msg = _('argument "-" with mode %r') % self._mode
+ raise ValueError(msg)
+
+ # all other arguments are used as file names
+ try:
+ return open(string, self._mode, self._bufsize)
+ except IOError as e:
+ message = _("can't open '%s': %s")
+ raise ArgumentTypeError(message % (string, e))
+
+ def __repr__(self):
+ args = self._mode, self._bufsize
+ args_str = ', '.join(repr(arg) for arg in args if arg != -1)
+ return '%s(%s)' % (type(self).__name__, args_str)
+
+# ===========================
+# Optional and Positional Parsing
+# ===========================
+
+class Namespace(_AttributeHolder):
+ """Simple object for storing attributes.
+
+ Implements equality by attribute names and values, and provides a simple
+ string representation.
+ """
+
+ def __init__(self, **kwargs):
+ for name in kwargs:
+ setattr(self, name, kwargs[name])
+
+ __hash__ = None
+
+ def __eq__(self, other):
+ if not isinstance(other, Namespace):
+ return NotImplemented
+ return vars(self) == vars(other)
+
+ def __ne__(self, other):
+ if not isinstance(other, Namespace):
+ return NotImplemented
+ return not (self == other)
+
+ def __contains__(self, key):
+ return key in self.__dict__
+
+
+class _ActionsContainer(object):
+
+ def __init__(self,
+ description,
+ prefix_chars,
+ argument_default,
+ conflict_handler):
+ super(_ActionsContainer, self).__init__()
+
+ self.description = description
+ self.argument_default = argument_default
+ self.prefix_chars = prefix_chars
+ self.conflict_handler = conflict_handler
+
+ # set up registries
+ self._registries = {}
+
+ # register actions
+ self.register('action', None, _StoreAction)
+ self.register('action', 'store', _StoreAction)
+ self.register('action', 'store_const', _StoreConstAction)
+ self.register('action', 'store_true', _StoreTrueAction)
+ self.register('action', 'store_false', _StoreFalseAction)
+ self.register('action', 'append', _AppendAction)
+ self.register('action', 'append_const', _AppendConstAction)
+ self.register('action', 'count', _CountAction)
+ self.register('action', 'help', _HelpAction)
+ self.register('action', 'version', _VersionAction)
+ self.register('action', 'parsers', _SubParsersAction)
+
+ # raise an exception if the conflict handler is invalid
+ self._get_handler()
+
+ # action storage
+ self._actions = []
+ self._option_string_actions = {}
+
+ # groups
+ self._action_groups = []
+ self._mutually_exclusive_groups = []
+
+ # defaults storage
+ self._defaults = {}
+
+ # determines whether an "option" looks like a negative number
+ self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$')
+
+ # whether or not there are any optionals that look like negative
+ # numbers -- uses a list so it can be shared and edited
+ self._has_negative_number_optionals = []
+
+ # ====================
+ # Registration methods
+ # ====================
+ def register(self, registry_name, value, object):
+ registry = self._registries.setdefault(registry_name, {})
+ registry[value] = object
+
+ def _registry_get(self, registry_name, value, default=None):
+ return self._registries[registry_name].get(value, default)
+
+ # ==================================
+ # Namespace default accessor methods
+ # ==================================
+ def set_defaults(self, **kwargs):
+ self._defaults.update(kwargs)
+
+ # if these defaults match any existing arguments, replace
+ # the previous default on the object with the new one
+ for action in self._actions:
+ if action.dest in kwargs:
+ action.default = kwargs[action.dest]
+
+ def get_default(self, dest):
+ for action in self._actions:
+ if action.dest == dest and action.default is not None:
+ return action.default
+ return self._defaults.get(dest, None)
+
+
+ # =======================
+ # Adding argument actions
+ # =======================
+ def add_argument(self, *args, **kwargs):
+ """
+ add_argument(dest, ..., name=value, ...)
+ add_argument(option_string, option_string, ..., name=value, ...)
+ """
+
+ # if no positional args are supplied or only one is supplied and
+ # it doesn't look like an option string, parse a positional
+ # argument
+ chars = self.prefix_chars
+ if not args or len(args) == 1 and args[0][0] not in chars:
+ if args and 'dest' in kwargs:
+ raise ValueError('dest supplied twice for positional argument')
+ kwargs = self._get_positional_kwargs(*args, **kwargs)
+
+ # otherwise, we're adding an optional argument
+ else:
+ kwargs = self._get_optional_kwargs(*args, **kwargs)
+
+ # if no default was supplied, use the parser-level default
+ if 'default' not in kwargs:
+ dest = kwargs['dest']
+ if dest in self._defaults:
+ kwargs['default'] = self._defaults[dest]
+ elif self.argument_default is not None:
+ kwargs['default'] = self.argument_default
+
+ # create the action object, and add it to the parser
+ action_class = self._pop_action_class(kwargs)
+ if not _callable(action_class):
+ raise ValueError('unknown action "%s"' % (action_class,))
+ action = action_class(**kwargs)
+
+ # raise an error if the action type is not callable
+ type_func = self._registry_get('type', action.type, action.type)
+ if not _callable(type_func):
+ raise ValueError('%r is not callable' % (type_func,))
+
+ # raise an error if the metavar does not match the type
+ if hasattr(self, "_get_formatter"):
+ try:
+ self._get_formatter()._format_args(action, None)
+ except TypeError:
+ raise ValueError("length of metavar tuple does not match nargs")
+
+ return self._add_action(action)
+
+ def add_argument_group(self, *args, **kwargs):
+ group = _ArgumentGroup(self, *args, **kwargs)
+ self._action_groups.append(group)
+ return group
+
+ def add_mutually_exclusive_group(self, **kwargs):
+ group = _MutuallyExclusiveGroup(self, **kwargs)
+ self._mutually_exclusive_groups.append(group)
+ return group
+
+ def _add_action(self, action):
+ # resolve any conflicts
+ self._check_conflict(action)
+
+ # add to actions list
+ self._actions.append(action)
+ action.container = self
+
+ # index the action by any option strings it has
+ for option_string in action.option_strings:
+ self._option_string_actions[option_string] = action
+
+ # set the flag if any option strings look like negative numbers
+ for option_string in action.option_strings:
+ if self._negative_number_matcher.match(option_string):
+ if not self._has_negative_number_optionals:
+ self._has_negative_number_optionals.append(True)
+
+ # return the created action
+ return action
+
+ def _remove_action(self, action):
+ self._actions.remove(action)
+
+ def _add_container_actions(self, container):
+ # collect groups by titles
+ title_group_map = {}
+ for group in self._action_groups:
+ if group.title in title_group_map:
+ msg = _('cannot merge actions - two groups are named %r')
+ raise ValueError(msg % (group.title))
+ title_group_map[group.title] = group
+
+ # map each action to its group
+ group_map = {}
+ for group in container._action_groups:
+
+ # if a group with the title exists, use that, otherwise
+ # create a new group matching the container's group
+ if group.title not in title_group_map:
+ title_group_map[group.title] = self.add_argument_group(
+ title=group.title,
+ description=group.description,
+ conflict_handler=group.conflict_handler)
+
+ # map the actions to their new group
+ for action in group._group_actions:
+ group_map[action] = title_group_map[group.title]
+
+ # add container's mutually exclusive groups
+ # NOTE: if add_mutually_exclusive_group ever gains title= and
+ # description= then this code will need to be expanded as above
+ for group in container._mutually_exclusive_groups:
+ mutex_group = self.add_mutually_exclusive_group(
+ required=group.required)
+
+ # map the actions to their new mutex group
+ for action in group._group_actions:
+ group_map[action] = mutex_group
+
+ # add all actions to this container or their group
+ for action in container._actions:
+ group_map.get(action, self)._add_action(action)
+
+ def _get_positional_kwargs(self, dest, **kwargs):
+ # make sure required is not specified
+ if 'required' in kwargs:
+ msg = _("'required' is an invalid argument for positionals")
+ raise TypeError(msg)
+
+ # mark positional arguments as required if at least one is
+ # always required
+ if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]:
+ kwargs['required'] = True
+ if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs:
+ kwargs['required'] = True
+
+ # return the keyword arguments with no option strings
+ return dict(kwargs, dest=dest, option_strings=[])
+
+ def _get_optional_kwargs(self, *args, **kwargs):
+ # determine short and long option strings
+ option_strings = []
+ long_option_strings = []
+ for option_string in args:
+ # error on strings that don't start with an appropriate prefix
+ if not option_string[0] in self.prefix_chars:
+ msg = _('invalid option string %r: '
+ 'must start with a character %r')
+ tup = option_string, self.prefix_chars
+ raise ValueError(msg % tup)
+
+ # strings starting with two prefix characters are long options
+ option_strings.append(option_string)
+ if option_string[0] in self.prefix_chars:
+ if len(option_string) > 1:
+ if option_string[1] in self.prefix_chars:
+ long_option_strings.append(option_string)
+
+ # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x'
+ dest = kwargs.pop('dest', None)
+ if dest is None:
+ if long_option_strings:
+ dest_option_string = long_option_strings[0]
+ else:
+ dest_option_string = option_strings[0]
+ dest = dest_option_string.lstrip(self.prefix_chars)
+ if not dest:
+ msg = _('dest= is required for options like %r')
+ raise ValueError(msg % option_string)
+ dest = dest.replace('-', '_')
+
+ # return the updated keyword arguments
+ return dict(kwargs, dest=dest, option_strings=option_strings)
+
+ def _pop_action_class(self, kwargs, default=None):
+ action = kwargs.pop('action', default)
+ return self._registry_get('action', action, action)
+
+ def _get_handler(self):
+ # determine function from conflict handler string
+ handler_func_name = '_handle_conflict_%s' % self.conflict_handler
+ try:
+ return getattr(self, handler_func_name)
+ except AttributeError:
+ msg = _('invalid conflict_resolution value: %r')
+ raise ValueError(msg % self.conflict_handler)
+
+ def _check_conflict(self, action):
+
+ # find all options that conflict with this option
+ confl_optionals = []
+ for option_string in action.option_strings:
+ if option_string in self._option_string_actions:
+ confl_optional = self._option_string_actions[option_string]
+ confl_optionals.append((option_string, confl_optional))
+
+ # resolve any conflicts
+ if confl_optionals:
+ conflict_handler = self._get_handler()
+ conflict_handler(action, confl_optionals)
+
+ def _handle_conflict_error(self, action, conflicting_actions):
+ message = _('conflicting option string(s): %s')
+ conflict_string = ', '.join([option_string
+ for option_string, action
+ in conflicting_actions])
+ raise ArgumentError(action, message % conflict_string)
+
+ def _handle_conflict_resolve(self, action, conflicting_actions):
+
+ # remove all conflicting options
+ for option_string, action in conflicting_actions:
+
+ # remove the conflicting option
+ action.option_strings.remove(option_string)
+ self._option_string_actions.pop(option_string, None)
+
+ # if the option now has no option string, remove it from the
+ # container holding it
+ if not action.option_strings:
+ action.container._remove_action(action)
+
+
+class _ArgumentGroup(_ActionsContainer):
+
+ def __init__(self, container, title=None, description=None, **kwargs):
+ # add any missing keyword arguments by checking the container
+ update = kwargs.setdefault
+ update('conflict_handler', container.conflict_handler)
+ update('prefix_chars', container.prefix_chars)
+ update('argument_default', container.argument_default)
+ super_init = super(_ArgumentGroup, self).__init__
+ super_init(description=description, **kwargs)
+
+ # group attributes
+ self.title = title
+ self._group_actions = []
+
+ # share most attributes with the container
+ self._registries = container._registries
+ self._actions = container._actions
+ self._option_string_actions = container._option_string_actions
+ self._defaults = container._defaults
+ self._has_negative_number_optionals = \
+ container._has_negative_number_optionals
+ self._mutually_exclusive_groups = container._mutually_exclusive_groups
+
+ def _add_action(self, action):
+ action = super(_ArgumentGroup, self)._add_action(action)
+ self._group_actions.append(action)
+ return action
+
+ def _remove_action(self, action):
+ super(_ArgumentGroup, self)._remove_action(action)
+ self._group_actions.remove(action)
+
+
+class _MutuallyExclusiveGroup(_ArgumentGroup):
+
+ def __init__(self, container, required=False):
+ super(_MutuallyExclusiveGroup, self).__init__(container)
+ self.required = required
+ self._container = container
+
+ def _add_action(self, action):
+ if action.required:
+ msg = _('mutually exclusive arguments must be optional')
+ raise ValueError(msg)
+ action = self._container._add_action(action)
+ self._group_actions.append(action)
+ return action
+
+ def _remove_action(self, action):
+ self._container._remove_action(action)
+ self._group_actions.remove(action)
+
+
+class ArgumentParser(_AttributeHolder, _ActionsContainer):
+ """Object for parsing command line strings into Python objects.
+
+ Keyword Arguments:
+ - prog -- The name of the program (default: sys.argv[0])
+ - usage -- A usage message (default: auto-generated from arguments)
+ - description -- A description of what the program does
+ - epilog -- Text following the argument descriptions
+ - parents -- Parsers whose arguments should be copied into this one
+ - formatter_class -- HelpFormatter class for printing help messages
+ - prefix_chars -- Characters that prefix optional arguments
+ - fromfile_prefix_chars -- Characters that prefix files containing
+ additional arguments
+ - argument_default -- The default value for all arguments
+ - conflict_handler -- String indicating how to handle conflicts
+ - add_help -- Add a -h/--help option
+ """
+
+ def __init__(self,
+ prog=None,
+ usage=None,
+ description=None,
+ epilog=None,
+ version=None,
+ parents=[],
+ formatter_class=HelpFormatter,
+ prefix_chars='-',
+ fromfile_prefix_chars=None,
+ argument_default=None,
+ conflict_handler='error',
+ add_help=True):
+
+ if version is not None:
+ import warnings
+ warnings.warn(
+ """The "version" argument to ArgumentParser is deprecated. """
+ """Please use """
+ """"add_argument(..., action='version', version="N", ...)" """
+ """instead""", DeprecationWarning)
+
+ superinit = super(ArgumentParser, self).__init__
+ superinit(description=description,
+ prefix_chars=prefix_chars,
+ argument_default=argument_default,
+ conflict_handler=conflict_handler)
+
+ # default setting for prog
+ if prog is None:
+ prog = _os.path.basename(_sys.argv[0])
+
+ self.prog = prog
+ self.usage = usage
+ self.epilog = epilog
+ self.version = version
+ self.formatter_class = formatter_class
+ self.fromfile_prefix_chars = fromfile_prefix_chars
+ self.add_help = add_help
+
+ add_group = self.add_argument_group
+ self._positionals = add_group(_('positional arguments'))
+ self._optionals = add_group(_('optional arguments'))
+ self._subparsers = None
+
+ # register types
+ def identity(string):
+ return string
+ self.register('type', None, identity)
+
+ # add help and version arguments if necessary
+ # (using explicit default to override global argument_default)
+ default_prefix = '-' if '-' in prefix_chars else prefix_chars[0]
+ if self.add_help:
+ self.add_argument(
+ default_prefix+'h', default_prefix*2+'help',
+ action='help', default=SUPPRESS,
+ help=_('show this help message and exit'))
+ if self.version:
+ self.add_argument(
+ default_prefix+'v', default_prefix*2+'version',
+ action='version', default=SUPPRESS,
+ version=self.version,
+ help=_("show program's version number and exit"))
+
+ # add parent arguments and defaults
+ for parent in parents:
+ self._add_container_actions(parent)
+ try:
+ defaults = parent._defaults
+ except AttributeError:
+ pass
+ else:
+ self._defaults.update(defaults)
+
+ # =======================
+ # Pretty __repr__ methods
+ # =======================
+ def _get_kwargs(self):
+ names = [
+ 'prog',
+ 'usage',
+ 'description',
+ 'version',
+ 'formatter_class',
+ 'conflict_handler',
+ 'add_help',
+ ]
+ return [(name, getattr(self, name)) for name in names]
+
+ # ==================================
+ # Optional/Positional adding methods
+ # ==================================
+ def add_subparsers(self, **kwargs):
+ if self._subparsers is not None:
+ self.error(_('cannot have multiple subparser arguments'))
+
+ # add the parser class to the arguments if it's not present
+ kwargs.setdefault('parser_class', type(self))
+
+ if 'title' in kwargs or 'description' in kwargs:
+ title = _(kwargs.pop('title', 'subcommands'))
+ description = _(kwargs.pop('description', None))
+ self._subparsers = self.add_argument_group(title, description)
+ else:
+ self._subparsers = self._positionals
+
+ # prog defaults to the usage message of this parser, skipping
+ # optional arguments and with no "usage:" prefix
+ if kwargs.get('prog') is None:
+ formatter = self._get_formatter()
+ positionals = self._get_positional_actions()
+ groups = self._mutually_exclusive_groups
+ formatter.add_usage(self.usage, positionals, groups, '')
+ kwargs['prog'] = formatter.format_help().strip()
+
+ # create the parsers action and add it to the positionals list
+ parsers_class = self._pop_action_class(kwargs, 'parsers')
+ action = parsers_class(option_strings=[], **kwargs)
+ self._subparsers._add_action(action)
+
+ # return the created parsers action
+ return action
+
+ def _add_action(self, action):
+ if action.option_strings:
+ self._optionals._add_action(action)
+ else:
+ self._positionals._add_action(action)
+ return action
+
+ def _get_optional_actions(self):
+ return [action
+ for action in self._actions
+ if action.option_strings]
+
+ def _get_positional_actions(self):
+ return [action
+ for action in self._actions
+ if not action.option_strings]
+
+ # =====================================
+ # Command line argument parsing methods
+ # =====================================
+ def parse_args(self, args=None, namespace=None):
+ args, argv = self.parse_known_args(args, namespace)
+ if argv:
+ msg = _('unrecognized arguments: %s')
+ self.error(msg % ' '.join(argv))
+ return args
+
+ def parse_known_args(self, args=None, namespace=None):
+ if args is None:
+ # args default to the system args
+ args = _sys.argv[1:]
+ else:
+ # make sure that args are mutable
+ args = list(args)
+
+ # default Namespace built from parser defaults
+ if namespace is None:
+ namespace = Namespace()
+
+ # add any action defaults that aren't present
+ for action in self._actions:
+ if action.dest is not SUPPRESS:
+ if not hasattr(namespace, action.dest):
+ if action.default is not SUPPRESS:
+ setattr(namespace, action.dest, action.default)
+
+ # add any parser defaults that aren't present
+ for dest in self._defaults:
+ if not hasattr(namespace, dest):
+ setattr(namespace, dest, self._defaults[dest])
+
+ # parse the arguments and exit if there are any errors
+ try:
+ namespace, args = self._parse_known_args(args, namespace)
+ if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR):
+ args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR))
+ delattr(namespace, _UNRECOGNIZED_ARGS_ATTR)
+ return namespace, args
+ except ArgumentError:
+ err = _sys.exc_info()[1]
+ self.error(str(err))
+
+ def _parse_known_args(self, arg_strings, namespace):  # core parse loop; returns (namespace, extras)
+ # replace arg strings that are file references
+ if self.fromfile_prefix_chars is not None:
+ arg_strings = self._read_args_from_files(arg_strings)
+
+ # map all mutually exclusive arguments to the other arguments
+ # they can't occur with
+ action_conflicts = {}
+ for mutex_group in self._mutually_exclusive_groups:
+ group_actions = mutex_group._group_actions
+ for i, mutex_action in enumerate(mutex_group._group_actions):
+ conflicts = action_conflicts.setdefault(mutex_action, [])
+ conflicts.extend(group_actions[:i])
+ conflicts.extend(group_actions[i + 1:])
+
+ # find all option indices, and determine the arg_string_pattern
+ # which has an 'O' if there is an option at an index,
+ # an 'A' if there is an argument, or a '-' if there is a '--'
+ option_string_indices = {}
+ arg_string_pattern_parts = []
+ arg_strings_iter = iter(arg_strings)
+ for i, arg_string in enumerate(arg_strings_iter):
+
+ # all args after -- are non-options
+ if arg_string == '--':
+ arg_string_pattern_parts.append('-')
+ for arg_string in arg_strings_iter:  # drains the shared iterator, which also ends the outer loop
+ arg_string_pattern_parts.append('A')
+
+ # otherwise, add the arg to the arg strings
+ # and note the index if it was an option
+ else:
+ option_tuple = self._parse_optional(arg_string)
+ if option_tuple is None:
+ pattern = 'A'
+ else:
+ option_string_indices[i] = option_tuple
+ pattern = 'O'
+ arg_string_pattern_parts.append(pattern)
+
+ # join the pieces together to form the pattern
+ arg_strings_pattern = ''.join(arg_string_pattern_parts)
+
+ # converts arg strings to the appropriate values and then takes the action
+ seen_actions = set()
+ seen_non_default_actions = set()
+ # take_action: convert the strings, check mutex conflicts, then invoke the action
+ def take_action(action, argument_strings, option_string=None):
+ seen_actions.add(action)
+ argument_values = self._get_values(action, argument_strings)
+
+ # error if this argument is not allowed with other previously
+ # seen arguments, assuming that actions that use the default
+ # value don't really count as "present"
+ if argument_values is not action.default:  # identity comparison, not equality
+ seen_non_default_actions.add(action)
+ for conflict_action in action_conflicts.get(action, []):
+ if conflict_action in seen_non_default_actions:
+ msg = _('not allowed with argument %s')
+ action_name = _get_action_name(conflict_action)
+ raise ArgumentError(action, msg % action_name)
+
+ # take the action if we didn't receive a SUPPRESS value
+ # (e.g. from a default)
+ if argument_values is not SUPPRESS:
+ action(self, namespace, argument_values, option_string)
+
+ # function to convert arg_strings into an optional action
+ def consume_optional(start_index):
+
+ # get the optional identified at this index
+ option_tuple = option_string_indices[start_index]
+ action, option_string, explicit_arg = option_tuple
+
+ # identify additional optionals in the same arg string
+ # (e.g. -xyz is the same as -x -y -z if no args are required)
+ match_argument = self._match_argument
+ action_tuples = []
+ while True:
+
+ # if we found no optional action, skip it
+ if action is None:
+ extras.append(arg_strings[start_index])  # `extras` is the list bound further down, before the main loop
+ return start_index + 1
+
+ # if there is an explicit argument, try to match the
+ # optional's string arguments to only this
+ if explicit_arg is not None:
+ arg_count = match_argument(action, 'A')
+
+ # if the action is a single-dash option and takes no
+ # arguments, try to parse more single-dash options out
+ # of the tail of the option string
+ chars = self.prefix_chars
+ if arg_count == 0 and option_string[1] not in chars:
+ action_tuples.append((action, [], option_string))
+ char = option_string[0]
+ option_string = char + explicit_arg[0]
+ new_explicit_arg = explicit_arg[1:] or None
+ optionals_map = self._option_string_actions
+ if option_string in optionals_map:
+ action = optionals_map[option_string]
+ explicit_arg = new_explicit_arg
+ else:
+ msg = _('ignored explicit argument %r')
+ raise ArgumentError(action, msg % explicit_arg)
+
+ # if the action expects exactly one argument, we've
+ # successfully matched the option; exit the loop
+ elif arg_count == 1:
+ stop = start_index + 1
+ args = [explicit_arg]
+ action_tuples.append((action, args, option_string))
+ break
+
+ # error if a double-dash option did not use the
+ # explicit argument
+ else:
+ msg = _('ignored explicit argument %r')
+ raise ArgumentError(action, msg % explicit_arg)
+
+ # if there is no explicit argument, try to match the
+ # optional's string arguments with the following strings
+ # if successful, exit the loop
+ else:
+ start = start_index + 1
+ selected_patterns = arg_strings_pattern[start:]
+ arg_count = match_argument(action, selected_patterns)
+ stop = start + arg_count
+ args = arg_strings[start:stop]
+ action_tuples.append((action, args, option_string))
+ break
+
+ # add the Optional to the list and return the index at which
+ # the Optional's string args stopped
+ assert action_tuples
+ for action, args, option_string in action_tuples:
+ take_action(action, args, option_string)
+ return stop
+
+ # the list of Positionals left to be parsed; this is modified
+ # by consume_positionals()
+ positionals = self._get_positional_actions()
+
+ # function to convert arg_strings into positional actions
+ def consume_positionals(start_index):
+ # match as many Positionals as possible
+ match_partial = self._match_arguments_partial
+ selected_pattern = arg_strings_pattern[start_index:]
+ arg_counts = match_partial(positionals, selected_pattern)
+
+ # slice off the appropriate arg strings for each Positional
+ # and add the Positional and its args to the list
+ for action, arg_count in zip(positionals, arg_counts):
+ args = arg_strings[start_index: start_index + arg_count]
+ start_index += arg_count
+ take_action(action, args)
+
+ # slice off the Positionals that we just parsed and return the
+ # index at which the Positionals' string args stopped
+ positionals[:] = positionals[len(arg_counts):]  # slice assignment mutates the list shared with the enclosing scope
+ return start_index
+
+ # consume Positionals and Optionals alternately, until we have
+ # passed the last option string
+ extras = []
+ start_index = 0
+ if option_string_indices:
+ max_option_string_index = max(option_string_indices)
+ else:
+ max_option_string_index = -1
+ while start_index <= max_option_string_index:
+
+ # consume any Positionals preceding the next option
+ next_option_string_index = min([
+ index
+ for index in option_string_indices
+ if index >= start_index])
+ if start_index != next_option_string_index:
+ positionals_end_index = consume_positionals(start_index)
+
+ # only try to parse the next optional if we didn't consume
+ # the option string during the positionals parsing
+ if positionals_end_index > start_index:
+ start_index = positionals_end_index
+ continue
+ else:
+ start_index = positionals_end_index
+
+ # if we consumed all the positionals we could and we're not
+ # at the index of an option string, there were extra arguments
+ if start_index not in option_string_indices:
+ strings = arg_strings[start_index:next_option_string_index]
+ extras.extend(strings)
+ start_index = next_option_string_index
+
+ # consume the next optional and any arguments for it
+ start_index = consume_optional(start_index)
+
+ # consume any positionals following the last Optional
+ stop_index = consume_positionals(start_index)
+
+ # if we didn't consume all the argument strings, there were extras
+ extras.extend(arg_strings[stop_index:])
+
+ # if we didn't use all the Positional objects, there were too few
+ # arg strings supplied.
+ if positionals:
+ self.error(_('too few arguments'))
+
+ # make sure all required actions were present, and convert defaults.
+ for action in self._actions:
+ if action not in seen_actions:
+ if action.required:
+ name = _get_action_name(action)
+ self.error(_('argument %s is required') % name)
+ else:
+ # Convert action default now instead of doing it before
+ # parsing arguments to avoid calling convert functions
+ # twice (which may fail) if the argument was given, but
+ # only if it was defined already in the namespace
+ if (action.default is not None and
+ isinstance(action.default, basestring) and
+ hasattr(namespace, action.dest) and
+ action.default is getattr(namespace, action.dest)):
+ setattr(namespace, action.dest,
+ self._get_value(action, action.default))
+
+ # make sure all required groups had one option present
+ for group in self._mutually_exclusive_groups:
+ if group.required:
+ for action in group._group_actions:
+ if action in seen_non_default_actions:
+ break
+
+ # for/else: reached only when the loop above never hit `break`, i.e. no action of the group was used
+ else:
+ names = [_get_action_name(action)
+ for action in group._group_actions
+ if action.help is not SUPPRESS]
+ msg = _('one of the arguments %s is required')
+ self.error(msg % ' '.join(names))
+
+ # return the updated namespace and the extra arguments
+ return namespace, extras
+
+ def _read_args_from_files(self, arg_strings):
+ # expand arguments referencing files
+ new_arg_strings = []
+ for arg_string in arg_strings:
+
+ # for regular arguments, just add them back into the list
+ if not arg_string or arg_string[0] not in self.fromfile_prefix_chars:
+ new_arg_strings.append(arg_string)
+
+ # replace arguments referencing files with the file content
+ else:
+ try:
+ args_file = open(arg_string[1:])
+ try:
+ arg_strings = []
+ for arg_line in args_file.read().splitlines():
+ for arg in self.convert_arg_line_to_args(arg_line):
+ arg_strings.append(arg)
+ arg_strings = self._read_args_from_files(arg_strings)
+ new_arg_strings.extend(arg_strings)
+ finally:
+ args_file.close()
+ except IOError:
+ err = _sys.exc_info()[1]
+ self.error(str(err))
+
+ # return the modified argument list
+ return new_arg_strings
+
+ def convert_arg_line_to_args(self, arg_line):
+ return [arg_line]
+
+ def _match_argument(self, action, arg_strings_pattern):
+ # match the pattern for this action to the arg strings
+ nargs_pattern = self._get_nargs_pattern(action)
+ match = _re.match(nargs_pattern, arg_strings_pattern)
+
+ # raise an exception if we weren't able to find a match
+ if match is None:
+ nargs_errors = {
+ None: _('expected one argument'),
+ OPTIONAL: _('expected at most one argument'),
+ ONE_OR_MORE: _('expected at least one argument'),
+ }
+ default = _('expected %s argument(s)') % action.nargs
+ msg = nargs_errors.get(action.nargs, default)
+ raise ArgumentError(action, msg)
+
+ # return the number of arguments matched
+ return len(match.group(1))
+
+ def _match_arguments_partial(self, actions, arg_strings_pattern):
+ # progressively shorten the actions list by slicing off the
+ # final actions until we find a match
+ result = []
+ for i in range(len(actions), 0, -1):
+ actions_slice = actions[:i]
+ pattern = ''.join([self._get_nargs_pattern(action)
+ for action in actions_slice])
+ match = _re.match(pattern, arg_strings_pattern)
+ if match is not None:
+ result.extend([len(string) for string in match.groups()])
+ break
+
+ # return the list of arg string counts
+ return result
+
+ def _parse_optional(self, arg_string):
+ # if it's an empty string, it was meant to be a positional
+ if not arg_string:
+ return None
+
+ # if it doesn't start with a prefix, it was meant to be positional
+ if not arg_string[0] in self.prefix_chars:
+ return None
+
+ # if the option string is present in the parser, return the action
+ if arg_string in self._option_string_actions:
+ action = self._option_string_actions[arg_string]
+ return action, arg_string, None
+
+ # if it's just a single character, it was meant to be positional
+ if len(arg_string) == 1:
+ return None
+
+ # if the option string before the "=" is present, return the action
+ if '=' in arg_string:
+ option_string, explicit_arg = arg_string.split('=', 1)
+ if option_string in self._option_string_actions:
+ action = self._option_string_actions[option_string]
+ return action, option_string, explicit_arg
+
+ # search through all possible prefixes of the option string
+ # and all actions in the parser for possible interpretations
+ option_tuples = self._get_option_tuples(arg_string)
+
+ # if multiple actions match, the option string was ambiguous
+ if len(option_tuples) > 1:
+ options = ', '.join([option_string
+ for action, option_string, explicit_arg in option_tuples])
+ tup = arg_string, options
+ self.error(_('ambiguous option: %s could match %s') % tup)
+
+ # if exactly one action matched, this segmentation is good,
+ # so return the parsed action
+ elif len(option_tuples) == 1:
+ option_tuple, = option_tuples
+ return option_tuple
+
+ # if it was not found as an option, but it looks like a negative
+ # number, it was meant to be positional
+ # unless there are negative-number-like options
+ if self._negative_number_matcher.match(arg_string):
+ if not self._has_negative_number_optionals:
+ return None
+
+ # if it contains a space, it was meant to be a positional
+ if ' ' in arg_string:
+ return None
+
+ # it was meant to be an optional but there is no such option
+ # in this parser (though it might be a valid option in a subparser)
+ return None, arg_string, None
+
+ def _get_option_tuples(self, option_string):
+ result = []
+
+ # option strings starting with two prefix characters are only
+ # split at the '='
+ chars = self.prefix_chars
+ if option_string[0] in chars and option_string[1] in chars:
+ if '=' in option_string:
+ option_prefix, explicit_arg = option_string.split('=', 1)
+ else:
+ option_prefix = option_string
+ explicit_arg = None
+ for option_string in self._option_string_actions:
+ if option_string.startswith(option_prefix):
+ action = self._option_string_actions[option_string]
+ tup = action, option_string, explicit_arg
+ result.append(tup)
+
+ # single character options can be concatenated with their arguments
+ # but multiple character options always have to have their argument
+ # separate
+ elif option_string[0] in chars and option_string[1] not in chars:
+ option_prefix = option_string
+ explicit_arg = None
+ short_option_prefix = option_string[:2]
+ short_explicit_arg = option_string[2:]
+
+ for option_string in self._option_string_actions:
+ if option_string == short_option_prefix:
+ action = self._option_string_actions[option_string]
+ tup = action, option_string, short_explicit_arg
+ result.append(tup)
+ elif option_string.startswith(option_prefix):
+ action = self._option_string_actions[option_string]
+ tup = action, option_string, explicit_arg
+ result.append(tup)
+
+ # shouldn't ever get here
+ else:
+ self.error(_('unexpected option string: %s') % option_string)
+
+ # return the collected option tuples
+ return result
+
+ def _get_nargs_pattern(self, action):
+ # in all examples below, we have to allow for '--' args
+ # which are represented as '-' in the pattern
+ nargs = action.nargs
+
+ # the default (None) is assumed to be a single argument
+ if nargs is None:
+ nargs_pattern = '(-*A-*)'
+
+ # allow zero or one arguments
+ elif nargs == OPTIONAL:
+ nargs_pattern = '(-*A?-*)'
+
+ # allow zero or more arguments
+ elif nargs == ZERO_OR_MORE:
+ nargs_pattern = '(-*[A-]*)'
+
+ # allow one or more arguments
+ elif nargs == ONE_OR_MORE:
+ nargs_pattern = '(-*A[A-]*)'
+
+ # allow any number of options or arguments
+ elif nargs == REMAINDER:
+ nargs_pattern = '([-AO]*)'
+
+ # allow one argument followed by any number of options or arguments
+ elif nargs == PARSER:
+ nargs_pattern = '(-*A[-AO]*)'
+
+ # all others should be integers
+ else:
+ nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs)
+
+ # if this is an optional action, -- is not allowed
+ if action.option_strings:
+ nargs_pattern = nargs_pattern.replace('-*', '')
+ nargs_pattern = nargs_pattern.replace('-', '')
+
+ # return the pattern
+ return nargs_pattern
+
+ # ========================
+ # Value conversion methods
+ # ========================
+ def _get_values(self, action, arg_strings):
+ # for everything but PARSER, REMAINDER args, strip out first '--'
+ if action.nargs not in [PARSER, REMAINDER]:
+ try:
+ arg_strings.remove('--')
+ except ValueError:
+ pass
+
+ # optional argument produces a default when not present
+ if not arg_strings and action.nargs == OPTIONAL:
+ if action.option_strings:
+ value = action.const
+ else:
+ value = action.default
+ if isinstance(value, basestring):
+ value = self._get_value(action, value)
+ self._check_value(action, value)
+
+ # when nargs='*' on a positional, if there were no command-line
+ # args, use the default if it is anything other than None
+ elif (not arg_strings and action.nargs == ZERO_OR_MORE and
+ not action.option_strings):
+ if action.default is not None:
+ value = action.default
+ else:
+ value = arg_strings
+ self._check_value(action, value)
+
+ # single argument or optional argument produces a single value
+ elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]:
+ arg_string, = arg_strings
+ value = self._get_value(action, arg_string)
+ self._check_value(action, value)
+
+ # REMAINDER arguments convert all values, checking none
+ elif action.nargs == REMAINDER:
+ value = [self._get_value(action, v) for v in arg_strings]
+
+ # PARSER arguments convert all values, but check only the first
+ elif action.nargs == PARSER:
+ value = [self._get_value(action, v) for v in arg_strings]
+ self._check_value(action, value[0])
+
+ # all other types of nargs produce a list
+ else:
+ value = [self._get_value(action, v) for v in arg_strings]
+ for v in value:
+ self._check_value(action, v)
+
+ # return the converted value
+ return value
+
+ def _get_value(self, action, arg_string):
+ type_func = self._registry_get('type', action.type, action.type)
+ if not _callable(type_func):
+ msg = _('%r is not callable')
+ raise ArgumentError(action, msg % type_func)
+
+ # convert the value to the appropriate type
+ try:
+ result = type_func(arg_string)
+
+ # ArgumentTypeErrors indicate errors
+ except ArgumentTypeError:
+ name = getattr(action.type, '__name__', repr(action.type))
+ msg = str(_sys.exc_info()[1])
+ raise ArgumentError(action, msg)
+
+ # TypeErrors or ValueErrors also indicate errors
+ except (TypeError, ValueError):
+ name = getattr(action.type, '__name__', repr(action.type))
+ msg = _('invalid %s value: %r')
+ raise ArgumentError(action, msg % (name, arg_string))
+
+ # return the converted value
+ return result
+
+ def _check_value(self, action, value):
+ # converted value must be one of the choices (if specified)
+ if action.choices is not None and value not in action.choices:
+ tup = value, ', '.join(map(repr, action.choices))
+ msg = _('invalid choice: %r (choose from %s)') % tup
+ raise ArgumentError(action, msg)
+
+ # =======================
+ # Help-formatting methods
+ # =======================
+ def format_usage(self):
+ formatter = self._get_formatter()
+ formatter.add_usage(self.usage, self._actions,
+ self._mutually_exclusive_groups)
+ return formatter.format_help()
+
+ def format_help(self):
+ formatter = self._get_formatter()
+
+ # usage
+ formatter.add_usage(self.usage, self._actions,
+ self._mutually_exclusive_groups)
+
+ # description
+ formatter.add_text(self.description)
+
+ # positionals, optionals and user-defined groups
+ for action_group in self._action_groups:
+ formatter.start_section(action_group.title)
+ formatter.add_text(action_group.description)
+ formatter.add_arguments(action_group._group_actions)
+ formatter.end_section()
+
+ # epilog
+ formatter.add_text(self.epilog)
+
+ # determine help from format above
+ return formatter.format_help()
+
+ def format_version(self):
+ import warnings
+ warnings.warn(
+ 'The format_version method is deprecated -- the "version" '
+ 'argument to ArgumentParser is no longer supported.',
+ DeprecationWarning)
+ formatter = self._get_formatter()
+ formatter.add_text(self.version)
+ return formatter.format_help()
+
+ def _get_formatter(self):
+ return self.formatter_class(prog=self.prog)
+
+ # =====================
+ # Help-printing methods
+ # =====================
+ def print_usage(self, file=None):
+ if file is None:
+ file = _sys.stdout
+ self._print_message(self.format_usage(), file)
+
+ def print_help(self, file=None):
+ if file is None:
+ file = _sys.stdout
+ self._print_message(self.format_help(), file)
+
+ def print_version(self, file=None):
+ import warnings
+ warnings.warn(
+ 'The print_version method is deprecated -- the "version" '
+ 'argument to ArgumentParser is no longer supported.',
+ DeprecationWarning)
+ self._print_message(self.format_version(), file)
+
+ def _print_message(self, message, file=None):
+ if message:
+ if file is None:
+ file = _sys.stderr
+ file.write(message)
+
+ # ===============
+ # Exiting methods
+ # ===============
+ def exit(self, status=0, message=None):
+ if message:
+ self._print_message(message, _sys.stderr)
+ _sys.exit(status)
+
+ def error(self, message):
+ """error(message: string)
+
+ Prints a usage message incorporating the message to stderr and
+ exits.
+
+ If you override this in a subclass, it should not return -- it
+ should either exit or raise an exception.
+ """
+ self.print_usage(_sys.stderr)
+ self.exit(2, _('%s: error: %s\n') % (self.prog, message))
diff --git a/cashew/Lib/ast.py b/cashew/Lib/ast.py
new file mode 100644
index 0000000..fd5dfdb
--- /dev/null
+++ b/cashew/Lib/ast.py
@@ -0,0 +1,311 @@
+# -*- coding: utf-8 -*-
+"""
+ ast
+ ~~~
+
+ The `ast` module helps Python applications to process trees of the Python
+ abstract syntax grammar. The abstract syntax itself might change with
+ each Python release; this module helps to find out programmatically what
+ the current grammar looks like and allows modifications of it.
+
+ An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as
+ a flag to the `compile()` builtin function or by using the `parse()`
+ function from this module. The result will be a tree of objects whose
+ classes all inherit from `ast.AST`.
+
+ A modified abstract syntax tree can be compiled into a Python code object
+ using the built-in `compile()` function.
+
+ Additionally various helper functions are provided that make working with
+ the trees simpler. The main intention of the helper functions and this
+ module in general is to provide an easy to use interface for libraries
+ that work tightly with the python syntax (template engines for example).
+
+
+ :copyright: Copyright 2008 by Armin Ronacher.
+ :license: Python License.
+"""
+from _ast import *
+from _ast import __version__
+
+
+def parse(source, filename='', mode='exec'):
+ """
+ Parse the source into an AST node.
+ Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
+ """
+ return compile(source, filename, mode, PyCF_ONLY_AST)
+
+
+def literal_eval(node_or_string):
+ """
+ Safely evaluate an expression node or a string containing a Python
+ expression. The string or node provided may only consist of the following
+ Python literal structures: strings, numbers, tuples, lists, dicts, booleans,
+ and None.
+ """
+ _safe_names = {'None': None, 'True': True, 'False': False}
+ if isinstance(node_or_string, basestring):
+ node_or_string = parse(node_or_string, mode='eval')
+ if isinstance(node_or_string, Expression):
+ node_or_string = node_or_string.body
+ def _convert(node):
+ if isinstance(node, Str):
+ return node.s
+ elif isinstance(node, Num):
+ return node.n
+ elif isinstance(node, Tuple):
+ return tuple(map(_convert, node.elts))
+ elif isinstance(node, List):
+ return list(map(_convert, node.elts))
+ elif isinstance(node, Dict):
+ return dict((_convert(k), _convert(v)) for k, v
+ in zip(node.keys, node.values))
+ elif isinstance(node, Name):
+ if node.id in _safe_names:
+ return _safe_names[node.id]
+ elif isinstance(node, BinOp) and \
+ isinstance(node.op, (Add, Sub)) and \
+ isinstance(node.right, Num) and \
+ isinstance(node.right.n, complex) and \
+ isinstance(node.left, Num) and \
+ isinstance(node.left.n, (int, long, float)):
+ left = node.left.n
+ right = node.right.n
+ if isinstance(node.op, Add):
+ return left + right
+ else:
+ return left - right
+ raise ValueError('malformed string')
+ return _convert(node_or_string)
+
+
+def dump(node, annotate_fields=True, include_attributes=False):
+ """
+ Return a formatted dump of the tree in *node*. This is mainly useful for
+ debugging purposes. The returned string will show the names and the values
+ for fields. This makes the code impossible to evaluate, so if evaluation is
+ wanted *annotate_fields* must be set to False. Attributes such as line
+ numbers and column offsets are not dumped by default. If this is wanted,
+ *include_attributes* can be set to True.
+ """
+ def _format(node):
+ if isinstance(node, AST):
+ fields = [(a, _format(b)) for a, b in iter_fields(node)]
+ rv = '%s(%s' % (node.__class__.__name__, ', '.join(
+ ('%s=%s' % field for field in fields)
+ if annotate_fields else
+ (b for a, b in fields)
+ ))
+ if include_attributes and node._attributes:
+ rv += fields and ', ' or ' '
+ rv += ', '.join('%s=%s' % (a, _format(getattr(node, a)))
+ for a in node._attributes)
+ return rv + ')'
+ elif isinstance(node, list):
+ return '[%s]' % ', '.join(_format(x) for x in node)
+ return repr(node)
+ if not isinstance(node, AST):
+ raise TypeError('expected AST, got %r' % node.__class__.__name__)
+ return _format(node)
+
+
+def copy_location(new_node, old_node):
+ """
+ Copy source location (`lineno` and `col_offset` attributes) from
+ *old_node* to *new_node* if possible, and return *new_node*.
+ """
+ for attr in 'lineno', 'col_offset':
+ if attr in old_node._attributes and attr in new_node._attributes \
+ and hasattr(old_node, attr):
+ setattr(new_node, attr, getattr(old_node, attr))
+ return new_node
+
+
+def fix_missing_locations(node):
+ """
+ When you compile a node tree with compile(), the compiler expects lineno and
+ col_offset attributes for every node that supports them. This is rather
+ tedious to fill in for generated nodes, so this helper adds these attributes
+ recursively where not already set, by setting them to the values of the
+ parent node. It works recursively starting at *node*.
+ """
+ def _fix(node, lineno, col_offset):
+ if 'lineno' in node._attributes:
+ if not hasattr(node, 'lineno'):
+ node.lineno = lineno
+ else:
+ lineno = node.lineno
+ if 'col_offset' in node._attributes:
+ if not hasattr(node, 'col_offset'):
+ node.col_offset = col_offset
+ else:
+ col_offset = node.col_offset
+ for child in iter_child_nodes(node):
+ _fix(child, lineno, col_offset)
+ _fix(node, 1, 0)
+ return node
+
+
+def increment_lineno(node, n=1):
+ """
+ Increment the line number of each node in the tree starting at *node* by *n*.
+ This is useful to "move code" to a different location in a file.
+ """
+ for child in walk(node):
+ if 'lineno' in child._attributes:
+ child.lineno = getattr(child, 'lineno', 0) + n
+ return node
+
+
+def iter_fields(node):
+ """
+ Yield a tuple of ``(fieldname, value)`` for each field in ``node._fields``
+ that is present on *node*.
+ """
+ for field in node._fields:
+ try:
+ yield field, getattr(node, field)
+ except AttributeError:
+ pass
+
+
+def iter_child_nodes(node):
+ """
+ Yield all direct child nodes of *node*, that is, all fields that are nodes
+ and all items of fields that are lists of nodes.
+ """
+ for name, field in iter_fields(node):
+ if isinstance(field, AST):
+ yield field
+ elif isinstance(field, list):
+ for item in field:
+ if isinstance(item, AST):
+ yield item
+
+
+def get_docstring(node, clean=True):
+ """
+ Return the docstring for the given node or None if no docstring can
+ be found. If the node provided does not have docstrings a TypeError
+ will be raised.
+ """
+ if not isinstance(node, (FunctionDef, ClassDef, Module)):
+ raise TypeError("%r can't have docstrings" % node.__class__.__name__)
+ if node.body and isinstance(node.body[0], Expr) and \
+ isinstance(node.body[0].value, Str):
+ if clean:
+ import inspect
+ return inspect.cleandoc(node.body[0].value.s)
+ return node.body[0].value.s
+
+
+def walk(node):
+ """
+ Recursively yield all descendant nodes in the tree starting at *node*
+ (including *node* itself), in no specified order. This is useful if you
+ only want to modify nodes in place and don't care about the context.
+ """
+ from collections import deque
+ todo = deque([node])
+ while todo:
+ node = todo.popleft()
+ todo.extend(iter_child_nodes(node))
+ yield node
+
+
+class NodeVisitor(object):
+ """
+ A node visitor base class that walks the abstract syntax tree and calls a
+ visitor function for every node found. This function may return a value
+ which is forwarded by the `visit` method.
+
+ This class is meant to be subclassed, with the subclass adding visitor
+ methods.
+
+ Per default the visitor functions for the nodes are ``'visit_'`` +
+ class name of the node. So a `TryFinally` node visit function would
+ be `visit_TryFinally`. This behavior can be changed by overriding
+ the `visit` method. If no visitor function exists for a node
+ (return value `None`) the `generic_visit` visitor is used instead.
+
+ Don't use the `NodeVisitor` if you want to apply changes to nodes during
+ traversing. For this a special visitor exists (`NodeTransformer`) that
+ allows modifications.
+ """
+
+ def visit(self, node):
+ """Visit a node."""
+ method = 'visit_' + node.__class__.__name__
+ visitor = getattr(self, method, self.generic_visit)
+ return visitor(node)
+
+ def generic_visit(self, node):
+ """Called if no explicit visitor function exists for a node."""
+ for field, value in iter_fields(node):
+ if isinstance(value, list):
+ for item in value:
+ if isinstance(item, AST):
+ self.visit(item)
+ elif isinstance(value, AST):
+ self.visit(value)
+
+
+class NodeTransformer(NodeVisitor):
+ """
+ A :class:`NodeVisitor` subclass that walks the abstract syntax tree and
+ allows modification of nodes.
+
+ The `NodeTransformer` will walk the AST and use the return value of the
+ visitor methods to replace or remove the old node. If the return value of
+ the visitor method is ``None``, the node will be removed from its location,
+ otherwise it is replaced with the return value. The return value may be the
+ original node in which case no replacement takes place.
+
+ Here is an example transformer that rewrites all occurrences of name lookups
+ (``foo``) to ``data['foo']``::
+
+ class RewriteName(NodeTransformer):
+
+ def visit_Name(self, node):
+ return copy_location(Subscript(
+ value=Name(id='data', ctx=Load()),
+ slice=Index(value=Str(s=node.id)),
+ ctx=node.ctx
+ ), node)
+
+ Keep in mind that if the node you're operating on has child nodes you must
+ either transform the child nodes yourself or call the :meth:`generic_visit`
+ method for the node first.
+
+ For nodes that were part of a collection of statements (that applies to all
+ statement nodes), the visitor may also return a list of nodes rather than
+ just a single node.
+
+ Usually you use the transformer like this::
+
+ node = YourTransformer().visit(node)
+ """
+
+ def generic_visit(self, node):
+ for field, old_value in iter_fields(node):
+ old_value = getattr(node, field, None)
+ if isinstance(old_value, list):
+ new_values = []
+ for value in old_value:
+ if isinstance(value, AST):
+ value = self.visit(value)
+ if value is None:
+ continue
+ elif not isinstance(value, AST):
+ new_values.extend(value)
+ continue
+ new_values.append(value)
+ old_value[:] = new_values
+ elif isinstance(old_value, AST):
+ new_node = self.visit(old_value)
+ if new_node is None:
+ delattr(node, field)
+ else:
+ setattr(node, field, new_node)
+ return node
diff --git a/cashew/Lib/asynchat.py b/cashew/Lib/asynchat.py
new file mode 100644
index 0000000..392ee61
--- /dev/null
+++ b/cashew/Lib/asynchat.py
@@ -0,0 +1,321 @@
+# -*- Mode: Python; tab-width: 4 -*-
+# Id: asynchat.py,v 2.26 2000/09/07 22:29:26 rushing Exp
+# Author: Sam Rushing
+
+# ======================================================================
+# Copyright 1996 by Sam Rushing
+#
+# All Rights Reserved
+#
+# Permission to use, copy, modify, and distribute this software and
+# its documentation for any purpose and without fee is hereby
+# granted, provided that the above copyright notice appear in all
+# copies and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of Sam
+# Rushing not be used in advertising or publicity pertaining to
+# distribution of the software without specific, written prior
+# permission.
+#
+# SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
+# NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+# ======================================================================
+
+r"""A class supporting chat-style (command/response) protocols.
+
+This class adds support for 'chat' style protocols - where one side
+sends a 'command', and the other sends a response (examples would be
+the common internet protocols - smtp, nntp, ftp, etc.).
+
+The handle_read() method looks at the input stream for the current
+'terminator' (usually '\r\n' for single-line responses, '\r\n.\r\n'
+for multi-line output), calling self.found_terminator() on its
+receipt.
+
+For example:
+Say you build an async nntp client using this class. At the start
+of the connection, you'll have self.terminator set to '\r\n', in
+order to process the single-line greeting. Just before issuing a
+'LIST' command you'll set it to '\r\n.\r\n'. The output of the LIST
+command will be accumulated (using your own 'collect_incoming_data'
+method) up to the terminator, and then control will be returned to
+you - by calling your self.found_terminator() method.
+"""
+
+import asyncore
+import errno
+import socket
+from collections import deque
+from sys import py3kwarning
+from warnings import filterwarnings, catch_warnings
+
+_BLOCKING_IO_ERRORS = (errno.EAGAIN, errno.EALREADY, errno.EINPROGRESS,
+ errno.EWOULDBLOCK)
+
+
+class async_chat (asyncore.dispatcher):
+ """This is an abstract class. You must derive from this class, and add
+ the two methods collect_incoming_data() and found_terminator()"""
+
+ # these are overridable defaults
+
+ ac_in_buffer_size = 4096
+ ac_out_buffer_size = 4096
+
+ def __init__ (self, sock=None, map=None):
+ # for string terminator matching
+ self.ac_in_buffer = ''
+
+ # we use a list here rather than cStringIO for a few reasons...
+ # del lst[:] is faster than sio.truncate(0)
+ # lst = [] is faster than sio.truncate(0)
+ # cStringIO will be gaining unicode support in py3k, which
+ # will negatively affect the performance of bytes compared to
+ # a ''.join() equivalent
+ self.incoming = []
+
+ # we toss the use of the "simple producer" and replace it with
+ # a pure deque, which the original fifo was a wrapping of
+ self.producer_fifo = deque()
+ asyncore.dispatcher.__init__ (self, sock, map)
+
+ def collect_incoming_data(self, data):
+ raise NotImplementedError("must be implemented in subclass")
+
+ def _collect_incoming_data(self, data):
+ self.incoming.append(data)
+
+ def _get_data(self):
+ d = ''.join(self.incoming)
+ del self.incoming[:]
+ return d
+
+ def found_terminator(self):
+ raise NotImplementedError("must be implemented in subclass")
+
+ def set_terminator (self, term):
+ "Set the input delimiter. Can be a fixed string of any length, an integer, or None"
+ self.terminator = term
+
+ def get_terminator (self):
+ return self.terminator
+
+ # grab some more data from the socket,
+ # throw it to the collector method,
+ # check for the terminator,
+ # if found, transition to the next state.
+
+ def handle_read (self):
+
+ try:
+ data = self.recv (self.ac_in_buffer_size)
+ except socket.error, why:
+ if why.args[0] in _BLOCKING_IO_ERRORS:
+ return
+ self.handle_error()
+ return
+
+ self.ac_in_buffer = self.ac_in_buffer + data
+
+ # Continue to search for self.terminator in self.ac_in_buffer,
+ # while calling self.collect_incoming_data. The while loop
+ # is necessary because we might read several data+terminator
+ # combos with a single recv(4096).
+
+ while self.ac_in_buffer:
+ lb = len(self.ac_in_buffer)
+ terminator = self.get_terminator()
+ if not terminator:
+ # no terminator, collect it all
+ self.collect_incoming_data (self.ac_in_buffer)
+ self.ac_in_buffer = ''
+ elif isinstance(terminator, (int, long)):
+ # numeric terminator
+ n = terminator
+ if lb < n:
+ self.collect_incoming_data (self.ac_in_buffer)
+ self.ac_in_buffer = ''
+ self.terminator = self.terminator - lb
+ else:
+ self.collect_incoming_data (self.ac_in_buffer[:n])
+ self.ac_in_buffer = self.ac_in_buffer[n:]
+ self.terminator = 0
+ self.found_terminator()
+ else:
+ # 3 cases:
+ # 1) end of buffer matches terminator exactly:
+ # collect data, transition
+ # 2) end of buffer matches some prefix:
+ # collect data to the prefix
+ # 3) end of buffer does not match any prefix:
+ # collect data
+ terminator_len = len(terminator)
+ index = self.ac_in_buffer.find(terminator)
+ if index != -1:
+ # we found the terminator
+ if index > 0:
+ # don't bother reporting the empty string (source of subtle bugs)
+ self.collect_incoming_data (self.ac_in_buffer[:index])
+ self.ac_in_buffer = self.ac_in_buffer[index+terminator_len:]
+ # This does the Right Thing if the terminator is changed here.
+ self.found_terminator()
+ else:
+ # check for a prefix of the terminator
+ index = find_prefix_at_end (self.ac_in_buffer, terminator)
+ if index:
+ if index != lb:
+ # we found a prefix, collect up to the prefix
+ self.collect_incoming_data (self.ac_in_buffer[:-index])
+ self.ac_in_buffer = self.ac_in_buffer[-index:]
+ break
+ else:
+ # no prefix, collect it all
+ self.collect_incoming_data (self.ac_in_buffer)
+ self.ac_in_buffer = ''
+
+ def handle_write (self):
+ self.initiate_send()
+
+ def handle_close (self):
+ self.close()
+
+ def push (self, data):
+ sabs = self.ac_out_buffer_size
+ if len(data) > sabs:
+ for i in xrange(0, len(data), sabs):
+ self.producer_fifo.append(data[i:i+sabs])
+ else:
+ self.producer_fifo.append(data)
+ self.initiate_send()
+
+ def push_with_producer (self, producer):
+ self.producer_fifo.append(producer)
+ self.initiate_send()
+
+ def readable (self):
+ "predicate for inclusion in the readable for select()"
+ # cannot use the old predicate, it violates the claim of the
+ # set_terminator method.
+
+ # return (len(self.ac_in_buffer) <= self.ac_in_buffer_size)
+ return 1
+
+ def writable (self):
+ "predicate for inclusion in the writable for select()"
+ return self.producer_fifo or (not self.connected)
+
+ def close_when_done (self):
+ "automatically close this channel once the outgoing queue is empty"
+ self.producer_fifo.append(None)
+
+ def initiate_send(self):
+ while self.producer_fifo and self.connected:
+ first = self.producer_fifo[0]
+ # handle empty string/buffer or None entry
+ if not first:
+ del self.producer_fifo[0]
+ if first is None:
+ self.handle_close()
+ return
+
+ # handle classic producer behavior
+ obs = self.ac_out_buffer_size
+ try:
+ with catch_warnings():
+ if py3kwarning:
+ filterwarnings("ignore", ".*buffer", DeprecationWarning)
+ data = buffer(first, 0, obs)
+ except TypeError:
+ data = first.more()
+ if data:
+ self.producer_fifo.appendleft(data)
+ else:
+ del self.producer_fifo[0]
+ continue
+
+ # send the data
+ try:
+ num_sent = self.send(data)
+ except socket.error:
+ self.handle_error()
+ return
+
+ if num_sent:
+ if num_sent < len(data) or obs < len(first):
+ self.producer_fifo[0] = first[num_sent:]
+ else:
+ del self.producer_fifo[0]
+ # we tried to send some actual data
+ return
+
+ def discard_buffers (self):
+ # Emergencies only!
+ self.ac_in_buffer = ''
+ del self.incoming[:]
+ self.producer_fifo.clear()
+
+class simple_producer:
+
+ def __init__ (self, data, buffer_size=512):
+ self.data = data
+ self.buffer_size = buffer_size
+
+ def more (self):
+ if len (self.data) > self.buffer_size:
+ result = self.data[:self.buffer_size]
+ self.data = self.data[self.buffer_size:]
+ return result
+ else:
+ result = self.data
+ self.data = ''
+ return result
+
+class fifo:
+ def __init__ (self, list=None):
+ if not list:
+ self.list = deque()
+ else:
+ self.list = deque(list)
+
+ def __len__ (self):
+ return len(self.list)
+
+ def is_empty (self):
+ return not self.list
+
+ def first (self):
+ return self.list[0]
+
+ def push (self, data):
+ self.list.append(data)
+
+ def pop (self):
+ if self.list:
+ return (1, self.list.popleft())
+ else:
+ return (0, None)
+
+# Given 'haystack', see if any prefix of 'needle' is at its end. This
+# assumes an exact match has already been checked. Return the number of
+# characters matched.
+# for example:
+# f_p_a_e ("qwerty\r", "\r\n") => 1
+# f_p_a_e ("qwertydkjf", "\r\n") => 0
+# f_p_a_e ("qwerty\r\n", "\r\n") => <undefined>
+
+# this could maybe be made faster with a computed regex?
+# [answer: no; circa Python-2.0, Jan 2001]
+# new python: 28961/s
+# old python: 18307/s
+# re: 12820/s
+# regex: 14035/s
+
+def find_prefix_at_end (haystack, needle):
+ l = len(needle) - 1
+ while l and not haystack.endswith(needle[:l]):
+ l -= 1
+ return l
diff --git a/cashew/Lib/asyncore.py b/cashew/Lib/asyncore.py
new file mode 100644
index 0000000..105982f
--- /dev/null
+++ b/cashew/Lib/asyncore.py
@@ -0,0 +1,663 @@
+# -*- Mode: Python -*-
+# Id: asyncore.py,v 2.51 2000/09/07 22:29:26 rushing Exp
+# Author: Sam Rushing
+
+# ======================================================================
+# Copyright 1996 by Sam Rushing
+#
+# All Rights Reserved
+#
+# Permission to use, copy, modify, and distribute this software and
+# its documentation for any purpose and without fee is hereby
+# granted, provided that the above copyright notice appear in all
+# copies and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of Sam
+# Rushing not be used in advertising or publicity pertaining to
+# distribution of the software without specific, written prior
+# permission.
+#
+# SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
+# NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+# ======================================================================
+
+"""Basic infrastructure for asynchronous socket service clients and servers.
+
+There are only two ways to have a program on a single processor do "more
+than one thing at a time". Multi-threaded programming is the simplest and
+most popular way to do it, but there is another very different technique,
+that lets you have nearly all the advantages of multi-threading, without
+actually using multiple threads. It's really only practical if your program
+is largely I/O bound. If your program is CPU bound, then pre-emptive
+scheduled threads are probably what you really need. Network servers are
+rarely CPU-bound, however.
+
+If your operating system supports the select() system call in its I/O
+library (and nearly all do), then you can use it to juggle multiple
+communication channels at once; doing other work while your I/O is taking
+place in the "background." Although this strategy can seem strange and
+complex, especially at first, it is in many ways easier to understand and
+control than multi-threaded programming. The module documented here solves
+many of the difficult problems for you, making the task of building
+sophisticated high-performance network servers and clients a snap.
+"""
+
+import select
+import socket
+import sys
+import time
+import warnings
+
+import os
+from errno import EALREADY, EINPROGRESS, EWOULDBLOCK, ECONNRESET, EINVAL, \
+ ENOTCONN, ESHUTDOWN, EINTR, EISCONN, EBADF, ECONNABORTED, EPIPE, EAGAIN, \
+ errorcode
+
+# errno values that readwrite(), dispatcher.send() and dispatcher.recv()
+# answer with handle_close() instead of treating them as errors.
+_DISCONNECTED = frozenset((ECONNRESET, ENOTCONN, ESHUTDOWN, ECONNABORTED, EPIPE,
+ EBADF))
+
+# Default channel map (file descriptor -> dispatcher). An existing
+# socket_map binding is kept rather than replaced -- presumably so that
+# re-executing this module does not drop registered channels (TODO confirm).
+try:
+ socket_map
+except NameError:
+ socket_map = {}
+
+def _strerror(err):
+ try:
+ return os.strerror(err)
+ except (ValueError, OverflowError, NameError):
+ if err in errorcode:
+ return errorcode[err]
+ return "Unknown error %s" %err
+
+class ExitNow(Exception):
+ """Exception that read(), write(), _exception() and readwrite() re-raise
+ instead of passing to the channel's handle_error()."""
+ pass
+
+# Exceptions the dispatch helpers let propagate to their caller.
+_reraised_exceptions = (ExitNow, KeyboardInterrupt, SystemExit)
+
+def read(obj):
+ try:
+ obj.handle_read_event()
+ except _reraised_exceptions:
+ raise
+ except:
+ obj.handle_error()
+
+def write(obj):
+ try:
+ obj.handle_write_event()
+ except _reraised_exceptions:
+ raise
+ except:
+ obj.handle_error()
+
+def _exception(obj):
+ try:
+ obj.handle_expt_event()
+ except _reraised_exceptions:
+ raise
+ except:
+ obj.handle_error()
+
+def readwrite(obj, flags):
+ try:
+ if flags & select.POLLIN:
+ obj.handle_read_event()
+ if flags & select.POLLOUT:
+ obj.handle_write_event()
+ if flags & select.POLLPRI:
+ obj.handle_expt_event()
+ if flags & (select.POLLHUP | select.POLLERR | select.POLLNVAL):
+ obj.handle_close()
+ except socket.error, e:
+ if e.args[0] not in _DISCONNECTED:
+ obj.handle_error()
+ else:
+ obj.handle_close()
+ except _reraised_exceptions:
+ raise
+ except:
+ obj.handle_error()
+
+def poll(timeout=0.0, map=None):
+ if map is None:
+ map = socket_map
+ if map:
+ r = []; w = []; e = []
+ for fd, obj in map.items():
+ is_r = obj.readable()
+ is_w = obj.writable()
+ if is_r:
+ r.append(fd)
+ # accepting sockets should not be writable
+ if is_w and not obj.accepting:
+ w.append(fd)
+ if is_r or is_w:
+ e.append(fd)
+ if [] == r == w == e:
+ time.sleep(timeout)
+ return
+
+ try:
+ r, w, e = select.select(r, w, e, timeout)
+ except select.error, err:
+ if err.args[0] != EINTR:
+ raise
+ else:
+ return
+
+ for fd in r:
+ obj = map.get(fd)
+ if obj is None:
+ continue
+ read(obj)
+
+ for fd in w:
+ obj = map.get(fd)
+ if obj is None:
+ continue
+ write(obj)
+
+ for fd in e:
+ obj = map.get(fd)
+ if obj is None:
+ continue
+ _exception(obj)
+
+def poll2(timeout=0.0, map=None):
+ # Use the poll() support added to the select module in Python 2.0
+ if map is None:
+ map = socket_map
+ if timeout is not None:
+ # timeout is in milliseconds
+ timeout = int(timeout*1000)
+ pollster = select.poll()
+ if map:
+ for fd, obj in map.items():
+ flags = 0
+ if obj.readable():
+ flags |= select.POLLIN | select.POLLPRI
+ # accepting sockets should not be writable
+ if obj.writable() and not obj.accepting:
+ flags |= select.POLLOUT
+ if flags:
+ # Only check for exceptions if object was either readable
+ # or writable.
+ flags |= select.POLLERR | select.POLLHUP | select.POLLNVAL
+ pollster.register(fd, flags)
+ try:
+ r = pollster.poll(timeout)
+ except select.error, err:
+ if err.args[0] != EINTR:
+ raise
+ r = []
+ for fd, flags in r:
+ obj = map.get(fd)
+ if obj is None:
+ continue
+ readwrite(obj, flags)
+
+poll3 = poll2 # Alias for backward compatibility
+
+def loop(timeout=30.0, use_poll=False, map=None, count=None):
+ if map is None:
+ map = socket_map
+
+ if use_poll and hasattr(select, 'poll'):
+ poll_fun = poll2
+ else:
+ poll_fun = poll
+
+ if count is None:
+ while map:
+ poll_fun(timeout, map)
+
+ else:
+ while map and count > 0:
+ poll_fun(timeout, map)
+ count = count - 1
+
+class dispatcher:
+ """Wrapper around a non-blocking socket that registers itself in a
+ channel map (fd -> dispatcher) and turns poll results into handle_*()
+ callbacks. Subclasses override the handle_*() methods; the defaults
+ only log a warning."""
+
+ debug = False
+ connected = False
+ accepting = False
+ connecting = False
+ closing = False
+ addr = None
+ # log_info() drops messages whose type is listed here.
+ ignore_log_types = frozenset(['warning'])
+
+ def __init__(self, sock=None, map=None):
+ """Bind this channel to map (default: socket_map) and, when sock is
+ given, make it non-blocking and register it. The channel is marked
+ connected unless getpeername() fails with ENOTCONN or EINVAL; any
+ other socket error unregisters the channel and propagates."""
+ if map is None:
+ self._map = socket_map
+ else:
+ self._map = map
+
+ self._fileno = None
+
+ if sock:
+ # Set to nonblocking just to make sure for cases where we
+ # get a socket from a blocking source.
+ sock.setblocking(0)
+ self.set_socket(sock, map)
+ self.connected = True
+ # The constructor no longer requires that the socket
+ # passed be connected.
+ try:
+ self.addr = sock.getpeername()
+ except socket.error, err:
+ if err.args[0] in (ENOTCONN, EINVAL):
+ # To handle the case where we got an unconnected
+ # socket.
+ self.connected = False
+ else:
+ # The socket is broken in some unknown way, alert
+ # the user and remove it from the map (to prevent
+ # polling of broken sockets).
+ self.del_channel(map)
+ raise
+ else:
+ self.socket = None
+
+ def __repr__(self):
+ """Return '<module.Class [listening|connected] [addr] at 0x...>'."""
+ status = [self.__class__.__module__+"."+self.__class__.__name__]
+ if self.accepting and self.addr:
+ status.append('listening')
+ elif self.connected:
+ status.append('connected')
+ if self.addr is not None:
+ try:
+ status.append('%s:%d' % self.addr)
+ except TypeError:
+ # addr is not a (host, port) pair
+ status.append(repr(self.addr))
+ return '<%s at %#x>' % (' '.join(status), id(self))
+
+ __str__ = __repr__
+
+ def add_channel(self, map=None):
+ """Register this channel in map (default: self._map) under its fd."""
+ #self.log_info('adding channel %s' % self)
+ if map is None:
+ map = self._map
+ map[self._fileno] = self
+
+ def del_channel(self, map=None):
+ """Remove this channel from map (default: self._map) if present and
+ forget its file descriptor."""
+ fd = self._fileno
+ if map is None:
+ map = self._map
+ if fd in map:
+ #self.log_info('closing channel %d:%s' % (fd, self))
+ del map[fd]
+ self._fileno = None
+
+ def create_socket(self, family, type):
+ """Create a non-blocking socket of the given family/type and adopt it."""
+ self.family_and_type = family, type
+ sock = socket.socket(family, type)
+ sock.setblocking(0)
+ self.set_socket(sock)
+
+ def set_socket(self, sock, map=None):
+ """Adopt sock as the underlying socket and register the channel."""
+ self.socket = sock
+## self.__dict__['socket'] = sock
+ self._fileno = sock.fileno()
+ self.add_channel(map)
+
+ def set_reuse_addr(self):
+ # try to re-use a server port if possible
+ # (best effort: a socket.error here is deliberately ignored)
+ try:
+ self.socket.setsockopt(
+ socket.SOL_SOCKET, socket.SO_REUSEADDR,
+ self.socket.getsockopt(socket.SOL_SOCKET,
+ socket.SO_REUSEADDR) | 1
+ )
+ except socket.error:
+ pass
+
+ # ==================================================
+ # predicates for select()
+ # these are used as filters for the lists of sockets
+ # to pass to select().
+ # ==================================================
+
+ def readable(self):
+ return True
+
+ def writable(self):
+ return True
+
+ # ==================================================
+ # socket object methods.
+ # ==================================================
+
+ def listen(self, num):
+ """Mark the channel as accepting and listen with backlog num
+ (capped at 5 when os.name is 'nt')."""
+ self.accepting = True
+ if os.name == 'nt' and num > 5:
+ num = 5
+ return self.socket.listen(num)
+
+ def bind(self, addr):
+ self.addr = addr
+ return self.socket.bind(addr)
+
+ def connect(self, address):
+ """Start a non-blocking connect to address. An in-progress result
+ just records the address; an immediate success runs
+ handle_connect_event(); any other errno raises socket.error."""
+ self.connected = False
+ self.connecting = True
+ err = self.socket.connect_ex(address)
+ if err in (EINPROGRESS, EALREADY, EWOULDBLOCK) \
+ or err == EINVAL and os.name in ('nt', 'ce'):
+ self.addr = address
+ return
+ if err in (0, EISCONN):
+ self.addr = address
+ self.handle_connect_event()
+ else:
+ raise socket.error(err, errorcode[err])
+
+ def accept(self):
+ """Return (conn, addr) for a new connection, or None when nothing
+ could be accepted right now."""
+ # XXX can return either an address pair or None
+ try:
+ conn, addr = self.socket.accept()
+ except TypeError:
+ return None
+ except socket.error as why:
+ if why.args[0] in (EWOULDBLOCK, ECONNABORTED, EAGAIN):
+ return None
+ else:
+ raise
+ else:
+ return conn, addr
+
+ def send(self, data):
+ """Send data and return the number of bytes sent. Returns 0 on
+ EWOULDBLOCK, and 0 after calling handle_close() when the errno is
+ in _DISCONNECTED; other socket errors propagate."""
+ try:
+ result = self.socket.send(data)
+ return result
+ except socket.error, why:
+ if why.args[0] == EWOULDBLOCK:
+ return 0
+ elif why.args[0] in _DISCONNECTED:
+ self.handle_close()
+ return 0
+ else:
+ raise
+
+ def recv(self, buffer_size):
+ """Receive up to buffer_size bytes. Returns '' after calling
+ handle_close() when the read is empty or the errno is in
+ _DISCONNECTED; other socket errors propagate."""
+ try:
+ data = self.socket.recv(buffer_size)
+ if not data:
+ # a closed connection is indicated by signaling
+ # a read condition, and having recv() return 0.
+ self.handle_close()
+ return ''
+ else:
+ return data
+ except socket.error, why:
+ # winsock sometimes raises ENOTCONN
+ if why.args[0] in _DISCONNECTED:
+ self.handle_close()
+ return ''
+ else:
+ raise
+
+ def close(self):
+ """Reset the state flags, unregister the channel and close the
+ socket, ignoring ENOTCONN and EBADF from the close itself."""
+ self.connected = False
+ self.accepting = False
+ self.connecting = False
+ self.del_channel()
+ try:
+ self.socket.close()
+ except socket.error, why:
+ if why.args[0] not in (ENOTCONN, EBADF):
+ raise
+
+ # cheap inheritance, used to pass all other attribute
+ # references to the underlying socket object.
+ def __getattr__(self, attr):
+ # Only reached for names not found on the dispatcher itself; a
+ # successful lookup on the socket also emits a DeprecationWarning.
+ try:
+ retattr = getattr(self.socket, attr)
+ except AttributeError:
+ raise AttributeError("%s instance has no attribute '%s'"
+ %(self.__class__.__name__, attr))
+ else:
+ msg = "%(me)s.%(attr)s is deprecated. Use %(me)s.socket.%(attr)s " \
+ "instead." % {'me': self.__class__.__name__, 'attr':attr}
+ warnings.warn(msg, DeprecationWarning, stacklevel=2)
+ return retattr
+
+ # log and log_info may be overridden to provide more sophisticated
+ # logging and warning methods. In general, log is for 'hit' logging
+ # and 'log_info' is for informational, warning and error logging.
+
+ def log(self, message):
+ sys.stderr.write('log: %s\n' % str(message))
+
+ def log_info(self, message, type='info'):
+ # Messages whose type is in ignore_log_types are dropped silently.
+ if type not in self.ignore_log_types:
+ print '%s: %s' % (type, message)
+
+ def handle_read_event(self):
+ """Route a read event to handle_accept() or handle_read(),
+ completing a pending connect first."""
+ if self.accepting:
+ # accepting sockets are never connected, they "spawn" new
+ # sockets that are connected
+ self.handle_accept()
+ elif not self.connected:
+ if self.connecting:
+ self.handle_connect_event()
+ self.handle_read()
+ else:
+ self.handle_read()
+
+ def handle_connect_event(self):
+ """Finish a connect: raise socket.error if SO_ERROR is set, else
+ call handle_connect() and mark the channel connected."""
+ err = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
+ if err != 0:
+ raise socket.error(err, _strerror(err))
+ self.handle_connect()
+ self.connected = True
+ self.connecting = False
+
+ def handle_write_event(self):
+ """Route a write event to handle_write(), completing a pending
+ connect first; ignored on accepting channels."""
+ if self.accepting:
+ # Accepting sockets shouldn't get a write event.
+ # We will pretend it didn't happen.
+ return
+
+ if not self.connected:
+ if self.connecting:
+ self.handle_connect_event()
+ self.handle_write()
+
+ def handle_expt_event(self):
+ # handle_expt_event() is called if there might be an error on the
+ # socket, or if there is OOB data
+ # check for the error condition first
+ err = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
+ if err != 0:
+ # we can get here when select.select() says that there is an
+ # exceptional condition on the socket
+ # since there is an error, we'll go ahead and close the socket
+ # like we would in a subclassed handle_read() that received no
+ # data
+ self.handle_close()
+ else:
+ self.handle_expt()
+
+ def handle_error(self):
+ """Log the exception currently being handled (via
+ compact_traceback()) as an 'error' message, then call
+ handle_close()."""
+ nil, t, v, tbinfo = compact_traceback()
+
+ # sometimes a user repr method will crash.
+ try:
+ self_repr = repr(self)
+ except:
+ self_repr = '<__repr__(self) failed for object at %0x>' % id(self)
+
+ self.log_info(
+ 'uncaptured python exception, closing channel %s (%s:%s %s)' % (
+ self_repr,
+ t,
+ v,
+ tbinfo
+ ),
+ 'error'
+ )
+ self.handle_close()
+
+ # Default event handlers: each logs a 'warning' message, which the
+ # default ignore_log_types suppresses.
+
+ def handle_expt(self):
+ self.log_info('unhandled incoming priority event', 'warning')
+
+ def handle_read(self):
+ self.log_info('unhandled read event', 'warning')
+
+ def handle_write(self):
+ self.log_info('unhandled write event', 'warning')
+
+ def handle_connect(self):
+ self.log_info('unhandled connect event', 'warning')
+
+ def handle_accept(self):
+ self.log_info('unhandled accept event', 'warning')
+
+ def handle_close(self):
+ self.log_info('unhandled close event', 'warning')
+ self.close()
+
+# ---------------------------------------------------------------------------
+# adds simple buffered output capability, useful for simple clients.
+# [for more sophisticated usage use asynchat.async_chat]
+# ---------------------------------------------------------------------------
+
+class dispatcher_with_send(dispatcher):
+
+ def __init__(self, sock=None, map=None):
+ dispatcher.__init__(self, sock, map)
+ self.out_buffer = ''
+
+ def initiate_send(self):
+ num_sent = 0
+ num_sent = dispatcher.send(self, self.out_buffer[:512])
+ self.out_buffer = self.out_buffer[num_sent:]
+
+ def handle_write(self):
+ self.initiate_send()
+
+ def writable(self):
+ return (not self.connected) or len(self.out_buffer)
+
+ def send(self, data):
+ if self.debug:
+ self.log_info('sending %s' % repr(data))
+ self.out_buffer = self.out_buffer + data
+ self.initiate_send()
+
+# ---------------------------------------------------------------------------
+# used for debugging.
+# ---------------------------------------------------------------------------
+
+def compact_traceback():
+ t, v, tb = sys.exc_info()
+ tbinfo = []
+ if not tb: # Must have a traceback
+ raise AssertionError("traceback does not exist")
+ while tb:
+ tbinfo.append((
+ tb.tb_frame.f_code.co_filename,
+ tb.tb_frame.f_code.co_name,
+ str(tb.tb_lineno)
+ ))
+ tb = tb.tb_next
+
+ # just to be safe
+ del tb
+
+ file, function, line = tbinfo[-1]
+ info = ' '.join(['[%s|%s|%s]' % x for x in tbinfo])
+ return (file, function, line), t, v, info
+
+def close_all(map=None, ignore_all=False):
+ if map is None:
+ map = socket_map
+ for x in map.values():
+ try:
+ x.close()
+ except OSError, x:
+ if x.args[0] == EBADF:
+ pass
+ elif not ignore_all:
+ raise
+ except _reraised_exceptions:
+ raise
+ except:
+ if not ignore_all:
+ raise
+ map.clear()
+
+# Asynchronous File I/O:
+#
+# After a little research (reading man pages on various unixen, and
+# digging through the linux kernel), I've determined that select()
+# isn't meant for doing asynchronous file i/o.
+# Heartening, though - reading linux/mm/filemap.c shows that linux
+# supports asynchronous read-ahead. So _MOST_ of the time, the data
+# will be sitting in memory for us already when we go to read it.
+#
+# What other OS's (besides NT) support async file i/o? [VMS?]
+#
+# Regardless, this is useful for pipes, and stdin/stdout...
+
+# File-descriptor support is only defined when os.name is 'posix', because
+# it relies on fcntl.
+if os.name == 'posix':
+ import fcntl
+
+ class file_wrapper:
+ # Here we override just enough to make a file
+ # look like a socket for the purposes of asyncore.
+ # The passed fd is automatically os.dup()'d
+
+ def __init__(self, fd):
+ self.fd = os.dup(fd)
+
+ def recv(self, *args):
+ return os.read(self.fd, *args)
+
+ def send(self, *args):
+ return os.write(self.fd, *args)
+
+ def getsockopt(self, level, optname, buflen=None):
+ # Only the SO_ERROR query is answered (always with 0, i.e. no
+ # error); every other option raises NotImplementedError.
+ if (level == socket.SOL_SOCKET and
+ optname == socket.SO_ERROR and
+ not buflen):
+ return 0
+ raise NotImplementedError("Only asyncore specific behaviour "
+ "implemented.")
+
+ read = recv
+ write = send
+
+ def close(self):
+ # A second close() is a no-op: fd is set to -1 before os.close().
+ if self.fd < 0:
+ return
+ fd = self.fd
+ self.fd = -1
+ os.close(fd)
+
+ def fileno(self):
+ return self.fd
+
+ class file_dispatcher(dispatcher):
+
+ def __init__(self, fd, map=None):
+ # fd may be an integer descriptor or an object with fileno().
+ dispatcher.__init__(self, None, map)
+ self.connected = True
+ try:
+ fd = fd.fileno()
+ except AttributeError:
+ pass
+ self.set_file(fd)
+ # set it to non-blocking mode
+ flags = fcntl.fcntl(fd, fcntl.F_GETFL, 0)
+ flags = flags | os.O_NONBLOCK
+ fcntl.fcntl(fd, fcntl.F_SETFL, flags)
+
+ def set_file(self, fd):
+ # Wrap a dup of fd so it can stand in for self.socket, then
+ # register the channel under the dup's descriptor.
+ self.socket = file_wrapper(fd)
+ self._fileno = self.socket.fileno()
+ self.add_channel()
diff --git a/cashew/Lib/atexit.py b/cashew/Lib/atexit.py
new file mode 100644
index 0000000..93fddf7
--- /dev/null
+++ b/cashew/Lib/atexit.py
@@ -0,0 +1,65 @@
+"""
+atexit.py - allow programmer to define multiple exit functions to be executed
+upon normal program termination.
+
+One public function, register, is defined.
+"""
+
+__all__ = ["register"]
+
+import sys
+
+_exithandlers = []
+def _run_exitfuncs():
+ """run any registered exit functions
+
+ _exithandlers is traversed in reverse order so functions are executed
+ last in, first out.
+ """
+
+ exc_info = None
+ while _exithandlers:
+ func, targs, kargs = _exithandlers.pop()
+ try:
+ func(*targs, **kargs)
+ except SystemExit:
+ exc_info = sys.exc_info()
+ except:
+ import traceback
+ print >> sys.stderr, "Error in atexit._run_exitfuncs:"
+ traceback.print_exc()
+ exc_info = sys.exc_info()
+
+ if exc_info is not None:
+ raise exc_info[0], exc_info[1], exc_info[2]
+
+
+def register(func, *targs, **kargs):
+ """register a function to be executed upon normal program termination
+
+ func - function to be called at exit
+ targs - optional arguments to pass to func
+ kargs - optional keyword arguments to pass to func
+
+ func is returned to facilitate usage as a decorator.
+ """
+ _exithandlers.append((func, targs, kargs))
+ return func
+
+# Keep any exit function that was already installed by registering it as a
+# handler, then install our runner as sys.exitfunc.
+if hasattr(sys, "exitfunc"):
+ # Assume it's another registered exit function - append it to our list
+ register(sys.exitfunc)
+sys.exitfunc = _run_exitfuncs
+
+# Demo when run as a script: the registered handlers run via
+# _run_exitfuncs in reverse registration order.
+if __name__ == "__main__":
+ def x1():
+ print "running x1"
+ def x2(n):
+ print "running x2(%r)" % (n,)
+ def x3(n, kwd=None):
+ print "running x3(%r, kwd=%r)" % (n, kwd)
+
+ register(x1)
+ register(x2, 12)
+ # "bar" is passed positionally and so binds to kwd
+ register(x3, 5, "bar")
+ register(x3, "no kwd args")
diff --git a/cashew/Lib/audiodev.py b/cashew/Lib/audiodev.py
new file mode 100644
index 0000000..b6831a6
--- /dev/null
+++ b/cashew/Lib/audiodev.py
@@ -0,0 +1,260 @@
+"""Classes for manipulating audio devices (currently only for Sun and SGI)"""
+# Emit the py3k removal warning at import time, then drop the helper name
+# so it does not stay in the module namespace.
+from warnings import warnpy3k
+warnpy3k("the audiodev module has been removed in Python 3.0", stacklevel=2)
+del warnpy3k
+
+__all__ = ["error","AudioDev"]
+
+class error(Exception):
+ """Raised by this module for bad audio parameters, missing parameters,
+ or when no audio device module can be imported."""
+ pass
+
+class Play_Audio_sgi:
+ """Audio output through the SGI `al` / `AL` modules."""
+ # Private instance variables
+## if 0: access frameratelist, nchannelslist, sampwidthlist, oldparams, \
+## params, config, inited_outrate, inited_width, \
+## inited_nchannels, port, converter, classinited: private
+
+ classinited = 0
+ frameratelist = nchannelslist = sampwidthlist = None
+
+ def initclass(self):
+ # Build the (raw value, AL constant) lookup tables. Note that the
+ # assignments go through self, so they are stored on the instance.
+ import AL
+ self.frameratelist = [
+ (48000, AL.RATE_48000),
+ (44100, AL.RATE_44100),
+ (32000, AL.RATE_32000),
+ (22050, AL.RATE_22050),
+ (16000, AL.RATE_16000),
+ (11025, AL.RATE_11025),
+ ( 8000, AL.RATE_8000),
+ ]
+ self.nchannelslist = [
+ (1, AL.MONO),
+ (2, AL.STEREO),
+ (4, AL.QUADRO),
+ ]
+ self.sampwidthlist = [
+ (1, AL.SAMPLE_8),
+ (2, AL.SAMPLE_16),
+ (3, AL.SAMPLE_24),
+ ]
+ self.classinited = 1
+
+ def __init__(self):
+ import al, AL
+ if not self.classinited:
+ self.initclass()
+ # oldparams holds the device parameters to restore on stop()
+ self.oldparams = []
+ self.params = [AL.OUTPUT_RATE, 0]
+ self.config = al.newconfig()
+ self.inited_outrate = 0
+ self.inited_width = 0
+ self.inited_nchannels = 0
+ self.converter = None
+ self.port = None
+ return
+
+ def __del__(self):
+ if self.port:
+ self.stop()
+ if self.oldparams:
+ import al, AL
+ al.setparams(AL.DEFAULT_DEVICE, self.oldparams)
+ self.oldparams = []
+
+ def wait(self):
+ """Poll every 0.1 s until the port reports no filled samples,
+ then stop()."""
+ if not self.port:
+ return
+ import time
+ while self.port.getfilled() > 0:
+ time.sleep(0.1)
+ self.stop()
+
+ def stop(self):
+ """Close the port if open and restore the saved device parameters."""
+ if self.port:
+ self.port.closeport()
+ self.port = None
+ if self.oldparams:
+ import al, AL
+ al.setparams(AL.DEFAULT_DEVICE, self.oldparams)
+ self.oldparams = []
+
+ def setoutrate(self, rate):
+ """Select the output rate; raises error if rate is not in
+ frameratelist."""
+ for (raw, cooked) in self.frameratelist:
+ if rate == raw:
+ self.params[1] = cooked
+ self.inited_outrate = 1
+ break
+ else:
+ raise error, 'bad output rate'
+
+ def setsampwidth(self, width):
+ """Select the sample width in bytes. Width 0 configures 16-bit
+ output and installs the ulaw2lin converter; any other unknown
+ width raises error."""
+ for (raw, cooked) in self.sampwidthlist:
+ if width == raw:
+ self.config.setwidth(cooked)
+ self.inited_width = 1
+ break
+ else:
+ if width == 0:
+ import AL
+ self.inited_width = 0
+ self.config.setwidth(AL.SAMPLE_16)
+ self.converter = self.ulaw2lin
+ else:
+ raise error, 'bad sample width'
+
+ def setnchannels(self, nchannels):
+ """Select the channel count; raises error if it is not in
+ nchannelslist."""
+ for (raw, cooked) in self.nchannelslist:
+ if nchannels == raw:
+ self.config.setchannels(cooked)
+ self.inited_nchannels = 1
+ break
+ else:
+ raise error, 'bad # of channels'
+
+ def writeframes(self, data):
+ """Write data to the port, opening it on first use. Raises error
+ unless both the output rate and channel count have been set."""
+ if not (self.inited_outrate and self.inited_nchannels):
+ raise error, 'params not specified'
+ if not self.port:
+ import al, AL
+ self.port = al.openport('Python', 'w', self.config)
+ # Save the current device parameters before applying ours
+ self.oldparams = self.params[:]
+ al.getparams(AL.DEFAULT_DEVICE, self.oldparams)
+ al.setparams(AL.DEFAULT_DEVICE, self.params)
+ if self.converter:
+ data = self.converter(data)
+ self.port.writesamps(data)
+
+ def getfilled(self):
+ if self.port:
+ return self.port.getfilled()
+ else:
+ return 0
+
+ def getfillable(self):
+ if self.port:
+ return self.port.getfillable()
+ else:
+ return self.config.getqueuesize()
+
+ # private methods
+## if 0: access *: private
+
+ def ulaw2lin(self, data):
+ import audioop
+ return audioop.ulaw2lin(data, 2)
+
+class Play_Audio_sun:
+ """Audio output through the `sunaudiodev` / `SUNAUDIODEV` modules."""
+## if 0: access outrate, sampwidth, nchannels, inited_outrate, inited_width, \
+## inited_nchannels, converter: private
+
+ def __init__(self):
+ self.outrate = 0
+ self.sampwidth = 0
+ self.nchannels = 0
+ self.inited_outrate = 0
+ self.inited_width = 0
+ self.inited_nchannels = 0
+ self.converter = None
+ self.port = None
+ return
+
+ def __del__(self):
+ self.stop()
+
+ def setoutrate(self, rate):
+ self.outrate = rate
+ self.inited_outrate = 1
+
+ def setsampwidth(self, width):
+ self.sampwidth = width
+ self.inited_width = 1
+
+ def setnchannels(self, nchannels):
+ self.nchannels = nchannels
+ self.inited_nchannels = 1
+
+ def writeframes(self, data):
+ """Write data to the device, opening and configuring it on first
+ use. Raises error unless rate, width and channel count are all set."""
+ if not (self.inited_outrate and self.inited_width and self.inited_nchannels):
+ raise error, 'params not specified'
+ if not self.port:
+ import sunaudiodev, SUNAUDIODEV
+ self.port = sunaudiodev.open('w')
+ info = self.port.getinfo()
+ info.o_sample_rate = self.outrate
+ info.o_channels = self.nchannels
+ if self.sampwidth == 0:
+ info.o_precision = 8
+ # NOTE(review): this sets an attribute on self, not on info, so
+ # the encoding passed to setinfo() is left untouched here. It
+ # looks like a typo for info.o_encoding, but the XXX comment
+ # below suggests leaving the default may be intended -- confirm.
+ self.o_encoding = SUNAUDIODEV.ENCODING_ULAW
+ # XXX Hack, hack -- leave defaults
+ else:
+ info.o_precision = 8 * self.sampwidth
+ info.o_encoding = SUNAUDIODEV.ENCODING_LINEAR
+ self.port.setinfo(info)
+ if self.converter:
+ data = self.converter(data)
+ self.port.write(data)
+
+ def wait(self):
+ """Drain the device, then stop()."""
+ if not self.port:
+ return
+ self.port.drain()
+ self.stop()
+
+ def stop(self):
+ """Flush and close the device if it is open."""
+ if self.port:
+ self.port.flush()
+ self.port.close()
+ self.port = None
+
+ def getfilled(self):
+ if self.port:
+ return self.port.obufcount()
+ else:
+ return 0
+
+## # Nobody remembers what this method does, and it's broken. :-(
+## def getfillable(self):
+## return BUFFERSIZE - self.getfilled()
+
+def AudioDev():
+ """Return a player object for the first audio module that imports.
+
+ Tries `al` (Play_Audio_sgi), then `sunaudiodev` (Play_Audio_sun), then
+ `Audio_mac` (Audio_mac.Play_Audio_mac); raises error if none of them
+ can be imported.
+ """
+ # Dynamically try to import and use a platform specific module.
+ try:
+ import al
+ except ImportError:
+ try:
+ import sunaudiodev
+ return Play_Audio_sun()
+ except ImportError:
+ try:
+ import Audio_mac
+ except ImportError:
+ raise error, 'no audio device'
+ else:
+ return Audio_mac.Play_Audio_mac()
+ else:
+ return Play_Audio_sgi()
+
+def test(fn = None):
+ import sys
+ if sys.argv[1:]:
+ fn = sys.argv[1]
+ else:
+ fn = 'f:just samples:just.aif'
+ import aifc
+ af = aifc.open(fn, 'r')
+ print fn, af.getparams()
+ p = AudioDev()
+ p.setoutrate(af.getframerate())
+ p.setsampwidth(af.getsampwidth())
+ p.setnchannels(af.getnchannels())
+ BUFSIZ = af.getframerate()/af.getsampwidth()/af.getnchannels()
+ while 1:
+ data = af.readframes(BUFSIZ)
+ if not data: break
+ print len(data)
+ p.writeframes(data)
+ p.wait()
+
+# Run the playback self-test when executed as a script.
+if __name__ == '__main__':
+ test()
diff --git a/cashew/Lib/base64.py b/cashew/Lib/base64.py
new file mode 100644
index 0000000..38bc61e
--- /dev/null
+++ b/cashew/Lib/base64.py
@@ -0,0 +1,367 @@
+#! /usr/bin/env python
+
+"""RFC 3548: Base16, Base32, Base64 Data Encodings"""
+
+# Modified 04-Oct-1995 by Jack Jansen to use binascii module
+# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
+
+import re
+import struct
+import string
+import binascii
+
+
+__all__ = [
+ # Legacy interface exports traditional RFC 1521 Base64 encodings
+ 'encode', 'decode', 'encodestring', 'decodestring',
+ # Generalized interface for other encodings
+ 'b64encode', 'b64decode', 'b32encode', 'b32decode',
+ 'b16encode', 'b16decode',
+ # Standard Base64 encoding
+ 'standard_b64encode', 'standard_b64decode',
+ # Some common Base64 alternatives. As referenced by RFC 3548, see thread
+ # starting at:
+ #
+ # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
+ 'urlsafe_b64encode', 'urlsafe_b64decode',
+ ]
+
+# Identity table of all 256 characters; _translate() copies and patches it.
+_translation = [chr(_x) for _x in range(256)]
+EMPTYSTRING = ''
+
+
+def _translate(s, altchars):
+ translation = _translation[:]
+ for k, v in altchars.items():
+ translation[ord(k)] = v
+ return s.translate(''.join(translation))
+
+
+
+# Base64 encoding/decoding uses binascii
+
+def b64encode(s, altchars=None):
+ """Encode a string using Base64.
+
+ s is the string to encode. Optional altchars must be a string of at least
+ length 2 (additional characters are ignored) which specifies an
+ alternative alphabet for the '+' and '/' characters. This allows an
+ application to e.g. generate url or filesystem safe Base64 strings.
+
+ The encoded string is returned.
+ """
+ # Strip off the trailing newline
+ encoded = binascii.b2a_base64(s)[:-1]
+ if altchars is not None:
+ return encoded.translate(string.maketrans(b'+/', altchars[:2]))
+ return encoded
+
+
+def b64decode(s, altchars=None):
+ """Decode a Base64 encoded string.
+
+ s is the string to decode. Optional altchars must be a string of at least
+ length 2 (additional characters are ignored) which specifies the
+ alternative alphabet used instead of the '+' and '/' characters.
+
+ The decoded string is returned. A TypeError is raised if s is
+ incorrectly padded. Characters that are neither in the normal base-64
+ alphabet nor the alternative alphabet are discarded prior to the padding
+ check.
+ """
+ if altchars is not None:
+ s = s.translate(string.maketrans(altchars[:2], '+/'))
+ try:
+ return binascii.a2b_base64(s)
+ except binascii.Error, msg:
+ # Transform this exception for consistency
+ raise TypeError(msg)
+
+
+def standard_b64encode(s):
+ """Encode a string using the standard Base64 alphabet.
+
+ s is the string to encode. The encoded string is returned.
+ """
+ return b64encode(s)
+
+def standard_b64decode(s):
+ """Decode a string encoded with the standard Base64 alphabet.
+
+ Argument s is the string to decode. The decoded string is returned. A
+ TypeError is raised if the string is incorrectly padded. Characters that
+ are not in the standard alphabet are discarded prior to the padding
+ check.
+ """
+ return b64decode(s)
+
+# Translation tables between the standard ('+', '/') and URL-safe
+# ('-', '_') Base64 alphabets, built once at import time.
+_urlsafe_encode_translation = string.maketrans(b'+/', b'-_')
+_urlsafe_decode_translation = string.maketrans(b'-_', b'+/')
+
+def urlsafe_b64encode(s):
+ """Encode a string using the URL- and filesystem-safe Base64 alphabet.
+
+ Argument s is the string to encode. The encoded string is returned. The
+ alphabet uses '-' instead of '+' and '_' instead of '/'.
+ """
+ return b64encode(s).translate(_urlsafe_encode_translation)
+
+def urlsafe_b64decode(s):
+ """Decode a string using the URL- and filesystem-safe Base64 alphabet.
+
+ Argument s is the string to decode. The decoded string is returned. A
+ TypeError is raised if the string is incorrectly padded. Characters that
+ are not in the URL-safe base-64 alphabet, and are not a plus '+' or slash
+ '/', are discarded prior to the padding check.
+
+ The alphabet uses '-' instead of '+' and '_' instead of '/'.
+ """
+ return b64decode(s.translate(_urlsafe_decode_translation))
+
+
+
+# Base32 encoding/decoding must be done in Python
+_b32alphabet = {
+ 0: 'A', 9: 'J', 18: 'S', 27: '3',
+ 1: 'B', 10: 'K', 19: 'T', 28: '4',
+ 2: 'C', 11: 'L', 20: 'U', 29: '5',
+ 3: 'D', 12: 'M', 21: 'V', 30: '6',
+ 4: 'E', 13: 'N', 22: 'W', 31: '7',
+ 5: 'F', 14: 'O', 23: 'X',
+ 6: 'G', 15: 'P', 24: 'Y',
+ 7: 'H', 16: 'Q', 25: 'Z',
+ 8: 'I', 17: 'R', 26: '2',
+ }
+
+_b32tab = _b32alphabet.items()
+_b32tab.sort()
+_b32tab = [v for k, v in _b32tab]
+_b32rev = dict([(v, long(k)) for k, v in _b32alphabet.items()])
+
+
+def b32encode(s):
+ """Encode a string using Base32.
+
+ s is the string to encode. The encoded string is returned, padded with
+ '=' so that its length is a multiple of 8.
+ """
+ parts = []
+ quanta, leftover = divmod(len(s), 5)
+ # Pad the last quantum with zero bits if necessary
+ if leftover:
+ s += ('\0' * (5 - leftover))
+ quanta += 1
+ for i in range(quanta):
+ # c1 and c2 are 16 bits wide, c3 is 8 bits wide. The intent of this
+ # code is to process the 40 bits in units of 5 bits. So we take the 1
+ # leftover bit of c1 and tack it onto c2. Then we take the 2 leftover
+ # bits of c2 and tack them onto c3. The shifts and masks are intended
+ # to give us values of exactly 5 bits in width.
+ c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
+ c2 += (c1 & 1) << 16 # 17 bits wide
+ c3 += (c2 & 3) << 8 # 10 bits wide
+ parts.extend([_b32tab[c1 >> 11], # bits 1 - 5
+ _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
+ _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
+ _b32tab[c2 >> 12], # bits 16 - 20 (1 - 5)
+ _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
+ _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
+ _b32tab[c3 >> 5], # bits 31 - 35 (1 - 5)
+ _b32tab[c3 & 0x1f], # bits 36 - 40 (1 - 5)
+ ])
+ encoded = EMPTYSTRING.join(parts)
+ # Adjust for any leftover partial quanta
+ # (the characters that encode only the zero padding become '=')
+ if leftover == 1:
+ return encoded[:-6] + '======'
+ elif leftover == 2:
+ return encoded[:-4] + '===='
+ elif leftover == 3:
+ return encoded[:-3] + '==='
+ elif leftover == 4:
+ return encoded[:-1] + '='
+ return encoded
+
+
+def b32decode(s, casefold=False, map01=None):
+ """Decode a Base32 encoded string.
+
+ s is the string to decode. Optional casefold is a flag specifying whether
+ a lowercase alphabet is acceptable as input. For security purposes, the
+ default is False.
+
+ RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O
+ (oh), and for optional mapping of the digit 1 (one) to either the letter I
+ (eye) or letter L (el). The optional argument map01 when not None,
+ specifies which letter the digit 1 should be mapped to (when map01 is not
+ None, the digit 0 is always mapped to the letter O). For security
+ purposes the default is None, so that 0 and 1 are not allowed in the
+ input.
+
+ The decoded string is returned. A TypeError is raised if s were
+ incorrectly padded or if there are non-alphabet characters present in the
+ string.
+ """
+ quanta, leftover = divmod(len(s), 8)
+ if leftover:
+ raise TypeError('Incorrect padding')
+ # Handle section 2.4 zero and one mapping. The flag map01 will be either
+ # False, or the character to map the digit 1 (one) to. It should be
+ # either L (el) or I (eye).
+ if map01:
+ s = s.translate(string.maketrans(b'01', b'O' + map01))
+ if casefold:
+ s = s.upper()
+ # Strip off pad characters from the right. We need to count the pad
+ # characters because this will tell us how many null bytes to remove from
+ # the end of the decoded string.
+ padchars = 0
+ mo = re.search('(?P<pad>[=]*)$', s)
+ if mo:
+ padchars = len(mo.group('pad'))
+ if padchars > 0:
+ s = s[:-padchars]
+ # Now decode the full quanta
+ parts = []
+ acc = 0
+ shift = 35
+ for c in s:
+ val = _b32rev.get(c)
+ if val is None:
+ raise TypeError('Non-base32 digit found')
+ acc += _b32rev[c] << shift
+ shift -= 5
+ if shift < 0:
+ parts.append(binascii.unhexlify('%010x' % acc))
+ acc = 0
+ shift = 35
+ # Process the last, partial quanta
+ last = binascii.unhexlify('%010x' % acc)
+ if padchars == 0:
+ last = '' # No characters
+ elif padchars == 1:
+ last = last[:-1]
+ elif padchars == 3:
+ last = last[:-2]
+ elif padchars == 4:
+ last = last[:-3]
+ elif padchars == 6:
+ last = last[:-4]
+ else:
+ raise TypeError('Incorrect padding')
+ parts.append(last)
+ return EMPTYSTRING.join(parts)
+
+
+
+# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
+# lowercase. The RFC also recommends against accepting input case
+# insensitively.
+def b16encode(s):
+ """Encode a string using Base16.
+
+ s is the string to encode. The encoded string is returned.
+ """
+ return binascii.hexlify(s).upper()
+
+
+def b16decode(s, casefold=False):
+ """Decode a Base16 encoded string.
+
+ s is the string to decode. Optional casefold is a flag specifying whether
+ a lowercase alphabet is acceptable as input. For security purposes, the
+ default is False.
+
+ The decoded string is returned. A TypeError is raised if s is
+ incorrectly padded or if there are non-alphabet characters present in the
+ string.
+ """
+ if casefold:
+ s = s.upper()
+ if re.search('[^0-9A-F]', s):
+ raise TypeError('Non-base16 digit found')
+ return binascii.unhexlify(s)
+
+
+
+# Legacy interface. This code could be cleaned up since I don't believe
+# binascii has any line length limitations. It just doesn't seem worth it
+# though.
+
+MAXLINESIZE = 76 # Excluding the CRLF
+MAXBINSIZE = (MAXLINESIZE//4)*3
+
+def encode(input, output):
+ """Encode a file."""
+ while True:
+ s = input.read(MAXBINSIZE)
+ if not s:
+ break
+ while len(s) < MAXBINSIZE:
+ ns = input.read(MAXBINSIZE-len(s))
+ if not ns:
+ break
+ s += ns
+ line = binascii.b2a_base64(s)
+ output.write(line)
+
+
+def decode(input, output):
+ """Decode a file."""
+ while True:
+ line = input.readline()
+ if not line:
+ break
+ s = binascii.a2b_base64(line)
+ output.write(s)
+
+
+def encodestring(s):
+ """Encode a string into multiple lines of base-64 data."""
+ pieces = []
+ for i in range(0, len(s), MAXBINSIZE):
+ chunk = s[i : i + MAXBINSIZE]
+ pieces.append(binascii.b2a_base64(chunk))
+ return "".join(pieces)
+
+
+def decodestring(s):
+ """Decode a string."""
+ return binascii.a2b_base64(s)
+
+
+
+# Useable as a script...
+def test():
+ """Small test program"""
+ import sys, getopt
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], 'deut')
+ except getopt.error, msg:
+ sys.stdout = sys.stderr
+ print msg
+ print """usage: %s [-d|-e|-u|-t] [file|-]
+ -d, -u: decode
+ -e: encode (default)
+ -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]
+ sys.exit(2)
+ func = encode
+ for o, a in opts:
+ if o == '-e': func = encode
+ if o == '-d': func = decode
+ if o == '-u': func = decode
+ if o == '-t': test1(); return
+ if args and args[0] != '-':
+ with open(args[0], 'rb') as f:
+ func(f, sys.stdout)
+ else:
+ func(sys.stdin, sys.stdout)
+
+
+def test1():
+ s0 = "Aladdin:open sesame"
+ s1 = encodestring(s0)
+ s2 = decodestring(s1)
+ print s0, repr(s1), s2
+
+
+if __name__ == '__main__':
+ test()
diff --git a/cashew/Lib/bdb.py b/cashew/Lib/bdb.py
new file mode 100644
index 0000000..59440a9
--- /dev/null
+++ b/cashew/Lib/bdb.py
@@ -0,0 +1,645 @@
+"""Debugger basics"""
+
+import fnmatch
+import sys
+import os
+import types
+
+__all__ = ["BdbQuit","Bdb","Breakpoint"]
+
+class BdbQuit(Exception):
+ """Exception to give up completely"""
+
+
+class Bdb:
+
+ """Generic Python debugger base class.
+
+ This class takes care of details of the trace facility;
+ a derived class should implement user interaction.
+ The standard debugger class (pdb.Pdb) is an example.
+ """
+
+ def __init__(self, skip=None):
+ self.skip = set(skip) if skip else None
+ self.breaks = {}
+ self.fncache = {}
+ self.frame_returning = None
+
+ def canonic(self, filename):
+ if filename == "<" + filename[1:-1] + ">":
+ return filename
+ canonic = self.fncache.get(filename)
+ if not canonic:
+ canonic = os.path.abspath(filename)
+ canonic = os.path.normcase(canonic)
+ self.fncache[filename] = canonic
+ return canonic
+
+ def reset(self):
+ import linecache
+ linecache.checkcache()
+ self.botframe = None
+ self._set_stopinfo(None, None)
+
+ def trace_dispatch(self, frame, event, arg):
+ if self.quitting:
+ return # None
+ if event == 'line':
+ return self.dispatch_line(frame)
+ if event == 'call':
+ return self.dispatch_call(frame, arg)
+ if event == 'return':
+ return self.dispatch_return(frame, arg)
+ if event == 'exception':
+ return self.dispatch_exception(frame, arg)
+ if event == 'c_call':
+ return self.trace_dispatch
+ if event == 'c_exception':
+ return self.trace_dispatch
+ if event == 'c_return':
+ return self.trace_dispatch
+ print 'bdb.Bdb.dispatch: unknown debugging event:', repr(event)
+ return self.trace_dispatch
+
+ def dispatch_line(self, frame):
+ if self.stop_here(frame) or self.break_here(frame):
+ self.user_line(frame)
+ if self.quitting: raise BdbQuit
+ return self.trace_dispatch
+
+ def dispatch_call(self, frame, arg):
+ # XXX 'arg' is no longer used
+ if self.botframe is None:
+ # First call of dispatch since reset()
+ self.botframe = frame.f_back # (CT) Note that this may also be None!
+ return self.trace_dispatch
+ if not (self.stop_here(frame) or self.break_anywhere(frame)):
+ # No need to trace this function
+ return # None
+ self.user_call(frame, arg)
+ if self.quitting: raise BdbQuit
+ return self.trace_dispatch
+
+ def dispatch_return(self, frame, arg):
+ if self.stop_here(frame) or frame == self.returnframe:
+ try:
+ self.frame_returning = frame
+ self.user_return(frame, arg)
+ finally:
+ self.frame_returning = None
+ if self.quitting: raise BdbQuit
+ return self.trace_dispatch
+
+ def dispatch_exception(self, frame, arg):
+ if self.stop_here(frame):
+ self.user_exception(frame, arg)
+ if self.quitting: raise BdbQuit
+ return self.trace_dispatch
+
+ # Normally derived classes don't override the following
+ # methods, but they may if they want to redefine the
+ # definition of stopping and breakpoints.
+
+ def is_skipped_module(self, module_name):
+ for pattern in self.skip:
+ if fnmatch.fnmatch(module_name, pattern):
+ return True
+ return False
+
+ def stop_here(self, frame):
+ # (CT) stopframe may now also be None, see dispatch_call.
+ # (CT) the former test for None is therefore removed from here.
+ if self.skip and \
+ self.is_skipped_module(frame.f_globals.get('__name__')):
+ return False
+ if frame is self.stopframe:
+ if self.stoplineno == -1:
+ return False
+ return frame.f_lineno >= self.stoplineno
+ while frame is not None and frame is not self.stopframe:
+ if frame is self.botframe:
+ return True
+ frame = frame.f_back
+ return False
+
+ def break_here(self, frame):
+ filename = self.canonic(frame.f_code.co_filename)
+ if not filename in self.breaks:
+ return False
+ lineno = frame.f_lineno
+ if not lineno in self.breaks[filename]:
+ # The line itself has no breakpoint, but maybe the line is the
+ # first line of a function with breakpoint set by function name.
+ lineno = frame.f_code.co_firstlineno
+ if not lineno in self.breaks[filename]:
+ return False
+
+ # flag says ok to delete temp. bp
+ (bp, flag) = effective(filename, lineno, frame)
+ if bp:
+ self.currentbp = bp.number
+ if (flag and bp.temporary):
+ self.do_clear(str(bp.number))
+ return True
+ else:
+ return False
+
+ def do_clear(self, arg):
+ raise NotImplementedError, "subclass of bdb must implement do_clear()"
+
+ def break_anywhere(self, frame):
+ return self.canonic(frame.f_code.co_filename) in self.breaks
+
+ # Derived classes should override the user_* methods
+ # to gain control.
+
+ def user_call(self, frame, argument_list):
+ """This method is called when there is the remote possibility
+ that we ever need to stop in this function."""
+ pass
+
+ def user_line(self, frame):
+ """This method is called when we stop or break at this line."""
+ pass
+
+ def user_return(self, frame, return_value):
+ """This method is called when a return trap is set here."""
+ pass
+
+ def user_exception(self, frame, exc_info):
+ exc_type, exc_value, exc_traceback = exc_info
+ """This method is called if an exception occurs,
+ but only if we are to stop at or just below this level."""
+ pass
+
+ def _set_stopinfo(self, stopframe, returnframe, stoplineno=0):
+ self.stopframe = stopframe
+ self.returnframe = returnframe
+ self.quitting = 0
+ # stoplineno >= 0 means: stop at line >= the stoplineno
+ # stoplineno -1 means: don't stop at all
+ self.stoplineno = stoplineno
+
+ # Derived classes and clients can call the following methods
+ # to affect the stepping state.
+
+ def set_until(self, frame): #the name "until" is borrowed from gdb
+ """Stop when the line with the line no greater than the current one is
+ reached or when returning from current frame"""
+ self._set_stopinfo(frame, frame, frame.f_lineno+1)
+
+ def set_step(self):
+ """Stop after one line of code."""
+ # Issue #13183: pdb skips frames after hitting a breakpoint and running
+ # step commands.
+ # Restore the trace function in the caller (that may not have been set
+ # for performance reasons) when returning from the current frame.
+ if self.frame_returning:
+ caller_frame = self.frame_returning.f_back
+ if caller_frame and not caller_frame.f_trace:
+ caller_frame.f_trace = self.trace_dispatch
+ self._set_stopinfo(None, None)
+
+ def set_next(self, frame):
+ """Stop on the next line in or below the given frame."""
+ self._set_stopinfo(frame, None)
+
+ def set_return(self, frame):
+ """Stop when returning from the given frame."""
+ self._set_stopinfo(frame.f_back, frame)
+
+ def set_trace(self, frame=None):
+ """Start debugging from `frame`.
+
+ If frame is not specified, debugging starts from caller's frame.
+ """
+ if frame is None:
+ frame = sys._getframe().f_back
+ self.reset()
+ while frame:
+ frame.f_trace = self.trace_dispatch
+ self.botframe = frame
+ frame = frame.f_back
+ self.set_step()
+ sys.settrace(self.trace_dispatch)
+
+ def set_continue(self):
+ # Don't stop except at breakpoints or when finished
+ self._set_stopinfo(self.botframe, None, -1)
+ if not self.breaks:
+ # no breakpoints; run without debugger overhead
+ sys.settrace(None)
+ frame = sys._getframe().f_back
+ while frame and frame is not self.botframe:
+ del frame.f_trace
+ frame = frame.f_back
+
+ def set_quit(self):
+ self.stopframe = self.botframe
+ self.returnframe = None
+ self.quitting = 1
+ sys.settrace(None)
+
+ # Derived classes and clients can call the following methods
+ # to manipulate breakpoints. These methods return an
+ # error message if something went wrong, None if all is well.
+ # Set_break prints out the breakpoint line and file:lineno.
+ # Call self.get_*break*() to see the breakpoints or better
+ # for bp in Breakpoint.bpbynumber: if bp: bp.bpprint().
+
+ def set_break(self, filename, lineno, temporary=0, cond = None,
+ funcname=None):
+ filename = self.canonic(filename)
+ import linecache # Import as late as possible
+ line = linecache.getline(filename, lineno)
+ if not line:
+ return 'Line %s:%d does not exist' % (filename,
+ lineno)
+ if not filename in self.breaks:
+ self.breaks[filename] = []
+ list = self.breaks[filename]
+ if not lineno in list:
+ list.append(lineno)
+ bp = Breakpoint(filename, lineno, temporary, cond, funcname)
+
+ def _prune_breaks(self, filename, lineno):
+ if (filename, lineno) not in Breakpoint.bplist:
+ self.breaks[filename].remove(lineno)
+ if not self.breaks[filename]:
+ del self.breaks[filename]
+
+ def clear_break(self, filename, lineno):
+ filename = self.canonic(filename)
+ if not filename in self.breaks:
+ return 'There are no breakpoints in %s' % filename
+ if lineno not in self.breaks[filename]:
+ return 'There is no breakpoint at %s:%d' % (filename,
+ lineno)
+ # If there's only one bp in the list for that file,line
+ # pair, then remove the breaks entry
+ for bp in Breakpoint.bplist[filename, lineno][:]:
+ bp.deleteMe()
+ self._prune_breaks(filename, lineno)
+
+ def clear_bpbynumber(self, arg):
+ try:
+ number = int(arg)
+ except:
+ return 'Non-numeric breakpoint number (%s)' % arg
+ try:
+ bp = Breakpoint.bpbynumber[number]
+ except IndexError:
+ return 'Breakpoint number (%d) out of range' % number
+ if not bp:
+ return 'Breakpoint (%d) already deleted' % number
+ bp.deleteMe()
+ self._prune_breaks(bp.file, bp.line)
+
+ def clear_all_file_breaks(self, filename):
+ filename = self.canonic(filename)
+ if not filename in self.breaks:
+ return 'There are no breakpoints in %s' % filename
+ for line in self.breaks[filename]:
+ blist = Breakpoint.bplist[filename, line]
+ for bp in blist:
+ bp.deleteMe()
+ del self.breaks[filename]
+
+ def clear_all_breaks(self):
+ if not self.breaks:
+ return 'There are no breakpoints'
+ for bp in Breakpoint.bpbynumber:
+ if bp:
+ bp.deleteMe()
+ self.breaks = {}
+
+ def get_break(self, filename, lineno):
+ filename = self.canonic(filename)
+ return filename in self.breaks and \
+ lineno in self.breaks[filename]
+
+ def get_breaks(self, filename, lineno):
+ filename = self.canonic(filename)
+ return filename in self.breaks and \
+ lineno in self.breaks[filename] and \
+ Breakpoint.bplist[filename, lineno] or []
+
+ def get_file_breaks(self, filename):
+ filename = self.canonic(filename)
+ if filename in self.breaks:
+ return self.breaks[filename]
+ else:
+ return []
+
+ def get_all_breaks(self):
+ return self.breaks
+
+ # Derived classes and clients can call the following method
+ # to get a data structure representing a stack trace.
+
+ def get_stack(self, f, t):
+ stack = []
+ if t and t.tb_frame is f:
+ t = t.tb_next
+ while f is not None:
+ stack.append((f, f.f_lineno))
+ if f is self.botframe:
+ break
+ f = f.f_back
+ stack.reverse()
+ i = max(0, len(stack) - 1)
+ while t is not None:
+ stack.append((t.tb_frame, t.tb_lineno))
+ t = t.tb_next
+ if f is None:
+ i = max(0, len(stack) - 1)
+ return stack, i
+
+ #
+
+ def format_stack_entry(self, frame_lineno, lprefix=': '):
+ import linecache, repr
+ frame, lineno = frame_lineno
+ filename = self.canonic(frame.f_code.co_filename)
+ s = '%s(%r)' % (filename, lineno)
+ if frame.f_code.co_name:
+ s = s + frame.f_code.co_name
+ else:
+ s = s + "<lambda>"
+ if '__args__' in frame.f_locals:
+ args = frame.f_locals['__args__']
+ else:
+ args = None
+ if args:
+ s = s + repr.repr(args)
+ else:
+ s = s + '()'
+ if '__return__' in frame.f_locals:
+ rv = frame.f_locals['__return__']
+ s = s + '->'
+ s = s + repr.repr(rv)
+ line = linecache.getline(filename, lineno, frame.f_globals)
+ if line: s = s + lprefix + line.strip()
+ return s
+
+ # The following two methods can be called by clients to use
+ # a debugger to debug a statement, given as a string.
+
+ def run(self, cmd, globals=None, locals=None):
+ if globals is None:
+ import __main__
+ globals = __main__.__dict__
+ if locals is None:
+ locals = globals
+ self.reset()
+ sys.settrace(self.trace_dispatch)
+ if not isinstance(cmd, types.CodeType):
+ cmd = cmd+'\n'
+ try:
+ exec cmd in globals, locals
+ except BdbQuit:
+ pass
+ finally:
+ self.quitting = 1
+ sys.settrace(None)
+
+ def runeval(self, expr, globals=None, locals=None):
+ if globals is None:
+ import __main__
+ globals = __main__.__dict__
+ if locals is None:
+ locals = globals
+ self.reset()
+ sys.settrace(self.trace_dispatch)
+ if not isinstance(expr, types.CodeType):
+ expr = expr+'\n'
+ try:
+ return eval(expr, globals, locals)
+ except BdbQuit:
+ pass
+ finally:
+ self.quitting = 1
+ sys.settrace(None)
+
+ def runctx(self, cmd, globals, locals):
+ # B/W compatibility
+ self.run(cmd, globals, locals)
+
+ # This method is more useful to debug a single function call.
+
+ def runcall(self, func, *args, **kwds):
+ self.reset()
+ sys.settrace(self.trace_dispatch)
+ res = None
+ try:
+ res = func(*args, **kwds)
+ except BdbQuit:
+ pass
+ finally:
+ self.quitting = 1
+ sys.settrace(None)
+ return res
+
+
+def set_trace():
+ Bdb().set_trace()
+
+
+class Breakpoint:
+
+ """Breakpoint class
+
+ Implements temporary breakpoints, ignore counts, disabling and
+ (re)-enabling, and conditionals.
+
+ Breakpoints are indexed by number through bpbynumber and by
+ the file,line tuple using bplist. The former points to a
+ single instance of class Breakpoint. The latter points to a
+ list of such instances since there may be more than one
+ breakpoint per line.
+
+ """
+
+ # XXX Keeping state in the class is a mistake -- this means
+ # you cannot have more than one active Bdb instance.
+
+ next = 1 # Next bp to be assigned
+ bplist = {} # indexed by (file, lineno) tuple
+ bpbynumber = [None] # Each entry is None or an instance of Bpt
+ # index 0 is unused, except for marking an
+ # effective break .... see effective()
+
+ def __init__(self, file, line, temporary=0, cond=None, funcname=None):
+ self.funcname = funcname
+ # Needed if funcname is not None.
+ self.func_first_executable_line = None
+ self.file = file # This better be in canonical form!
+ self.line = line
+ self.temporary = temporary
+ self.cond = cond
+ self.enabled = 1
+ self.ignore = 0
+ self.hits = 0
+ self.number = Breakpoint.next
+ Breakpoint.next = Breakpoint.next + 1
+ # Build the two lists
+ self.bpbynumber.append(self)
+ if (file, line) in self.bplist:
+ self.bplist[file, line].append(self)
+ else:
+ self.bplist[file, line] = [self]
+
+
+ def deleteMe(self):
+ index = (self.file, self.line)
+ self.bpbynumber[self.number] = None # No longer in list
+ self.bplist[index].remove(self)
+ if not self.bplist[index]:
+ # No more bp for this f:l combo
+ del self.bplist[index]
+
+ def enable(self):
+ self.enabled = 1
+
+ def disable(self):
+ self.enabled = 0
+
+ def bpprint(self, out=None):
+ if out is None:
+ out = sys.stdout
+ if self.temporary:
+ disp = 'del '
+ else:
+ disp = 'keep '
+ if self.enabled:
+ disp = disp + 'yes '
+ else:
+ disp = disp + 'no '
+ print >>out, '%-4dbreakpoint %s at %s:%d' % (self.number, disp,
+ self.file, self.line)
+ if self.cond:
+ print >>out, '\tstop only if %s' % (self.cond,)
+ if self.ignore:
+ print >>out, '\tignore next %d hits' % (self.ignore)
+ if (self.hits):
+ if (self.hits > 1): ss = 's'
+ else: ss = ''
+ print >>out, ('\tbreakpoint already hit %d time%s' %
+ (self.hits, ss))
+
+# -----------end of Breakpoint class----------
+
+def checkfuncname(b, frame):
+ """Check whether we should break here because of `b.funcname`."""
+ if not b.funcname:
+ # Breakpoint was set via line number.
+ if b.line != frame.f_lineno:
+ # Breakpoint was set at a line with a def statement and the function
+ # defined is called: don't break.
+ return False
+ return True
+
+ # Breakpoint set via function name.
+
+ if frame.f_code.co_name != b.funcname:
+ # It's not a function call, but rather execution of def statement.
+ return False
+
+ # We are in the right frame.
+ if not b.func_first_executable_line:
+ # The function is entered for the 1st time.
+ b.func_first_executable_line = frame.f_lineno
+
+ if b.func_first_executable_line != frame.f_lineno:
+ # But we are not at the first line number: don't break.
+ return False
+ return True
+
+# Determines if there is an effective (active) breakpoint at this
+# line of code. Returns a (breakpoint, flag) pair, or (None, None) if none.
+def effective(file, line, frame):
+ """Determine which breakpoint for this file:line is to be acted upon.
+
+ Called only if we know there is a bpt at this
+ location. Returns breakpoint that was triggered and a flag
+ that indicates if it is ok to delete a temporary bp.
+
+ """
+ possibles = Breakpoint.bplist[file,line]
+ for i in range(0, len(possibles)):
+ b = possibles[i]
+ if b.enabled == 0:
+ continue
+ if not checkfuncname(b, frame):
+ continue
+ # Count every hit when bp is enabled
+ b.hits = b.hits + 1
+ if not b.cond:
+ # If unconditional, and ignoring,
+ # go on to next, else break
+ if b.ignore > 0:
+ b.ignore = b.ignore -1
+ continue
+ else:
+ # breakpoint and marker that's ok
+ # to delete if temporary
+ return (b,1)
+ else:
+ # Conditional bp.
+ # Ignore count applies only to those bpt hits where the
+ # condition evaluates to true.
+ try:
+ val = eval(b.cond, frame.f_globals,
+ frame.f_locals)
+ if val:
+ if b.ignore > 0:
+ b.ignore = b.ignore -1
+ # continue
+ else:
+ return (b,1)
+ # else:
+ # continue
+ except:
+ # if eval fails, most conservative
+ # thing is to stop on breakpoint
+ # regardless of ignore count.
+ # Don't delete temporary,
+ # as another hint to user.
+ return (b,0)
+ return (None, None)
+
+# -------------------- testing --------------------
+
+class Tdb(Bdb):
+ def user_call(self, frame, args):
+ name = frame.f_code.co_name
+ if not name: name = '???'
+ print '+++ call', name, args
+ def user_line(self, frame):
+ import linecache
+ name = frame.f_code.co_name
+ if not name: name = '???'
+ fn = self.canonic(frame.f_code.co_filename)
+ line = linecache.getline(fn, frame.f_lineno, frame.f_globals)
+ print '+++', fn, frame.f_lineno, name, ':', line.strip()
+ def user_return(self, frame, retval):
+ print '+++ return', retval
+ def user_exception(self, frame, exc_stuff):
+ print '+++ exception', exc_stuff
+ self.set_continue()
+
+def foo(n):
+ print 'foo(', n, ')'
+ x = bar(n*10)
+ print 'bar returned', x
+
+def bar(a):
+ print 'bar(', a, ')'
+ return a/2
+
+def test():
+ t = Tdb()
+ t.run('import bdb; bdb.foo(10)')
+
+# end
diff --git a/cashew/Lib/binhex.py b/cashew/Lib/binhex.py
new file mode 100644
index 0000000..14ec233
--- /dev/null
+++ b/cashew/Lib/binhex.py
@@ -0,0 +1,518 @@
+"""Macintosh binhex compression/decompression.
+
+easy interface:
+binhex(inputfilename, outputfilename)
+hexbin(inputfilename, outputfilename)
+"""
+
+#
+# Jack Jansen, CWI, August 1995.
+#
+# The module is supposed to be as compatible as possible. Especially the
+# easy interface should work "as expected" on any platform.
+# XXXX Note: currently, textfiles appear in mac-form on all platforms.
+# We seem to lack a simple character-translate in python.
+# (we should probably use ISO-Latin-1 on all but the mac platform).
+# XXXX The simple routines are too simple: they expect to hold the complete
+# files in-core. Should be fixed.
+# XXXX It would be nice to handle AppleDouble format on unix
+# (for servers serving macs).
+# XXXX I don't understand what happens when you get 0x90 times the same byte on
+# input. The resulting code (xx 90 90) would appear to be interpreted as an
+# escaped *value* of 0x90. All coders I've seen appear to ignore this nicety...
+#
+import sys
+import os
+import struct
+import binascii
+
+__all__ = ["binhex","hexbin","Error"]
+
+class Error(Exception):
+ pass
+
+# States (what have we written)
+_DID_HEADER = 0
+_DID_DATA = 1
+
+# Various constants
+REASONABLY_LARGE=32768 # Minimal amount we pass the rle-coder
+LINELEN=64
+RUNCHAR=chr(0x90) # run-length introducer
+
+#
+# This code is no longer byte-order dependent
+
+#
+# Workarounds for non-mac machines.
+try:
+ from Carbon.File import FSSpec, FInfo
+ from MacOS import openrf
+
+ def getfileinfo(name):
+ finfo = FSSpec(name).FSpGetFInfo()
+ dir, file = os.path.split(name)
+ # XXX Get resource/data sizes
+ fp = open(name, 'rb')
+ fp.seek(0, 2)
+ dlen = fp.tell()
+ fp = openrf(name, '*rb')
+ fp.seek(0, 2)
+ rlen = fp.tell()
+ return file, finfo, dlen, rlen
+
+ def openrsrc(name, *mode):
+ if not mode:
+ mode = '*rb'
+ else:
+ mode = '*' + mode[0]
+ return openrf(name, mode)
+
+except ImportError:
+ #
+ # Glue code for non-macintosh usage
+ #
+
+ class FInfo:
+ def __init__(self):
+ self.Type = '????'
+ self.Creator = '????'
+ self.Flags = 0
+
+ def getfileinfo(name):
+ finfo = FInfo()
+ # Quick check for textfile
+ fp = open(name)
+ data = open(name).read(256)
+ for c in data:
+ if not c.isspace() and (c<' ' or ord(c) > 0x7f):
+ break
+ else:
+ finfo.Type = 'TEXT'
+ fp.seek(0, 2)
+ dsize = fp.tell()
+ fp.close()
+ dir, file = os.path.split(name)
+ file = file.replace(':', '-', 1)
+ return file, finfo, dsize, 0
+
+ class openrsrc:
+ def __init__(self, *args):
+ pass
+
+ def read(self, *args):
+ return ''
+
+ def write(self, *args):
+ pass
+
+ def close(self):
+ pass
+
+class _Hqxcoderengine:
+ """Write data to the coder in 3-byte chunks"""
+
+ def __init__(self, ofp):
+ self.ofp = ofp
+ self.data = ''
+ self.hqxdata = ''
+ self.linelen = LINELEN-1
+
+ def write(self, data):
+ self.data = self.data + data
+ datalen = len(self.data)
+ todo = (datalen//3)*3
+ data = self.data[:todo]
+ self.data = self.data[todo:]
+ if not data:
+ return
+ self.hqxdata = self.hqxdata + binascii.b2a_hqx(data)
+ self._flush(0)
+
+ def _flush(self, force):
+ first = 0
+ while first <= len(self.hqxdata)-self.linelen:
+ last = first + self.linelen
+ self.ofp.write(self.hqxdata[first:last]+'\n')
+ self.linelen = LINELEN
+ first = last
+ self.hqxdata = self.hqxdata[first:]
+ if force:
+ self.ofp.write(self.hqxdata + ':\n')
+
+ def close(self):
+ if self.data:
+ self.hqxdata = \
+ self.hqxdata + binascii.b2a_hqx(self.data)
+ self._flush(1)
+ self.ofp.close()
+ del self.ofp
+
+class _Rlecoderengine:
+ """Write data to the RLE-coder in suitably large chunks"""
+
+ def __init__(self, ofp):
+ self.ofp = ofp
+ self.data = ''
+
+ def write(self, data):
+ self.data = self.data + data
+ if len(self.data) < REASONABLY_LARGE:
+ return
+ rledata = binascii.rlecode_hqx(self.data)
+ self.ofp.write(rledata)
+ self.data = ''
+
+ def close(self):
+ if self.data:
+ rledata = binascii.rlecode_hqx(self.data)
+ self.ofp.write(rledata)
+ self.ofp.close()
+ del self.ofp
+
+class BinHex:
+ def __init__(self, name_finfo_dlen_rlen, ofp):
+ name, finfo, dlen, rlen = name_finfo_dlen_rlen
+ if type(ofp) == type(''):
+ ofname = ofp
+ ofp = open(ofname, 'w')
+ ofp.write('(This file must be converted with BinHex 4.0)\n\n:')
+ hqxer = _Hqxcoderengine(ofp)
+ self.ofp = _Rlecoderengine(hqxer)
+ self.crc = 0
+ if finfo is None:
+ finfo = FInfo()
+ self.dlen = dlen
+ self.rlen = rlen
+ self._writeinfo(name, finfo)
+ self.state = _DID_HEADER
+
+ def _writeinfo(self, name, finfo):
+ nl = len(name)
+ if nl > 63:
+ raise Error, 'Filename too long'
+ d = chr(nl) + name + '\0'
+ d2 = finfo.Type + finfo.Creator
+
+ # Force all structs to be packed with big-endian
+ d3 = struct.pack('>h', finfo.Flags)
+ d4 = struct.pack('>ii', self.dlen, self.rlen)
+ info = d + d2 + d3 + d4
+ self._write(info)
+ self._writecrc()
+
+ def _write(self, data):
+ self.crc = binascii.crc_hqx(data, self.crc)
+ self.ofp.write(data)
+
+ def _writecrc(self):
+ # XXXX Should this be here??
+ # self.crc = binascii.crc_hqx('\0\0', self.crc)
+ if self.crc < 0:
+ fmt = '>h'
+ else:
+ fmt = '>H'
+ self.ofp.write(struct.pack(fmt, self.crc))
+ self.crc = 0
+
+ def write(self, data):
+ if self.state != _DID_HEADER:
+ raise Error, 'Writing data at the wrong time'
+ self.dlen = self.dlen - len(data)
+ self._write(data)
+
+ def close_data(self):
+ if self.dlen != 0:
+ raise Error, 'Incorrect data size, diff=%r' % (self.rlen,)
+ self._writecrc()
+ self.state = _DID_DATA
+
+ def write_rsrc(self, data):
+ if self.state < _DID_DATA:
+ self.close_data()
+ if self.state != _DID_DATA:
+ raise Error, 'Writing resource data at the wrong time'
+ self.rlen = self.rlen - len(data)
+ self._write(data)
+
+ def close(self):
+ if self.state is None:
+ return
+ try:
+ if self.state < _DID_DATA:
+ self.close_data()
+ if self.state != _DID_DATA:
+ raise Error, 'Close at the wrong time'
+ if self.rlen != 0:
+ raise Error, \
+ "Incorrect resource-datasize, diff=%r" % (self.rlen,)
+ self._writecrc()
+ finally:
+ self.state = None
+ ofp = self.ofp
+ del self.ofp
+ ofp.close()
+
+def binhex(inp, out):
+ """(infilename, outfilename) - Create binhex-encoded copy of a file"""
+ finfo = getfileinfo(inp)
+ ofp = BinHex(finfo, out)
+
+ ifp = open(inp, 'rb')
+ # XXXX Do textfile translation on non-mac systems
+ while 1:
+ d = ifp.read(128000)
+ if not d: break
+ ofp.write(d)
+ ofp.close_data()
+ ifp.close()
+
+ ifp = openrsrc(inp, 'rb')
+ while 1:
+ d = ifp.read(128000)
+ if not d: break
+ ofp.write_rsrc(d)
+ ofp.close()
+ ifp.close()
+
+class _Hqxdecoderengine:
+ """Read data via the decoder in 4-byte chunks"""
+
+ def __init__(self, ifp):
+ self.ifp = ifp
+ self.eof = 0
+
+ def read(self, totalwtd):
+ """Read at least wtd bytes (or until EOF)"""
+ decdata = ''
+ wtd = totalwtd
+ #
+ # The loop here is convoluted, since we don't really know how
+ # much to decode: there may be newlines in the incoming data.
+ while wtd > 0:
+ if self.eof: return decdata
+ wtd = ((wtd+2)//3)*4
+ data = self.ifp.read(wtd)
+ #
+ # Next problem: there may not be a complete number of
+ # bytes in what we pass to a2b. Solve by yet another
+ # loop.
+ #
+ while 1:
+ try:
+ decdatacur, self.eof = \
+ binascii.a2b_hqx(data)
+ break
+ except binascii.Incomplete:
+ pass
+ newdata = self.ifp.read(1)
+ if not newdata:
+ raise Error, \
+ 'Premature EOF on binhex file'
+ data = data + newdata
+ decdata = decdata + decdatacur
+ wtd = totalwtd - len(decdata)
+ if not decdata and not self.eof:
+ raise Error, 'Premature EOF on binhex file'
+ return decdata
+
+ def close(self):
+ self.ifp.close()
+
+class _Rledecoderengine:
+ """Read data via the RLE-coder"""
+
+ def __init__(self, ifp):
+ self.ifp = ifp
+ self.pre_buffer = ''
+ self.post_buffer = ''
+ self.eof = 0
+
+ def read(self, wtd):
+ if wtd > len(self.post_buffer):
+ self._fill(wtd-len(self.post_buffer))
+ rv = self.post_buffer[:wtd]
+ self.post_buffer = self.post_buffer[wtd:]
+ return rv
+
+ def _fill(self, wtd):
+ self.pre_buffer = self.pre_buffer + self.ifp.read(wtd+4)
+ if self.ifp.eof:
+ self.post_buffer = self.post_buffer + \
+ binascii.rledecode_hqx(self.pre_buffer)
+ self.pre_buffer = ''
+ return
+
+ #
+ # Obfuscated code ahead. We have to take care that we don't
+ # end up with an orphaned RUNCHAR later on. So, we keep a couple
+ # of bytes in the buffer, depending on what the end of
+ # the buffer looks like:
+ # '\220\0\220' - Keep 3 bytes: repeated \220 (escaped as \220\0)
+ # '?\220' - Keep 2 bytes: repeated something-else
+ # '\220\0' - Escaped \220: Keep 2 bytes.
+ # '?\220?' - Complete repeat sequence: decode all
+ # otherwise: keep 1 byte.
+ #
+ mark = len(self.pre_buffer)
+ if self.pre_buffer[-3:] == RUNCHAR + '\0' + RUNCHAR:
+ mark = mark - 3
+ elif self.pre_buffer[-1] == RUNCHAR:
+ mark = mark - 2
+ elif self.pre_buffer[-2:] == RUNCHAR + '\0':
+ mark = mark - 2
+ elif self.pre_buffer[-2] == RUNCHAR:
+ pass # Decode all
+ else:
+ mark = mark - 1
+
+ self.post_buffer = self.post_buffer + \
+ binascii.rledecode_hqx(self.pre_buffer[:mark])
+ self.pre_buffer = self.pre_buffer[mark:]
+
+ def close(self):
+ self.ifp.close()
+
+class HexBin:
+ def __init__(self, ifp):
+ if type(ifp) == type(''):
+ ifp = open(ifp)
+ #
+ # Find initial colon.
+ #
+ while 1:
+ ch = ifp.read(1)
+ if not ch:
+ raise Error, "No binhex data found"
+ # Cater for \r\n terminated lines (which show up as \n\r, hence
+ # all lines start with \r)
+ if ch == '\r':
+ continue
+ if ch == ':':
+ break
+ if ch != '\n':
+ dummy = ifp.readline()
+
+ hqxifp = _Hqxdecoderengine(ifp)
+ self.ifp = _Rledecoderengine(hqxifp)
+ self.crc = 0
+ self._readheader()
+
+ def _read(self, len):
+ data = self.ifp.read(len)
+ self.crc = binascii.crc_hqx(data, self.crc)
+ return data
+
+ def _checkcrc(self):
+ filecrc = struct.unpack('>h', self.ifp.read(2))[0] & 0xffff
+ #self.crc = binascii.crc_hqx('\0\0', self.crc)
+ # XXXX Is this needed??
+ self.crc = self.crc & 0xffff
+ if filecrc != self.crc:
+ raise Error, 'CRC error, computed %x, read %x' \
+ %(self.crc, filecrc)
+ self.crc = 0
+
+ def _readheader(self):
+ len = self._read(1)
+ fname = self._read(ord(len))
+ rest = self._read(1+4+4+2+4+4)
+ self._checkcrc()
+
+ type = rest[1:5]
+ creator = rest[5:9]
+ flags = struct.unpack('>h', rest[9:11])[0]
+ self.dlen = struct.unpack('>l', rest[11:15])[0]
+ self.rlen = struct.unpack('>l', rest[15:19])[0]
+
+ self.FName = fname
+ self.FInfo = FInfo()
+ self.FInfo.Creator = creator
+ self.FInfo.Type = type
+ self.FInfo.Flags = flags
+
+ self.state = _DID_HEADER
+
+ def read(self, *n):
+ if self.state != _DID_HEADER:
+ raise Error, 'Read data at wrong time'
+ if n:
+ n = n[0]
+ n = min(n, self.dlen)
+ else:
+ n = self.dlen
+ rv = ''
+ while len(rv) < n:
+ rv = rv + self._read(n-len(rv))
+ self.dlen = self.dlen - n
+ return rv
+
+ def close_data(self):
+ if self.state != _DID_HEADER:
+ raise Error, 'close_data at wrong time'
+ if self.dlen:
+ dummy = self._read(self.dlen)
+ self._checkcrc()
+ self.state = _DID_DATA
+
+ def read_rsrc(self, *n):
+ if self.state == _DID_HEADER:
+ self.close_data()
+ if self.state != _DID_DATA:
+ raise Error, 'Read resource data at wrong time'
+ if n:
+ n = n[0]
+ n = min(n, self.rlen)
+ else:
+ n = self.rlen
+ self.rlen = self.rlen - n
+ return self._read(n)
+
+ def close(self):
+ if self.state is None:
+ return
+ try:
+ if self.rlen:
+ dummy = self.read_rsrc(self.rlen)
+ self._checkcrc()
+ finally:
+ self.state = None
+ self.ifp.close()
+
+def hexbin(inp, out):
+ """(infilename, outfilename) - Decode binhexed file"""
+ ifp = HexBin(inp)
+ finfo = ifp.FInfo
+ if not out:
+ out = ifp.FName
+
+ ofp = open(out, 'wb')
+ # XXXX Do translation on non-mac systems
+ while 1:
+ d = ifp.read(128000)
+ if not d: break
+ ofp.write(d)
+ ofp.close()
+ ifp.close_data()
+
+ d = ifp.read_rsrc(128000)
+ if d:
+ ofp = openrsrc(out, 'wb')
+ ofp.write(d)
+ while 1:
+ d = ifp.read_rsrc(128000)
+ if not d: break
+ ofp.write(d)
+ ofp.close()
+
+ ifp.close()
+
+def _test():
+ fname = sys.argv[1]
+ binhex(fname, fname+'.hqx')
+ hexbin(fname+'.hqx', fname+'.viahqx')
+ #hexbin(fname, fname+'.unpacked')
+ sys.exit(1)
+
+if __name__ == '__main__':
+ _test()
diff --git a/cashew/Lib/bisect.py b/cashew/Lib/bisect.py
new file mode 100644
index 0000000..4a4d052
--- /dev/null
+++ b/cashew/Lib/bisect.py
@@ -0,0 +1,92 @@
+"""Bisection algorithms."""
+
+def insort_right(a, x, lo=0, hi=None):
+    """Insert item x in list a, and keep it sorted assuming a is sorted.
+
+    If x is already in a, insert it to the right of the rightmost x.
+
+    Optional args lo (default 0) and hi (default len(a)) bound the
+    slice of a to be searched.  A negative lo raises ValueError.
+    """
+
+    if lo < 0:
+        raise ValueError('lo must be non-negative')
+    low = lo
+    high = len(a) if hi is None else hi
+    # Shrink the window [low, high) until it is empty; items for which
+    # "x < item" is false end up to the left of the insertion point.
+    while low < high:
+        middle = (low + high) // 2
+        if x < a[middle]:
+            high = middle
+        else:
+            low = middle + 1
+    a.insert(low, x)
+
+insort = insort_right   # backward compatibility
+
+def bisect_right(a, x, lo=0, hi=None):
+    """Return the index where to insert item x in list a, assuming a is sorted.
+
+    The return value i is such that all e in a[:i] have e <= x, and all e in
+    a[i:] have e > x.  So if x already appears in the list, a.insert(i, x)
+    will insert just after the rightmost x already there.
+
+    Optional args lo (default 0) and hi (default len(a)) bound the
+    slice of a to be searched.  A negative lo raises ValueError.
+    """
+
+    if lo < 0:
+        raise ValueError('lo must be non-negative')
+    low = lo
+    high = len(a) if hi is None else hi
+    while low < high:
+        middle = (low + high) // 2
+        if x < a[middle]:
+            # Insertion point is at or before middle.
+            high = middle
+        else:
+            # a[middle] is not greater than x: insertion point is after it.
+            low = middle + 1
+    return low
+
+bisect = bisect_right   # backward compatibility
+
+def insort_left(a, x, lo=0, hi=None):
+    """Insert item x in list a, and keep it sorted assuming a is sorted.
+
+    If x is already in a, insert it to the left of the leftmost x.
+
+    Optional args lo (default 0) and hi (default len(a)) bound the
+    slice of a to be searched.  A negative lo raises ValueError.
+    """
+
+    if lo < 0:
+        raise ValueError('lo must be non-negative')
+    low = lo
+    high = len(a) if hi is None else hi
+    # Shrink the window [low, high) until it is empty; items for which
+    # "item < x" is true end up to the left of the insertion point.
+    while low < high:
+        middle = (low + high) // 2
+        if a[middle] < x:
+            low = middle + 1
+        else:
+            high = middle
+    a.insert(low, x)
+
+
+def bisect_left(a, x, lo=0, hi=None):
+    """Return the index where to insert item x in list a, assuming a is sorted.
+
+    The return value i is such that all e in a[:i] have e < x, and all e in
+    a[i:] have e >= x.  So if x already appears in the list, a.insert(i, x)
+    will insert just before the leftmost x already there.
+
+    Optional args lo (default 0) and hi (default len(a)) bound the
+    slice of a to be searched.  A negative lo raises ValueError.
+    """
+
+    if lo < 0:
+        raise ValueError('lo must be non-negative')
+    low = lo
+    high = len(a) if hi is None else hi
+    while low < high:
+        middle = (low + high) // 2
+        if a[middle] < x:
+            # a[middle] is strictly smaller: insertion point is after it.
+            low = middle + 1
+        else:
+            # Insertion point is at or before middle.
+            high = middle
+    return low
+
+# Overwrite above definitions with a fast C implementation
+try:
+ from _bisect import *
+except ImportError:
+ pass
diff --git a/cashew/Lib/calendar.py b/cashew/Lib/calendar.py
new file mode 100644
index 0000000..8fcceb7
--- /dev/null
+++ b/cashew/Lib/calendar.py
@@ -0,0 +1,714 @@
+"""Calendar printing functions
+
+Note when comparing these calendars to the ones printed by cal(1): By
+default, these calendars have Monday as the first day of the week, and
+Sunday as the last (the European convention). Use setfirstweekday() to
+set the first day of the week (0=Monday, 6=Sunday)."""
+
+import sys
+import datetime
+import locale as _locale
+
+__all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday",
+ "firstweekday", "isleap", "leapdays", "weekday", "monthrange",
+ "monthcalendar", "prmonth", "month", "prcal", "calendar",
+ "timegm", "month_name", "month_abbr", "day_name", "day_abbr"]
+
+# Exception raised for bad input (with string parameter for details)
+error = ValueError
+
+# Exceptions raised for bad input
+class IllegalMonthError(ValueError):
+    """Raised for a month number outside 1-12.
+
+    The offending value is kept on the instance as .month.
+    """
+
+    def __init__(self, month):
+        self.month = month
+
+    def __str__(self):
+        template = "bad month number %r; must be 1-12"
+        return template % self.month
+
+
+class IllegalWeekdayError(ValueError):
+    """Raised for a weekday number outside 0 (Monday) to 6 (Sunday).
+
+    The offending value is kept on the instance as .weekday.
+    """
+
+    def __init__(self, weekday):
+        self.weekday = weekday
+
+    def __str__(self):
+        template = "bad weekday number %r; must be 0 (Monday) to 6 (Sunday)"
+        return template % self.weekday
+
+
+# Constants for months referenced later
+January = 1
+February = 2
+
+# Number of days per month (except for February in leap years).
+# Index 0 is a placeholder so the list can be indexed directly with a
+# 1-based month number, as monthrange() does with mdays[month].
+mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
+
+# This module used to have hard-coded lists of day and month names, as
+# English strings. The classes following emulate a read-only version of
+# that, but supply localized names. Note that the values are computed
+# fresh on each call, in case the user changes locale between calls.
+
+class _localized_month:
+    """Read-only, sequence-like view of month names.
+
+    Instances are indexed 0..12: index 0 always yields the empty string
+    and 1..12 yield the month formatted with the strftime format given
+    to the constructor.  Names are computed on every access.
+    """
+
+    # Bound strftime methods for the 1st of each month of 2001, with a
+    # stub at index 0 that ignores its argument and returns "".
+    _months = [datetime.date(2001, i+1, 1).strftime for i in range(12)]
+    _months.insert(0, lambda x: "")
+
+    def __init__(self, format):
+        self.format = format
+
+    def __getitem__(self, i):
+        selected = self._months[i]
+        if not isinstance(i, slice):
+            # Single index: one formatter, one string.
+            return selected(self.format)
+        # Slice: a list of formatters, so return a list of strings.
+        return [render(self.format) for render in selected]
+
+    def __len__(self):
+        return 13
+
+
+class _localized_day:
+    """Read-only, sequence-like view of weekday names.
+
+    Instances are indexed 0..6 (0 is Monday) and yield the weekday
+    formatted with the strftime format given to the constructor.  Names
+    are computed on every access.
+    """
+
+    # January 1, 2001, was a Monday.
+    _days = [datetime.date(2001, 1, i+1).strftime for i in range(7)]
+
+    def __init__(self, format):
+        self.format = format
+
+    def __getitem__(self, i):
+        selected = self._days[i]
+        if not isinstance(i, slice):
+            # Single index: one formatter, one string.
+            return selected(self.format)
+        # Slice: a list of formatters, so return a list of strings.
+        return [render(self.format) for render in selected]
+
+    def __len__(self):
+        return 7
+
+
+# Full and abbreviated names of weekdays
+day_name = _localized_day('%A')
+day_abbr = _localized_day('%a')
+
+# Full and abbreviated names of months (1-based arrays!!!)
+month_name = _localized_month('%B')
+month_abbr = _localized_month('%b')
+
+# Constants for weekdays
+(MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) = range(7)
+
+
+def isleap(year):
+    """Return True for leap years, False for non-leap years."""
+    if year % 4 != 0:
+        return False
+    # Divisible by 4: a leap year unless it is a century year that is
+    # not divisible by 400.
+    return year % 100 != 0 or year % 400 == 0
+
+
+def leapdays(y1, y2):
+    """Return number of leap years in range [y1, y2).
+       Assume y1 <= y2."""
+    first = y1 - 1
+    last = y2 - 1
+    by_four = last//4 - first//4
+    by_hundred = last//100 - first//100
+    by_four_hundred = last//400 - first//400
+    # Every 4th year counts, century years do not, every 400th does again.
+    return by_four - by_hundred + by_four_hundred
+
+
+def weekday(year, month, day):
+    """Return weekday (0-6 ~ Mon-Sun) for the given year, month (1-12)
+       and day (1-31).  The date is built with datetime.date, which
+       decides what counts as a valid date."""
+    the_date = datetime.date(year, month, day)
+    return the_date.weekday()
+
+
+def monthrange(year, month):
+    """Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for
+       year, month.  Raises IllegalMonthError if month is not 1-12."""
+    if not (1 <= month <= 12):
+        raise IllegalMonthError(month)
+    first_weekday = weekday(year, month, 1)
+    length = mdays[month]
+    if month == February and isleap(year):
+        # mdays holds 28 for February; add the leap day.
+        length += 1
+    return first_weekday, length
+
+
+class Calendar(object):
+    """
+    Base calendar class. It performs no formatting; it only produces the
+    day and date data that subclasses turn into output.
+    """
+
+    def __init__(self, firstweekday=0):
+        self.firstweekday = firstweekday # 0 = Monday, 6 = Sunday
+
+    def getfirstweekday(self):
+        """Return the configured first weekday reduced modulo 7."""
+        return self._firstweekday % 7
+
+    def setfirstweekday(self, firstweekday):
+        """Store the first weekday as given; it is reduced modulo 7 on read."""
+        self._firstweekday = firstweekday
+
+    firstweekday = property(getfirstweekday, setfirstweekday)
+
+    def iterweekdays(self):
+        """
+        Return an iterator over the seven weekday numbers of one week,
+        beginning with the configured first weekday.
+        """
+        first = self.firstweekday
+        for offset in range(7):
+            yield (first + offset) % 7
+
+    def itermonthdates(self, year, month):
+        """
+        Return an iterator over datetime.date values for one month. Whole
+        weeks are always produced, so dates from the neighbouring months
+        are included at either end.
+        """
+        current = datetime.date(year, month, 1)
+        # Step back to the first day of the week containing the 1st.
+        lead = (current.weekday() - self.firstweekday) % 7
+        current -= datetime.timedelta(days=lead)
+        step = datetime.timedelta(days=1)
+        while True:
+            yield current
+            try:
+                current += step
+            except OverflowError:
+                # Adding one day could fail after datetime.MAXYEAR
+                break
+            # Stop at the first week boundary that lies outside the month.
+            if current.month != month and current.weekday() == self.firstweekday:
+                break
+
+    def itermonthdays2(self, year, month):
+        """
+        Like itermonthdates(), but yields (day number, weekday number)
+        tuples. Days outside the specified month have day number 0.
+        """
+        position = self.firstweekday
+        for day in self.itermonthdays(year, month):
+            yield day, position % 7
+            position += 1
+
+    def itermonthdays(self, year, month):
+        """
+        Like itermonthdates(), but yields day numbers. Days outside the
+        specified month are yielded as 0.
+        """
+        day1, ndays = monthrange(year, month)
+        # Padding in front of the 1st to fill the first week.
+        leading = (day1 - self.firstweekday) % 7
+        for _ in range(leading):
+            yield 0
+        for day in range(1, ndays + 1):
+            yield day
+        # Padding after the last day to fill the final week.
+        trailing = (self.firstweekday - day1 - ndays) % 7
+        for _ in range(trailing):
+            yield 0
+
+    def monthdatescalendar(self, year, month):
+        """
+        Return a month's calendar as a list of weeks; each week is a list
+        of seven datetime.date values.
+        """
+        dates = list(self.itermonthdates(year, month))
+        return [dates[start:start+7] for start in range(0, len(dates), 7)]
+
+    def monthdays2calendar(self, year, month):
+        """
+        Return a month's calendar as a list of weeks; each week is a list
+        of (day number, weekday number) tuples. Day numbers outside this
+        month are zero.
+        """
+        days = list(self.itermonthdays2(year, month))
+        return [days[start:start+7] for start in range(0, len(days), 7)]
+
+    def monthdayscalendar(self, year, month):
+        """
+        Return a month's calendar as a list of weeks; each week is a list
+        of day numbers, with zero for days outside this month.
+        """
+        days = list(self.itermonthdays(year, month))
+        return [days[start:start+7] for start in range(0, len(days), 7)]
+
+    def yeardatescalendar(self, year, width=3):
+        """
+        Return the data for the specified year ready for formatting: a list
+        of month rows, each holding up to width months. A month is a list
+        of weeks as returned by monthdatescalendar(), so the days are
+        datetime.date objects.
+        """
+        months = [self.monthdatescalendar(year, m)
+                  for m in range(January, January+12)]
+        return [months[start:start+width]
+                for start in range(0, len(months), width)]
+
+    def yeardays2calendar(self, year, width=3):
+        """
+        Return the data for the specified year ready for formatting (same
+        layout as yeardatescalendar()). Week entries are
+        (day number, weekday number) tuples; day numbers outside the month
+        are zero.
+        """
+        months = [self.monthdays2calendar(year, m)
+                  for m in range(January, January+12)]
+        return [months[start:start+width]
+                for start in range(0, len(months), width)]
+
+    def yeardayscalendar(self, year, width=3):
+        """
+        Return the data for the specified year ready for formatting (same
+        layout as yeardatescalendar()). Week entries are day numbers; day
+        numbers outside the month are zero.
+        """
+        months = [self.monthdayscalendar(year, m)
+                  for m in range(January, January+12)]
+        return [months[start:start+width]
+                for start in range(0, len(months), width)]
+
+
+class TextCalendar(Calendar):
+    """
+    Calendar subclass that renders plain-text calendars resembling the
+    output of the UNIX program cal.
+    """
+
+    def prweek(self, theweek, width):
+        """
+        Print a single week (no newline).
+        """
+        print self.formatweek(theweek, width),
+
+    def formatday(self, day, weekday, width):
+        """
+        Return one day centered in width columns; day 0 (a day outside
+        the month) gives blanks only.
+        """
+        text = '' if day == 0 else '%2i' % day  # right-align single-digit days
+        return text.center(width)
+
+    def formatweek(self, theweek, width):
+        """
+        Return a single week as one string (no newline). theweek is a
+        sequence of (day number, weekday number) pairs.
+        """
+        cells = [self.formatday(d, wd, width) for (d, wd) in theweek]
+        return ' '.join(cells)
+
+    def formatweekday(self, day, width):
+        """
+        Return a weekday name truncated to and centered in width columns.
+        Full names are used from width 9 upwards, abbreviations below.
+        """
+        names = day_name if width >= 9 else day_abbr
+        return names[day][:width].center(width)
+
+    def formatweekheader(self, width):
+        """
+        Return the weekday-name header line for a week.
+        """
+        labels = [self.formatweekday(i, width) for i in self.iterweekdays()]
+        return ' '.join(labels)
+
+    def formatmonthname(self, theyear, themonth, width, withyear=True):
+        """
+        Return the month name, optionally followed by the year, centered
+        in width columns.
+        """
+        title = month_name[themonth]
+        if withyear:
+            title = "%s %r" % (title, theyear)
+        return title.center(width)
+
+    def prmonth(self, theyear, themonth, w=0, l=0):
+        """
+        Print a month's calendar.
+        """
+        print self.formatmonth(theyear, themonth, w, l),
+
+    def formatmonth(self, theyear, themonth, w=0, l=0):
+        """
+        Return a month's calendar as a multi-line string. w is the day
+        column width (at least 2) and l the number of newlines after each
+        line (at least 1).
+        """
+        w = max(2, w)
+        l = max(1, l)
+        gap = '\n' * l
+        lines = [
+            self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1).rstrip(),
+            self.formatweekheader(w).rstrip(),
+        ]
+        for week in self.monthdays2calendar(theyear, themonth):
+            lines.append(self.formatweek(week, w).rstrip())
+        # Every line, including the last, is followed by the gap.
+        return gap.join(lines) + gap
+
+    def formatyear(self, theyear, w=2, l=1, c=6, m=3):
+        """
+        Return a year's calendar as a multi-line string. w is the day
+        column width, l the newlines per line, c the spacing between month
+        columns and m the number of months per row.
+        """
+        w = max(2, w)
+        l = max(1, l)
+        c = max(2, c)
+        colwidth = (w + 1) * 7 - 1
+        gap = '\n' * l
+        out = [repr(theyear).center(colwidth*m+c*(m-1)).rstrip(), gap]
+        header = self.formatweekheader(w)
+        for (i, row) in enumerate(self.yeardays2calendar(theyear, m)):
+            # months in this row
+            months = range(m*i+1, min(m*(i+1)+1, 13))
+            names = [self.formatmonthname(theyear, k, colwidth, False)
+                     for k in months]
+            headers = [header for k in months]
+            out.append(gap)
+            out.append(formatstring(names, colwidth, c).rstrip())
+            out.append(gap)
+            out.append(formatstring(headers, colwidth, c).rstrip())
+            out.append(gap)
+            # max number of weeks for this row
+            height = max(len(cal) for cal in row)
+            for j in range(height):
+                # Months with fewer weeks contribute an empty cell.
+                weeks = [self.formatweek(cal[j], w) if j < len(cal) else ''
+                         for cal in row]
+                out.append(formatstring(weeks, colwidth, c).rstrip())
+                out.append(gap)
+        return ''.join(out)
+
+    def pryear(self, theyear, w=0, l=0, c=6, m=3):
+        """Print a year's calendar."""
+        print self.formatyear(theyear, w, l, c, m)
+
+
+class HTMLCalendar(Calendar):
+    """
+    This calendar returns complete HTML pages.
+    """
+
+    # CSS classes for the day <td>s
+    cssclasses = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"]
+
+    def formatday(self, day, weekday):
+        """
+        Return a day as a table cell. weekday selects the CSS class from
+        cssclasses; day 0 marks a day outside the month.
+        """
+        if day == 0:
+            return '<td class="noday">&nbsp;</td>' # day outside month
+        else:
+            return '<td class="%s">%d</td>' % (self.cssclasses[weekday], day)
+
+    def formatweek(self, theweek):
+        """
+        Return a complete week as a table row. theweek is a sequence of
+        (day number, weekday number) pairs.
+        """
+        s = ''.join(self.formatday(d, wd) for (d, wd) in theweek)
+        return '<tr>%s</tr>' % s
+
+    def formatweekday(self, day):
+        """
+        Return a weekday name as a table header.
+        """
+        return '<th class="%s">%s</th>' % (self.cssclasses[day], day_abbr[day])
+
+    def formatweekheader(self):
+        """
+        Return a header for a week as a table row.
+        """
+        s = ''.join(self.formatweekday(i) for i in self.iterweekdays())
+        return '<tr>%s</tr>' % s
+
+    def formatmonthname(self, theyear, themonth, withyear=True):
+        """
+        Return a month name as a table row; the year is appended when
+        withyear is true.
+        """
+        if withyear:
+            s = '%s %s' % (month_name[themonth], theyear)
+        else:
+            s = '%s' % month_name[themonth]
+        return '<tr><th colspan="7" class="month">%s</th></tr>' % s
+
+    def formatmonth(self, theyear, themonth, withyear=True):
+        """
+        Return a formatted month as a table.
+        """
+        v = []
+        a = v.append
+        a('<table border="0" cellpadding="0" cellspacing="0" class="month">')
+        a('\n')
+        a(self.formatmonthname(theyear, themonth, withyear=withyear))
+        a('\n')
+        a(self.formatweekheader())
+        a('\n')
+        for week in self.monthdays2calendar(theyear, themonth):
+            a(self.formatweek(week))
+            a('\n')
+        a('</table>')
+        a('\n')
+        return ''.join(v)
+
+    def formatyear(self, theyear, width=3):
+        """
+        Return a formatted year as a table of tables. width is the number
+        of months per row (at least 1).
+        """
+        v = []
+        a = v.append
+        width = max(width, 1)
+        a('<table border="0" cellpadding="0" cellspacing="0" class="year">')
+        a('\n')
+        a('<tr><th colspan="%d" class="year">%s</th></tr>' % (width, theyear))
+        for i in range(January, January+12, width):
+            # months in this row
+            months = range(i, min(i+width, 13))
+            a('<tr>')
+            for m in months:
+                a('<td>')
+                a(self.formatmonth(theyear, m, withyear=False))
+                a('</td>')
+            a('</tr>')
+        a('</table>')
+        return ''.join(v)
+
+    def formatyearpage(self, theyear, width=3, css='calendar.css', encoding=None):
+        """
+        Return a formatted year as a complete HTML page, encoded with
+        encoding (default: sys.getdefaultencoding()). Pass css=None to
+        omit the stylesheet link.
+        """
+        if encoding is None:
+            encoding = sys.getdefaultencoding()
+        v = []
+        a = v.append
+        a('<?xml version="1.0" encoding="%s"?>\n' % encoding)
+        a('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n')
+        a('<html>\n')
+        a('<head>\n')
+        a('<meta http-equiv="Content-Type" content="text/html; charset=%s" />\n' % encoding)
+        if css is not None:
+            a('<link rel="stylesheet" type="text/css" href="%s" />\n' % css)
+        a('<title>Calendar for %d</title>\n' % theyear)
+        a('</head>\n')
+        a('<body>\n')
+        a(self.formatyear(theyear, width))
+        a('</body>\n')
+        a('</html>\n')
+        # Characters the target encoding cannot represent become XML
+        # character references instead of raising.
+        return ''.join(v).encode(encoding, "xmlcharrefreplace")
+
+
+class TimeEncoding:
+    """Context manager that switches the LC_TIME locale for a block.
+
+    On entry the current LC_TIME setting is saved and the locale given
+    to the constructor is installed; the value bound by "as" is the
+    encoding part of the newly active LC_TIME locale.  On exit the saved
+    setting is reinstalled.
+    """
+
+    def __init__(self, locale):
+        self.locale = locale
+
+    def __enter__(self):
+        category = _locale.LC_TIME
+        self.oldlocale = _locale.getlocale(category)
+        _locale.setlocale(category, self.locale)
+        language, encoding = _locale.getlocale(category)
+        return encoding
+
+    def __exit__(self, *args):
+        _locale.setlocale(_locale.LC_TIME, self.oldlocale)
+
+
+class LocaleTextCalendar(TextCalendar):
+    """
+    Text calendar whose month and weekday names come from the locale
+    passed to the constructor (default: the result of
+    locale.getdefaultlocale()). When that locale has an encoding, the
+    names are decoded with it and returned as unicode.
+    """
+
+    def __init__(self, firstweekday=0, locale=None):
+        TextCalendar.__init__(self, firstweekday)
+        if locale is None:
+            locale = _locale.getdefaultlocale()
+        self.locale = locale
+
+    def formatweekday(self, day, width):
+        # The name lookup must happen while the locale is switched.
+        with TimeEncoding(self.locale) as encoding:
+            names = day_name if width >= 9 else day_abbr
+            label = names[day]
+            if encoding is not None:
+                label = label.decode(encoding)
+            return label[:width].center(width)
+
+    def formatmonthname(self, theyear, themonth, width, withyear=True):
+        # The name lookup must happen while the locale is switched.
+        with TimeEncoding(self.locale) as encoding:
+            title = month_name[themonth]
+            if encoding is not None:
+                title = title.decode(encoding)
+            if withyear:
+                title = "%s %r" % (title, theyear)
+            return title.center(width)
+
+
+class LocaleHTMLCalendar(HTMLCalendar):
+    """
+    This class can be passed a locale name in the constructor and will return
+    month and weekday names in the specified locale. If this locale includes
+    an encoding all strings containing month and weekday names will be returned
+    as unicode.
+    """
+    def __init__(self, firstweekday=0, locale=None):
+        HTMLCalendar.__init__(self, firstweekday)
+        if locale is None:
+            locale = _locale.getdefaultlocale()
+        self.locale = locale
+
+    def formatweekday(self, day):
+        """Return a localized weekday name as a table header cell."""
+        # The name lookup must happen while the locale is switched.
+        with TimeEncoding(self.locale) as encoding:
+            s = day_abbr[day]
+            if encoding is not None:
+                s = s.decode(encoding)
+            return '<th class="%s">%s</th>' % (self.cssclasses[day], s)
+
+    def formatmonthname(self, theyear, themonth, withyear=True):
+        """Return a localized month name (optionally with year) as a table row."""
+        # The name lookup must happen while the locale is switched.
+        with TimeEncoding(self.locale) as encoding:
+            s = month_name[themonth]
+            if encoding is not None:
+                s = s.decode(encoding)
+            if withyear:
+                s = '%s %s' % (s, theyear)
+            return '<tr><th colspan="7" class="month">%s</th></tr>' % s
+
+
+# Support for old module level interface
+# A single shared TextCalendar instance backs the module-level helpers
+# that are bound from it in this section.
+c = TextCalendar()
+
+# firstweekday() is the bound getter of that shared instance.
+firstweekday = c.getfirstweekday
+
+def setfirstweekday(firstweekday):
+    """Set the first weekday of the shared module-level calendar c.
+
+    Raises IllegalWeekdayError if firstweekday has no __index__
+    attribute or is not between MONDAY and SUNDAY inclusive.
+    """
+    try:
+        # Attribute probe only: rejects values that cannot act as an index.
+        firstweekday.__index__
+    except AttributeError:
+        raise IllegalWeekdayError(firstweekday)
+    in_range = MONDAY <= firstweekday <= SUNDAY
+    if not in_range:
+        raise IllegalWeekdayError(firstweekday)
+    c.firstweekday = firstweekday
+
+# Module-level functions are bound methods of the shared instance c, so
+# they all follow whatever first weekday is set on c.
+monthcalendar = c.monthdayscalendar
+prweek = c.prweek
+week = c.formatweek
+weekheader = c.formatweekheader
+prmonth = c.prmonth
+month = c.formatmonth
+calendar = c.formatyear
+prcal = c.pryear
+
+
+# Spacing of month columns for multi-column year calendar
+# These are the default colwidth/spacing arguments of format() and
+# formatstring().
+_colwidth = 7*3 - 1 # Amount printed by prweek()
+_spacing = 6 # Number of spaces between columns
+
+
+def format(cols, colwidth=_colwidth, spacing=_spacing):
+    """Prints multi-column formatting for year calendars"""
+    text = formatstring(cols, colwidth, spacing)
+    print text
+
+
+def formatstring(cols, colwidth=_colwidth, spacing=_spacing):
+    """Returns a string formatted from n strings, centered within n columns.
+
+    Each string in cols is centered in colwidth characters and the
+    columns are separated by spacing blanks.
+    """
+    separator = ' ' * spacing
+    centered = [col.center(colwidth) for col in cols]
+    return separator.join(centered)
+
+
+EPOCH = 1970
+_EPOCH_ORD = datetime.date(EPOCH, 1, 1).toordinal()
+
+
+def timegm(tuple):
+    """Unrelated but handy function to calculate Unix timestamp from GMT.
+
+    Only the first six items of the argument are used: year, month, day,
+    hour, minute, second.
+    """
+    year, month, day, hour, minute, second = tuple[:6]
+    # Days since the epoch: ordinal of the 1st of the month, then the day
+    # is added arithmetically rather than validated as a calendar date.
+    days = datetime.date(year, month, 1).toordinal() - _EPOCH_ORD + day - 1
+    return ((days*24 + hour)*60 + minute)*60 + second
+
+
+def main(args):
+    """Command-line entry point: print a text or HTML calendar.
+
+    args is the full argument vector including the program name, so one
+    remaining argument means "current year", two means "given year" and
+    three (text output only) means "given year and month".
+    """
+    import optparse
+    parser = optparse.OptionParser(usage="usage: %prog [options] [year [month]]")
+    # (flags, settings) per option, registered in this order.
+    option_table = [
+        (("-w", "--width"),
+         dict(dest="width", type="int", default=2,
+              help="width of date column (default 2, text only)")),
+        (("-l", "--lines"),
+         dict(dest="lines", type="int", default=1,
+              help="number of lines for each week (default 1, text only)")),
+        (("-s", "--spacing"),
+         dict(dest="spacing", type="int", default=6,
+              help="spacing between months (default 6, text only)")),
+        (("-m", "--months"),
+         dict(dest="months", type="int", default=3,
+              help="months per row (default 3, text only)")),
+        (("-c", "--css"),
+         dict(dest="css", default="calendar.css",
+              help="CSS to use for page (html only)")),
+        (("-L", "--locale"),
+         dict(dest="locale", default=None,
+              help="locale to be used from month and weekday names")),
+        (("-e", "--encoding"),
+         dict(dest="encoding", default=None,
+              help="Encoding to use for output")),
+        (("-t", "--type"),
+         dict(dest="type", default="text",
+              choices=("text", "html"),
+              help="output type (text or html)")),
+    ]
+    for flags, settings in option_table:
+        parser.add_option(*flags, **settings)
+
+    (options, args) = parser.parse_args(args)
+
+    if options.locale and not options.encoding:
+        parser.error("if --locale is specified --encoding is required")
+        sys.exit(1)
+
+    locale = options.locale, options.encoding
+    argc = len(args)
+
+    if options.type == "html":
+        if options.locale:
+            cal = LocaleHTMLCalendar(locale=locale)
+        else:
+            cal = HTMLCalendar()
+        encoding = options.encoding
+        if encoding is None:
+            encoding = sys.getdefaultencoding()
+        optdict = dict(encoding=encoding, css=options.css)
+        if argc == 1:
+            print cal.formatyearpage(datetime.date.today().year, **optdict)
+        elif argc == 2:
+            print cal.formatyearpage(int(args[1]), **optdict)
+        else:
+            parser.error("incorrect number of arguments")
+            sys.exit(1)
+    else:
+        if options.locale:
+            cal = LocaleTextCalendar(locale=locale)
+        else:
+            cal = TextCalendar()
+        optdict = dict(w=options.width, l=options.lines)
+        if argc != 3:
+            # Column spacing and months-per-row are passed only for
+            # whole-year output.
+            optdict["c"] = options.spacing
+            optdict["m"] = options.months
+        if argc == 1:
+            result = cal.formatyear(datetime.date.today().year, **optdict)
+        elif argc == 2:
+            result = cal.formatyear(int(args[1]), **optdict)
+        elif argc == 3:
+            result = cal.formatmonth(int(args[1]), int(args[2]), **optdict)
+        else:
+            parser.error("incorrect number of arguments")
+            sys.exit(1)
+        if options.encoding:
+            result = result.encode(options.encoding)
+        print result
+
+
+if __name__ == "__main__":
+ main(sys.argv)
diff --git a/cashew/Lib/cgi.py b/cashew/Lib/cgi.py
new file mode 100644
index 0000000..7c51b44
--- /dev/null
+++ b/cashew/Lib/cgi.py
@@ -0,0 +1,1059 @@
+#! /usr/local/bin/python
+
+# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
+# intentionally NOT "/usr/bin/env python". On many systems
+# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
+# scripts, and /usr/local/bin is the default directory where Python is
+# installed, so /usr/bin/env would be unable to find python. Granted,
+# binary installations by Linux vendors often install Python in
+# /usr/bin. So let those vendors patch cgi.py to match their choice
+# of installation.
+
+"""Support module for CGI (Common Gateway Interface) scripts.
+
+This module defines a number of utilities for use by CGI scripts
+written in Python.
+"""
+
+# XXX Perhaps there should be a slimmed version that doesn't contain
+# all those backwards compatible and debugging classes and functions?
+
+# History
+# -------
+#
+# Michael McLay started this module. Steve Majewski changed the
+# interface to SvFormContentDict and FormContentDict. The multipart
+# parsing was inspired by code submitted by Andreas Paepcke. Guido van
+# Rossum rewrote, reformatted and documented the module and is currently
+# responsible for its maintenance.
+#
+
+__version__ = "2.6"
+
+
+# Imports
+# =======
+
+from operator import attrgetter
+import sys
+import os
+import UserDict
+import urlparse
+
+from warnings import filterwarnings, catch_warnings, warn
+with catch_warnings():
+ if sys.py3kwarning:
+ filterwarnings("ignore", ".*mimetools has been removed",
+ DeprecationWarning)
+ filterwarnings("ignore", ".*rfc822 has been removed",
+ DeprecationWarning)
+ import mimetools
+ import rfc822
+
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+
+__all__ = ["MiniFieldStorage", "FieldStorage", "FormContentDict",
+ "SvFormContentDict", "InterpFormContentDict", "FormContent",
+ "parse", "parse_qs", "parse_qsl", "parse_multipart",
+ "parse_header", "print_exception", "print_environ",
+ "print_form", "print_directory", "print_arguments",
+ "print_environ_usage", "escape"]
+
+# Logging support
+# ===============
+
+logfile = "" # Filename to log to, if not empty
+logfp = None # File object to log to, if not None
+
+def initlog(*allargs):
+    """Write a log message, if there is a log file.
+
+    Despite its name this is the initial value of the module variable
+    log, and callers should always go through log().  The first call
+    rebinds log to dolog (a log file object is available) or to nolog
+    (logging is disabled) and then forwards the message.
+
+    The first argument is a format string; the remaining arguments (if
+    any) are arguments to the % operator, so e.g.
+        log("%s: %s", "a", "b")
+    will write "a: b" to the log file, followed by a newline.
+
+    If the global logfp is not None, it should be a file object to
+    which log data is written.
+
+    If the global logfp is None, the global logfile may be a string
+    giving a filename to open, in append mode.  This file should be
+    world writable!!!  If the file can't be opened, logging is
+    silently disabled (since there is no safe place where we could
+    send an error message).
+
+    """
+    global logfp, log
+    if logfile and not logfp:
+        try:
+            logfp = open(logfile, "a")
+        except IOError:
+            # Deliberately silent: logfp stays unset and nolog is chosen.
+            pass
+    log = dolog if logfp else nolog
+    log(*allargs)
+
+def dolog(fmt, *args):
+    """Write a log message to the log file.  See initlog() for docs."""
+    message = fmt % args
+    logfp.write(message + "\n")
+
+def nolog(*allargs):
+    """Dummy function, assigned to log when logging is disabled.
+
+    Accepts and ignores any positional arguments.
+    """
+    return None
+
+log = initlog # The current logging function
+
+
+# Parsing functions
+# =================
+
+# Maximum input we will accept when REQUEST_METHOD is POST
+# 0 ==> unlimited input
+maxlen = 0
+
+def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
+    """Parse a query in the environment or from a file (default stdin)
+
+        Arguments, all optional:
+
+        fp              : file pointer; default: sys.stdin
+
+        environ         : environment dictionary; default: os.environ
+
+        keep_blank_values: flag indicating whether blank values in
+            percent-encoded forms should be treated as blank strings.
+            A true value indicates that blanks should be retained as
+            blank strings.  The default false value indicates that
+            blank values are to be ignored and treated as if they were
+            not included.
+
+        strict_parsing: flag indicating what to do with parsing errors.
+            If false (the default), errors are silently ignored.
+            If true, errors raise a ValueError exception.
+
+        Note that environ may be modified: REQUEST_METHOD is filled in
+        when missing and QUERY_STRING may be set to the parsed query.
+    """
+    if fp is None:
+        fp = sys.stdin
+    if 'REQUEST_METHOD' not in environ:
+        environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone
+    if environ['REQUEST_METHOD'] == 'POST':
+        ctype, pdict = parse_header(environ['CONTENT_TYPE'])
+        if ctype == 'multipart/form-data':
+            return parse_multipart(fp, pdict)
+        if ctype == 'application/x-www-form-urlencoded':
+            clength = int(environ['CONTENT_LENGTH'])
+            if maxlen and clength > maxlen:
+                raise ValueError('Maximum content length exceeded')
+            qs = fp.read(clength)
+        else:
+            qs = ''                     # Unknown content-type
+        # A query string from the environment, or failing that argv[1],
+        # is appended to whatever was read from the request body.
+        if 'QUERY_STRING' in environ:
+            extra = environ['QUERY_STRING']
+        elif sys.argv[1:]:
+            extra = sys.argv[1]
+        else:
+            extra = None
+        if extra is not None:
+            if qs:
+                qs = qs + '&'
+            qs = qs + extra
+        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
+    elif 'QUERY_STRING' in environ:
+        qs = environ['QUERY_STRING']
+    else:
+        qs = sys.argv[1] if sys.argv[1:] else ""
+        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
+    return urlparse.parse_qs(qs, keep_blank_values, strict_parsing)
+
+
+# parse query string function called from urlparse,
+# this is done in order to maintain backward compatibility.
+
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
+    """Parse a query given as a string argument.
+
+    Deprecated wrapper: issues a PendingDeprecationWarning and delegates
+    to urlparse.parse_qs().
+    """
+    message = "cgi.parse_qs is deprecated, use urlparse.parse_qs instead"
+    # stacklevel 2 attributes the warning to the caller of this function.
+    warn(message, PendingDeprecationWarning, 2)
+    return urlparse.parse_qs(qs, keep_blank_values, strict_parsing)
+
+
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
+    """Parse a query given as a string argument.
+
+    Deprecated wrapper: issues a PendingDeprecationWarning and delegates
+    to urlparse.parse_qsl().
+    """
+    message = "cgi.parse_qsl is deprecated, use urlparse.parse_qsl instead"
+    # stacklevel 2 attributes the warning to the caller of this function.
+    warn(message, PendingDeprecationWarning, 2)
+    return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing)
+
+def parse_multipart(fp, pdict):
+    """Parse multipart input.
+
+    Arguments:
+    fp   : input file
+    pdict: dictionary containing other parameters of content-type header
+
+    Returns a dictionary just like parse_qs(): keys are the field names, each
+    value is a list of values for that field.  This is easy to use but not
+    much good if you are expecting megabytes to be uploaded -- in that case,
+    use the FieldStorage class instead which is much more flexible.  Note
+    that content-type is the raw, unparsed contents of the content-type
+    header.
+
+    Raises ValueError if the boundary is rejected by valid_boundary() or
+    if a part announces a content-length larger than a non-zero maxlen.
+
+    XXX This does not parse nested multipart parts -- use FieldStorage for
+    that.
+
+    XXX This should really be subsumed by FieldStorage altogether -- no
+    point in having two implementations of the same parsing algorithm.
+    Also, FieldStorage protects itself better against certain DoS attacks
+    by limiting the size of the data read in one chunk.  The API here
+    does not support that kind of protection.  This also affects parse()
+    since it can call parse_multipart().
+
+    """
+    boundary = ""
+    if 'boundary' in pdict:
+        boundary = pdict['boundary']
+    if not valid_boundary(boundary):
+        raise ValueError, ('Invalid boundary in multipart form: %r'
+                           % (boundary,))
+
+    # Delimiter lines: nextpart separates parts, lastpart ends the body.
+    nextpart = "--" + boundary
+    lastpart = "--" + boundary + "--"
+    partdict = {}
+    # Last delimiter line seen; empty until the first delimiter is found.
+    terminator = ""
+
+    while terminator != lastpart:
+        # bytes stays -1 unless the part declares a usable content-length;
+        # data stays None for the text before the first delimiter.
+        bytes = -1
+        data = None
+        if terminator:
+            # At start of next part. Read headers first.
+            headers = mimetools.Message(fp)
+            clength = headers.getheader('content-length')
+            if clength:
+                try:
+                    bytes = int(clength)
+                except ValueError:
+                    # Unparsable length: fall back to line-based reading.
+                    pass
+            if bytes > 0:
+                if maxlen and bytes > maxlen:
+                    raise ValueError, 'Maximum content length exceeded'
+                data = fp.read(bytes)
+            else:
+                data = ""
+        # Read lines until end of part.
+        lines = []
+        while 1:
+            line = fp.readline()
+            if not line:
+                terminator = lastpart # End outer loop
+                break
+            if line[:2] == "--":
+                terminator = line.strip()
+                if terminator in (nextpart, lastpart):
+                    break
+            lines.append(line)
+        # Done with part.
+        if data is None:
+            # Text before the first delimiter belongs to no part.
+            continue
+        if bytes < 0:
+            # No content-length: the collected lines are the part's data.
+            if lines:
+                # Strip final line terminator
+                line = lines[-1]
+                if line[-2:] == "\r\n":
+                    line = line[:-2]
+                elif line[-1:] == "\n":
+                    line = line[:-1]
+                lines[-1] = line
+                data = "".join(lines)
+        # NOTE(review): subscripting may raise for a part without a
+        # content-disposition header instead of reaching the check below
+        # -- confirm against mimetools.Message.
+        line = headers['content-disposition']
+        if not line:
+            continue
+        key, params = parse_header(line)
+        if key != 'form-data':
+            continue
+        if 'name' in params:
+            name = params['name']
+        else:
+            continue
+        # Repeated field names accumulate their values in one list.
+        if name in partdict:
+            partdict[name].append(data)
+        else:
+            partdict[name] = [data]
+
+    return partdict
+
+
+def _parseparam(s):
+    """Generate the stripped ';'-separated fields of s.
+
+    s must start with ';' for anything to be produced.  A ';' inside a
+    double-quoted string does not end a field.
+    """
+    while s.startswith(';'):
+        s = s[1:]
+        cut = s.find(';')
+        # An odd number of unescaped quotes before the ';' means it lies
+        # inside a quoted string, so look for the next one.
+        while cut > 0 and (s.count('"', 0, cut) - s.count('\\"', 0, cut)) % 2:
+            cut = s.find(';', cut + 1)
+        if cut < 0:
+            # No further separator: the field runs to the end.
+            cut = len(s)
+        yield s[:cut].strip()
+        s = s[cut:]
+
+def parse_header(line):
+    """Parse a Content-type like header.
+
+    Return the main content-type and a dictionary of options.  Option
+    names are lower-cased; a value enclosed in double quotes has the
+    quotes removed and its backslash escapes for '\\' and '"' undone.
+
+    """
+    parts = _parseparam(';' + line)
+    key = next(parts)
+    pdict = {}
+    for p in parts:
+        i = p.find('=')
+        if i < 0:
+            # Fields without '=' are ignored.
+            continue
+        name = p[:i].strip().lower()
+        value = p[i+1:].strip()
+        if len(value) >= 2 and value[0] == value[-1] == '"':
+            value = value[1:-1]
+            value = value.replace('\\\\', '\\').replace('\\"', '"')
+        pdict[name] = value
+    return key, pdict
+
+
+# Classes for field storage
+# =========================
+
+class MiniFieldStorage:
+
+    """Like FieldStorage, for use when no file uploads are possible.
+
+    Only name and value are set per instance; every other attribute is
+    a class-level placeholder.
+    """
+
+    # Dummy attributes
+    filename = None
+    list = None
+    type = None
+    file = None
+    type_options = {}
+    disposition = None
+    disposition_options = {}
+    headers = {}
+
+    def __init__(self, name, value):
+        """Constructor from field name and value."""
+        self.name = name
+        self.value = value
+        # self.file = StringIO(value)
+
+    def __repr__(self):
+        """Return printable representation."""
+        details = (self.name, self.value)
+        return "MiniFieldStorage(%r, %r)" % details
+
+
+class FieldStorage:
+
+    """Store a sequence of fields, reading multipart/form-data.
+
+    This class provides naming, typing, files stored on disk, and
+    more. At the top level, it is accessible like a dictionary, whose
+    keys are the field names. (Note: None can occur as a field name.)
+    The items are either a Python list (if there's multiple values) or
+    another FieldStorage or MiniFieldStorage object. If it's a single
+    object, it has the following attributes:
+
+    name: the field name, if specified; otherwise None
+
+    filename: the filename, if specified; otherwise None; this is the
+        client side filename, *not* the file name on which it is
+        stored (that's a temporary file you don't deal with)
+
+    value: the value as a *string*; for file uploads, this
+        transparently reads the file every time you request the value
+
+    file: the file(-like) object from which you can read the data;
+        None if the data is stored as a simple string
+
+    type: the content-type, or None if not specified
+
+    type_options: dictionary of options specified on the content-type
+        line
+
+    disposition: content-disposition, or None if not specified
+
+    disposition_options: dictionary of corresponding options
+
+    headers: a dictionary(-like) object (sometimes rfc822.Message or a
+        subclass thereof) containing *all* headers
+
+    The class is subclassable, mostly for the purpose of overriding
+    the make_file() method, which is called internally to come up with
+    a file open for reading and writing. This makes it possible to
+    override the default choice of storing all files in a temporary
+    directory and unlinking them as soon as they have been opened.
+
+    """
+
+    def __init__(self, fp=None, headers=None, outerboundary="",
+                 environ=os.environ, keep_blank_values=0, strict_parsing=0):
+        """Constructor. Read multipart/* until last part.
+
+        Arguments, all optional:
+
+        fp : file pointer; default: sys.stdin
+            (not used when the request method is GET)
+
+        headers : header dictionary-like object; default:
+            taken from environ as per CGI spec
+
+        outerboundary : terminating multipart boundary
+            (for internal use only)
+
+        environ : environment dictionary; default: os.environ
+
+        keep_blank_values: flag indicating whether blank values in
+            percent-encoded forms should be treated as blank strings.
+            A true value indicates that blanks should be retained as
+            blank strings. The default false value indicates that
+            blank values are to be ignored and treated as if they were
+            not included.
+
+        strict_parsing: flag indicating what to do with parsing errors.
+            If false (the default), errors are silently ignored.
+            If true, errors raise a ValueError exception.
+
+        """
+        method = 'GET'  # used when REQUEST_METHOD is absent from environ
+        self.keep_blank_values = keep_blank_values
+        self.strict_parsing = strict_parsing
+        if 'REQUEST_METHOD' in environ:
+            method = environ['REQUEST_METHOD'].upper()
+        self.qs_on_post = None
+        if method == 'GET' or method == 'HEAD':
+            if 'QUERY_STRING' in environ:
+                qs = environ['QUERY_STRING']
+            elif sys.argv[1:]:
+                qs = sys.argv[1]
+            else:
+                qs = ""
+            fp = StringIO(qs)  # the query string replaces any fp that was passed in
+            if headers is None:
+                headers = {'content-type':
+                           "application/x-www-form-urlencoded"}
+        if headers is None:
+            headers = {}
+            if method == 'POST':
+                # Set default content-type for POST to what's traditional
+                headers['content-type'] = "application/x-www-form-urlencoded"
+            if 'CONTENT_TYPE' in environ:
+                headers['content-type'] = environ['CONTENT_TYPE']
+            if 'QUERY_STRING' in environ:
+                self.qs_on_post = environ['QUERY_STRING']
+            if 'CONTENT_LENGTH' in environ:
+                headers['content-length'] = environ['CONTENT_LENGTH']
+        self.fp = fp or sys.stdin
+        self.headers = headers
+        self.outerboundary = outerboundary
+
+        # Process content-disposition header
+        cdisp, pdict = "", {}
+        if 'content-disposition' in self.headers:
+            cdisp, pdict = parse_header(self.headers['content-disposition'])
+        self.disposition = cdisp
+        self.disposition_options = pdict
+        self.name = None
+        if 'name' in pdict:
+            self.name = pdict['name']
+        self.filename = None
+        if 'filename' in pdict:
+            self.filename = pdict['filename']
+
+        # Process content-type header
+        #
+        # Honor any existing content-type header. But if there is no
+        # content-type header, use some sensible defaults. Assume
+        # outerboundary is "" at the outer level, but something non-false
+        # inside a multi-part. The default for an inner part is text/plain,
+        # but for an outer part it should be urlencoded. This should catch
+        # bogus clients which erroneously forget to include a content-type
+        # header.
+        #
+        # See below for what we do if there does exist a content-type header,
+        # but it happens to be something we don't understand.
+        if 'content-type' in self.headers:
+            ctype, pdict = parse_header(self.headers['content-type'])
+        elif self.outerboundary or method != 'POST':
+            ctype, pdict = "text/plain", {}
+        else:
+            ctype, pdict = 'application/x-www-form-urlencoded', {}
+        self.type = ctype
+        self.type_options = pdict
+        self.innerboundary = ""
+        if 'boundary' in pdict:
+            self.innerboundary = pdict['boundary']
+        clen = -1  # -1 means "length unknown"; read_single() then reads line by line
+        if 'content-length' in self.headers:
+            try:
+                clen = int(self.headers['content-length'])
+            except ValueError:
+                pass
+            if maxlen and clen > maxlen:  # maxlen is a module global; a false value disables the check
+                raise ValueError, 'Maximum content length exceeded'
+        self.length = clen
+
+        self.list = self.file = None
+        self.done = 0  # set to 1 on the closing boundary, -1 on premature EOF
+        if ctype == 'application/x-www-form-urlencoded':
+            self.read_urlencoded()
+        elif ctype[:10] == 'multipart/':
+            self.read_multi(environ, keep_blank_values, strict_parsing)
+        else:
+            self.read_single()
+
+    def __repr__(self):
+        """Return a printable representation."""
+        return "FieldStorage(%r, %r, %r)" % (
+            self.name, self.filename, self.value)
+
+    def __iter__(self):
+        return iter(self.keys())
+
+    def __getattr__(self, name):  # only reached for names not found normally; computes 'value'
+        if name != 'value':
+            raise AttributeError, name
+        if self.file:
+            self.file.seek(0)
+            value = self.file.read()
+            self.file.seek(0)  # rewind so the file can be read again
+        elif self.list is not None:
+            value = self.list
+        else:
+            value = None
+        return value
+
+    def __getitem__(self, key):
+        """Dictionary style indexing: one item, or a list when several share the name."""
+        if self.list is None:
+            raise TypeError, "not indexable"
+        found = []
+        for item in self.list:
+            if item.name == key: found.append(item)
+        if not found:
+            raise KeyError, key
+        if len(found) == 1:
+            return found[0]
+        else:
+            return found
+
+    def getvalue(self, key, default=None):
+        """Dictionary style get() method, including 'value' lookup."""
+        if key in self:
+            value = self[key]
+            if type(value) is type([]):
+                return map(attrgetter('value'), value)
+            else:
+                return value.value
+        else:
+            return default
+
+    def getfirst(self, key, default=None):
+        """ Return the first value received."""
+        if key in self:
+            value = self[key]
+            if type(value) is type([]):
+                return value[0].value
+            else:
+                return value.value
+        else:
+            return default
+
+    def getlist(self, key):
+        """ Return list of received values."""
+        if key in self:
+            value = self[key]
+            if type(value) is type([]):
+                return map(attrgetter('value'), value)
+            else:
+                return [value.value]
+        else:
+            return []
+
+    def keys(self):
+        """Dictionary style keys() method (distinct names, order not defined)."""
+        if self.list is None:
+            raise TypeError, "not indexable"
+        return list(set(item.name for item in self.list))
+
+    def has_key(self, key):
+        """Dictionary style has_key() method (same test as __contains__)."""
+        if self.list is None:
+            raise TypeError, "not indexable"
+        return any(item.name == key for item in self.list)
+
+    def __contains__(self, key):
+        """Dictionary style __contains__ method."""
+        if self.list is None:
+            raise TypeError, "not indexable"
+        return any(item.name == key for item in self.list)
+
+    def __len__(self):
+        """Dictionary style len(x) support."""
+        return len(self.keys())
+
+    def __nonzero__(self):
+        return bool(self.list)
+
+    def read_urlencoded(self):
+        """Internal: read data in query string format."""
+        qs = self.fp.read(self.length)
+        if self.qs_on_post:
+            qs += '&' + self.qs_on_post
+        self.list = list = []
+        for key, value in urlparse.parse_qsl(qs, self.keep_blank_values,
+                                            self.strict_parsing):
+            list.append(MiniFieldStorage(key, value))
+        self.skip_lines()
+
+    FieldStorageClass = None  # class used for nested parts; None means self.__class__
+
+    def read_multi(self, environ, keep_blank_values, strict_parsing):
+        """Internal: read a part that is itself multipart."""
+        ib = self.innerboundary
+        if not valid_boundary(ib):
+            raise ValueError, 'Invalid boundary in multipart form: %r' % (ib,)
+        self.list = []
+        if self.qs_on_post:
+            for key, value in urlparse.parse_qsl(self.qs_on_post,
+                                self.keep_blank_values, self.strict_parsing):
+                self.list.append(MiniFieldStorage(key, value))
+            FieldStorageClass = None  # NOTE(review): local name, never read in this method
+
+        klass = self.FieldStorageClass or self.__class__
+        part = klass(self.fp, {}, ib,
+                     environ, keep_blank_values, strict_parsing)
+        # Throw first part away
+        while not part.done:
+            headers = rfc822.Message(self.fp)
+            part = klass(self.fp, headers, ib,
+                         environ, keep_blank_values, strict_parsing)
+            self.list.append(part)
+        self.skip_lines()
+
+    def read_single(self):
+        """Internal: read an atomic part."""
+        if self.length >= 0:
+            self.read_binary()
+            self.skip_lines()
+        else:
+            self.read_lines()
+        self.file.seek(0)  # leave the stored data positioned at its start
+
+    bufsize = 8*1024 # I/O buffering size for copy to file
+
+    def read_binary(self):
+        """Internal: read binary data."""
+        self.file = self.make_file('b')
+        todo = self.length
+        if todo >= 0:
+            while todo > 0:
+                data = self.fp.read(min(todo, self.bufsize))
+                if not data:
+                    self.done = -1  # input ended before self.length bytes arrived
+                    break
+                self.file.write(data)
+                todo = todo - len(data)
+
+    def read_lines(self):
+        """Internal: read lines until EOF or outerboundary."""
+        self.file = self.__file = StringIO()  # start in memory; __write() may switch to make_file()
+        if self.outerboundary:
+            self.read_lines_to_outerboundary()
+        else:
+            self.read_lines_to_eof()
+
+    def __write(self, line):  # append line to self.file, spilling to make_file() past 1000 chars
+        if self.__file is not None:
+            if self.__file.tell() + len(line) > 1000:
+                self.file = self.make_file('')
+                self.file.write(self.__file.getvalue())
+                self.__file = None  # the in-memory buffer is no longer used
+        self.file.write(line)
+
+    def read_lines_to_eof(self):
+        """Internal: read lines until EOF."""
+        while 1:
+            line = self.fp.readline(1<<16)
+            if not line:
+                self.done = -1
+                break
+            self.__write(line)
+
+    def read_lines_to_outerboundary(self):
+        """Internal: read lines until outerboundary."""
+        next = "--" + self.outerboundary
+        last = next + "--"
+        delim = ""
+        last_line_lfend = True  # a boundary is only recognised right after a line ending
+        while 1:
+            line = self.fp.readline(1<<16)
+            if not line:
+                self.done = -1
+                break
+            if delim == "\r":
+                line = delim + line
+                delim = ""
+            if line[:2] == "--" and last_line_lfend:
+                strippedline = line.strip()
+                if strippedline == next:
+                    break
+                if strippedline == last:
+                    self.done = 1
+                    break
+            odelim = delim  # terminator of the previous line, written only once another line follows
+            if line[-2:] == "\r\n":
+                delim = "\r\n"
+                line = line[:-2]
+                last_line_lfend = True
+            elif line[-1] == "\n":
+                delim = "\n"
+                line = line[:-1]
+                last_line_lfend = True
+            elif line[-1] == "\r":
+                # We may interrupt \r\n sequences if they span the 2**16
+                # byte boundary
+                delim = "\r"
+                line = line[:-1]
+                last_line_lfend = False
+            else:
+                delim = ""
+                last_line_lfend = False
+            self.__write(odelim + line)
+
+    def skip_lines(self):
+        """Internal: skip lines until outer boundary if defined."""
+        if not self.outerboundary or self.done:
+            return
+        next = "--" + self.outerboundary
+        last = next + "--"
+        last_line_lfend = True
+        while 1:
+            line = self.fp.readline(1<<16)
+            if not line:
+                self.done = -1
+                break
+            if line[:2] == "--" and last_line_lfend:
+                strippedline = line.strip()
+                if strippedline == next:
+                    break
+                if strippedline == last:
+                    self.done = 1
+                    break
+            last_line_lfend = line.endswith('\n')
+
+    def make_file(self, binary=None):
+        """Overridable: return a readable & writable file.
+
+        The file will be used as follows:
+        - data is written to it
+        - seek(0)
+        - data is read from it
+
+        The 'binary' argument is unused -- the file is always opened
+        in binary mode.
+
+        This version opens a temporary file for reading and writing,
+        and immediately deletes (unlinks) it. The trick (on Unix!) is
+        that the file can still be used, but it can't be opened by
+        another process, and it will automatically be deleted when it
+        is closed or when the current process terminates.
+
+        If you want a more permanent file, you derive a class which
+        overrides this method. If you want a visible temporary file
+        that is nevertheless automatically deleted when the script
+        terminates, try defining a __del__ method in a derived class
+        which unlinks the temporary files you have created.
+
+        """
+        import tempfile
+        return tempfile.TemporaryFile("w+b")
+
+
+
+# Backwards Compatibility Classes
+# ===============================
+
+class FormContentDict(UserDict.UserDict):
+    """Form content as dictionary with a list of values per field.
+
+    form = FormContentDict()
+
+    form[key] -> [value, value, ...]
+    key in form -> Boolean
+    form.keys() -> [key, key, ...]
+    form.values() -> [[val, val, ...], [val, val, ...], ...]
+    form.items() -> [(key, [val, val, ...]), (key, [val, val, ...]), ...]
+    form.dict == {key: [val, val, ...], ...}
+
+    """
+    def __init__(self, environ=os.environ, keep_blank_values=0, strict_parsing=0):
+        self.dict = self.data = parse(environ=environ,
+                                      keep_blank_values=keep_blank_values,
+                                      strict_parsing=strict_parsing)  # dict and data name the same object
+        self.query_string = environ['QUERY_STRING']  # KeyError when QUERY_STRING is not set
+
+
+class SvFormContentDict(FormContentDict):
+    """Form content as dictionary expecting a single value per field.
+
+    If you only expect a single value for each field, then form[key]
+    will return that single value. It will raise an IndexError if
+    that expectation is not true. If you expect a field to have
+    possible multiple values, then you can use form.getlist(key) to
+    get all of the values. values() and items() are a compromise:
+    they return single strings where there is a single value, and
+    lists of strings otherwise.
+
+    """
+    def __getitem__(self, key):
+        if len(self.dict[key]) > 1:
+            raise IndexError, 'expecting a single value'
+        return self.dict[key][0]
+    def getlist(self, key):
+        return self.dict[key]  # the stored list itself, not a copy
+    def values(self):
+        result = []
+        for value in self.dict.values():
+            if len(value) == 1:
+                result.append(value[0])  # unwrap single-value fields
+            else: result.append(value)
+        return result
+    def items(self):
+        result = []
+        for key, value in self.dict.items():
+            if len(value) == 1:
+                result.append((key, value[0]))  # unwrap single-value fields
+            else: result.append((key, value))
+        return result
+
+
+class InterpFormContentDict(SvFormContentDict):
+    """This class is present for backwards compatibility only."""
+    def __getitem__(self, key):  # value as int or float when it parses as one, else stripped string
+        v = SvFormContentDict.__getitem__(self, key)
+        if v[0] in '0123456789+-.':  # NOTE(review): v[0] raises IndexError when v is empty
+            try: return int(v)
+            except ValueError:
+                try: return float(v)
+                except ValueError: pass
+        return v.strip()
+    def values(self):
+        result = []
+        for key in self.keys():
+            try:
+                result.append(self[key])
+            except IndexError:
+                result.append(self.dict[key])  # fall back to the raw list of values
+        return result
+    def items(self):
+        result = []
+        for key in self.keys():
+            try:
+                result.append((key, self[key]))
+            except IndexError:
+                result.append((key, self.dict[key]))  # fall back to the raw list of values
+        return result
+
+
+class FormContent(FormContentDict):
+    """This class is present for backwards compatibility only."""
+    def values(self, key):  # list of values for key, or None when key is absent
+        if key in self.dict :return self.dict[key]
+        else: return None
+    def indexed_value(self, key, location):  # value at index location, or None when unavailable
+        if key in self.dict:
+            if len(self.dict[key]) > location:
+                return self.dict[key][location]
+            else: return None
+        else: return None
+    def value(self, key):  # first value for key, or None when key is absent
+        if key in self.dict: return self.dict[key][0]
+        else: return None
+    def length(self, key):  # number of values for key; KeyError when key is absent
+        return len(self.dict[key])
+    def stripped(self, key):  # first value with surrounding whitespace removed, or None
+        if key in self.dict: return self.dict[key][0].strip()
+        else: return None
+    def pars(self):  # the underlying dictionary itself
+        return self.dict
+
+
+# Test/debug code
+# ===============
+
+def test(environ=os.environ):
+    """Robust test CGI script, usable as main program.
+
+    Write minimal HTTP headers and dump all information provided to
+    the script in HTML form.
+
+    """
+    print "Content-type: text/html"
+    print
+    sys.stderr = sys.stdout  # make error output part of the response body
+    try:
+        form = FieldStorage()   # Replace with other classes to test those
+        print_directory()
+        print_arguments()
+        print_form(form)
+        print_environ(environ)
+        print_environ_usage()
+        def f():
+            exec "testing print_exception() -- <I>italics?</I>"  # not valid Python: raises on purpose
+        def g(f=f):
+            f()
+        print "<H3>What follows is a test, not an actual exception:</H3>"
+        g()
+    except:  # report whatever went wrong instead of aborting the dump
+        print_exception()
+
+    print "<H1>Second try with a small maxlen...</H1>"
+
+    global maxlen
+    maxlen = 50  # low limit so the content-length check is exercised
+    try:
+        form = FieldStorage()   # Replace with other classes to test those
+        print_directory()
+        print_arguments()
+        print_form(form)
+        print_environ(environ)
+    except:
+        print_exception()
+
+def print_exception(type=None, value=None, tb=None, limit=None):  # print a traceback as HTML
+    if type is None:
+        type, value, tb = sys.exc_info()  # default to the exception being handled
+    import traceback
+    print
+    print "<H3>Traceback (most recent call last):</H3>"
+    list = traceback.format_tb(tb, limit) + \
+           traceback.format_exception_only(type, value)
+    print "<PRE>%s<B>%s</B></PRE>" % (
+        escape("".join(list[:-1])),
+        escape(list[-1]),  # the final line goes inside the <B> element
+        )
+    del tb
+
+def print_environ(environ=os.environ):
+    """Dump the shell environment as HTML (a definition list sorted by name)."""
+    keys = environ.keys()
+    keys.sort()
+    print
+    print "<H3>Shell Environment:</H3>"
+    print "<DL>"
+    for key in keys:
+        print "<DT>", escape(key), "<DD>", escape(environ[key])
+    print "</DL>"
+    print
+
+def print_form(form):
+    """Dump the contents of a form as HTML (a definition list sorted by key)."""
+    keys = form.keys()
+    keys.sort()
+    print
+    print "<H3>Form Contents:</H3>"
+    if not keys:
+        print "<P>No form fields."
+    print "<DL>"
+    for key in keys:
+        print "<DT>" + escape(key) + ":",
+        value = form[key]
+        print "<i>" + escape(repr(type(value))) + "</i>"
+        print "<DD>" + escape(repr(value))
+    print "</DL>"
+    print
+
+def print_directory():
+    """Dump the current directory as HTML."""
+    print
+    print "<H3>Current Working Directory:</H3>"
+    try:
+        pwd = os.getcwd()
+    except os.error, msg:
+        print "os.error:", escape(str(msg))  # report the failure in the page itself
+    else:
+        print escape(pwd)
+    print
+
+def print_arguments():  # dump sys.argv under an HTML heading
+    print
+    print "<H3>Command Line Arguments:</H3>"
+    print
+    print sys.argv
+    print
+
+def print_environ_usage():
+    """Dump a list of environment variables used by CGI as HTML."""
+    print """
+<H3>These environment variables could have been set:</H3>
+<UL>
+<LI>AUTH_TYPE
+<LI>CONTENT_LENGTH
+<LI>CONTENT_TYPE
+<LI>DATE_GMT
+<LI>DATE_LOCAL
+<LI>DOCUMENT_NAME
+<LI>DOCUMENT_ROOT
+<LI>DOCUMENT_URI
+<LI>GATEWAY_INTERFACE
+<LI>LAST_MODIFIED
+<LI>PATH
+<LI>PATH_INFO
+<LI>PATH_TRANSLATED
+<LI>QUERY_STRING
+<LI>REMOTE_ADDR
+<LI>REMOTE_HOST
+<LI>REMOTE_IDENT
+<LI>REMOTE_USER
+<LI>REQUEST_METHOD
+<LI>SCRIPT_NAME
+<LI>SERVER_NAME
+<LI>SERVER_PORT
+<LI>SERVER_PROTOCOL
+<LI>SERVER_ROOT
+<LI>SERVER_SOFTWARE
+</UL>
+In addition, HTTP headers sent by the server may be passed in the
+environment as well. Here are some common variable names:
+<UL>
+<LI>HTTP_ACCEPT
+<LI>HTTP_CONNECTION
+<LI>HTTP_HOST
+<LI>HTTP_PRAGMA
+<LI>HTTP_REFERER
+<LI>HTTP_USER_AGENT
+</UL>
+"""
+
+
+# Utilities
+# =========
+
+def escape(s, quote=None):
+    '''Replace special characters "&", "<" and ">" to HTML-safe sequences.
+    If the optional flag quote is true, the quotation mark character (")
+    is also translated.'''
+    s = s.replace("&", "&amp;") # Must be done first!
+    s = s.replace("<", "&lt;")
+    s = s.replace(">", "&gt;")
+    if quote:
+        s = s.replace('"', "&quot;")
+    return s
+
+def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):  # returns the re.match result, None on mismatch
+    import re
+    return re.match(_vb_pattern, s)  # up to 200 chars in ' '..'~', then one char in '!'..'~'
+
+# Invoke mainline
+# ===============
+
+# Call test() when this file is run as a script (not imported as a module)
+if __name__ == '__main__':
+ test()
diff --git a/cashew/Lib/cgitb.py b/cashew/Lib/cgitb.py
new file mode 100644
index 0000000..8acc4b7
--- /dev/null
+++ b/cashew/Lib/cgitb.py
@@ -0,0 +1,323 @@
+"""More comprehensive traceback formatting for Python scripts.
+
+To enable this module, do:
+
+ import cgitb; cgitb.enable()
+
+at the top of your script. The optional arguments to enable() are:
+
+ display - if true, tracebacks are displayed in the web browser
+ logdir - if set, tracebacks are written to files in this directory
+ context - number of lines of source code to show for each stack frame
+ format - 'text' or 'html' controls the output format
+
+By default, tracebacks are displayed but not saved, the context is 5 lines
+and the output format is 'html' (for backwards compatibility with the
+original use of this module)
+
+Alternatively, if you have caught an exception and want cgitb to display it
+for you, call cgitb.handler(). The optional argument to handler() is a
+3-item tuple (etype, evalue, etb) just like the value of sys.exc_info().
+The default handler displays output as HTML.
+
+"""
+import inspect
+import keyword
+import linecache
+import os
+import pydoc
+import sys
+import tempfile
+import time
+import tokenize
+import traceback
+import types
+
+def reset():
+    """Return a string that resets the CGI and browser to a known state."""
+    return '''<!--: spam
+Content-Type: text/html
+
+<body bgcolor="#f0f0f8"><font color="#f0f0f8" size="-5"> -->
+<body bgcolor="#f0f0f8"><font color="#f0f0f8" size="-5"> --> -->
+</font> </font> </font> </script> </object> </blockquote> </pre>
+</table> </table> </table> </table> </table> </font> </font> </font>'''
+
+__UNDEF__ = [] # a special sentinel object
+def small(text):  # wrap non-empty text in <small> tags; empty text stays empty
+    if text:
+        return '<small>' + text + '</small>'
+    else:
+        return ''
+
+def strong(text):  # wrap non-empty text in <strong> tags; empty text stays empty
+    if text:
+        return '<strong>' + text + '</strong>'
+    else:
+        return ''
+
+def grey(text):  # wrap non-empty text in a grey <font> element; empty text stays empty
+    if text:
+        return '<font color="#909090">' + text + '</font>'
+    else:
+        return ''
+
+def lookup(name, frame, locals):
+    """Find the value for a given name; return a (scope, value) pair."""
+    if name in locals:
+        return 'local', locals[name]
+    if name in frame.f_globals:
+        return 'global', frame.f_globals[name]
+    if '__builtins__' in frame.f_globals:
+        builtins = frame.f_globals['__builtins__']
+        if type(builtins) is type({}):  # __builtins__ is handled both as a dict and as an object
+            if name in builtins:
+                return 'builtin', builtins[name]
+        else:
+            if hasattr(builtins, name):
+                return 'builtin', getattr(builtins, name)
+    return None, __UNDEF__  # not found in any of the three places
+
+def scanvars(reader, frame, locals):
+    """Scan one logical line of Python and look up values of variables used."""
+    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
+    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
+        if ttype == tokenize.NEWLINE: break  # end of the logical line
+        if ttype == tokenize.NAME and token not in keyword.kwlist:
+            if lasttoken == '.':
+                if parent is not __UNDEF__:
+                    value = getattr(parent, token, __UNDEF__)
+                    vars.append((prefix + token, prefix, value))  # dotted name: 'where' slot holds the prefix
+            else:
+                where, value = lookup(token, frame, locals)
+                vars.append((token, where, value))
+        elif token == '.':
+            prefix += lasttoken + '.'
+            parent = value  # the next name is looked up as an attribute of this value
+        else:
+            parent, prefix = None, ''  # any other token ends the dotted chain
+        lasttoken = token
+    return vars  # list of (name, where, value) tuples
+
+def html(einfo, context=5):
+    """Return a nice HTML document describing a given traceback."""
+    etype, evalue, etb = einfo
+    if type(etype) is types.ClassType:
+        etype = etype.__name__
+    pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable
+    date = time.ctime(time.time())
+    head = '<body bgcolor="#f0f0f8">' + pydoc.html.heading(
+        '<big><big>%s</big></big>' %
+        strong(pydoc.html.escape(str(etype))),
+        '#ffffff', '#6622aa', pyver + '<br>' + date) + '''
+<p>A problem occurred in a Python script. Here is the sequence of
+function calls leading up to the error, in the order they occurred.</p>'''
+
+    indent = '<tt>' + small('&nbsp;' * 5) + '&nbsp;</tt>'
+    frames = []
+    records = inspect.getinnerframes(etb, context)
+    for frame, file, lnum, func, lines, index in records:
+        if file:
+            file = os.path.abspath(file)
+            link = '<a href="file://%s">%s</a>' % (file, pydoc.html.escape(file))
+        else:
+            file = link = '?'
+        args, varargs, varkw, locals = inspect.getargvalues(frame)
+        call = ''
+        if func != '?':
+            call = 'in ' + strong(func) + \
+                inspect.formatargvalues(args, varargs, varkw, locals,
+                    formatvalue=lambda value: '=' + pydoc.html.repr(value))
+
+        highlight = {}
+        def reader(lnum=[lnum]):  # one-item list holds the next line number to hand out
+            highlight[lnum[0]] = 1  # remember every line the tokenizer consumed
+            try: return linecache.getline(file, lnum[0])
+            finally: lnum[0] += 1
+        vars = scanvars(reader, frame, locals)
+
+        rows = ['<tr><td bgcolor="#d8bbff">%s%s %s</td></tr>' %
+                ('<big>&nbsp;</big>', link, call)]
+        if index is not None:
+            i = lnum - index
+            for line in lines:
+                num = small('&nbsp;' * (5-len(str(i))) + str(i)) + '&nbsp;'
+                if i in highlight:
+                    line = '<tt>=&gt;%s%s</tt>' % (num, pydoc.html.preformat(line))
+                    rows.append('<tr><td bgcolor="#ffccee">%s</td></tr>' % line)
+                else:
+                    line = '<tt>&nbsp;&nbsp;%s%s</tt>' % (num, pydoc.html.preformat(line))
+                    rows.append('<tr><td>%s</td></tr>' % grey(line))
+                i += 1
+
+        done, dump = {}, []
+        for name, where, value in vars:
+            if name in done: continue  # report each name once per frame
+            done[name] = 1
+            if value is not __UNDEF__:
+                if where in ('global', 'builtin'):
+                    name = ('<em>%s</em> ' % where) + strong(name)
+                elif where == 'local':
+                    name = strong(name)
+                else:
+                    name = where + strong(name.split('.')[-1])  # dotted name: where is its prefix
+                dump.append('%s&nbsp;= %s' % (name, pydoc.html.repr(value)))
+            else:
+                dump.append(name + ' <em>undefined</em>')
+
+        rows.append('<tr><td>%s</td></tr>' % small(grey(', '.join(dump))))
+        frames.append('''
+<table width="100%%" cellspacing=0 cellpadding=0 border=0>
+%s</table>''' % '\n'.join(rows))
+
+    exception = ['<p>%s: %s' % (strong(pydoc.html.escape(str(etype))),
+                                pydoc.html.escape(str(evalue)))]
+    if isinstance(evalue, BaseException):
+        for name in dir(evalue):
+            if name[:1] == '_': continue  # skip private and special attributes
+            value = pydoc.html.repr(getattr(evalue, name))
+            exception.append('\n<br>%s%s&nbsp;=\n%s' % (indent, name, value))
+
+    return head + ''.join(frames) + ''.join(exception) + '''
+
+
+<!-- The above is a description of an error in a Python program, formatted
+     for a Web browser because the 'cgitb' module was enabled.  In case you
+     are not reading this in a Web browser, here is the original traceback:
+
+%s
+-->
+''' % pydoc.html.escape(
+          ''.join(traceback.format_exception(etype, evalue, etb)))
+
+def text(einfo, context=5):
+    """Return a plain text document describing a given traceback."""
+    etype, evalue, etb = einfo
+    if type(etype) is types.ClassType:
+        etype = etype.__name__
+    pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable
+    date = time.ctime(time.time())
+    head = "%s\n%s\n%s\n" % (str(etype), pyver, date) + '''
+A problem occurred in a Python script. Here is the sequence of
+function calls leading up to the error, in the order they occurred.
+'''
+
+    frames = []
+    records = inspect.getinnerframes(etb, context)
+    for frame, file, lnum, func, lines, index in records:
+        file = file and os.path.abspath(file) or '?'
+        args, varargs, varkw, locals = inspect.getargvalues(frame)
+        call = ''
+        if func != '?':
+            call = 'in ' + func + \
+                inspect.formatargvalues(args, varargs, varkw, locals,
+                    formatvalue=lambda value: '=' + pydoc.text.repr(value))
+
+        highlight = {}
+        def reader(lnum=[lnum]):  # one-item list holds the next line number to hand out
+            highlight[lnum[0]] = 1
+            try: return linecache.getline(file, lnum[0])
+            finally: lnum[0] += 1
+        vars = scanvars(reader, frame, locals)
+
+        rows = [' %s %s' % (file, call)]
+        if index is not None:
+            i = lnum - index
+            for line in lines:
+                num = '%5d ' % i
+                rows.append(num+line.rstrip())
+                i += 1
+
+        done, dump = {}, []
+        for name, where, value in vars:
+            if name in done: continue  # report each name once per frame
+            done[name] = 1
+            if value is not __UNDEF__:
+                if where == 'global': name = 'global ' + name
+                elif where != 'local': name = where + name.split('.')[-1]
+                dump.append('%s = %s' % (name, pydoc.text.repr(value)))
+            else:
+                dump.append(name + ' undefined')
+
+        rows.append('\n'.join(dump))
+        frames.append('\n%s\n' % '\n'.join(rows))
+
+    exception = ['%s: %s' % (str(etype), str(evalue))]
+    if isinstance(evalue, BaseException):
+        for name in dir(evalue):  # unlike html(), underscore-prefixed names are included
+            value = pydoc.text.repr(getattr(evalue, name))
+            exception.append('\n%s%s = %s' % (" "*4, name, value))
+
+    return head + ''.join(frames) + ''.join(exception) + '''
+
+The above is a description of an error in a Python program. Here is
+the original traceback:
+
+%s
+''' % ''.join(traceback.format_exception(etype, evalue, etb))
+
+class Hook:
+    """A hook to replace sys.excepthook that shows tracebacks in HTML."""
+
+    def __init__(self, display=1, logdir=None, context=5, file=None,
+                 format="html"):
+        self.display = display          # send tracebacks to browser if true
+        self.logdir = logdir            # log tracebacks to files if not None
+        self.context = context          # number of source code lines per frame
+        self.file = file or sys.stdout  # place to send the output
+        self.format = format            # "html" selects html(); anything else selects text()
+
+    def __call__(self, etype, evalue, etb):
+        self.handle((etype, evalue, etb))
+
+    def handle(self, info=None):  # format info (default: sys.exc_info()), then display and/or log it
+        info = info or sys.exc_info()
+        if self.format == "html":
+            self.file.write(reset())
+
+        formatter = (self.format=="html") and html or text
+        plain = False
+        try:
+            doc = formatter(info, self.context)
+        except:                         # just in case something goes wrong
+            doc = ''.join(traceback.format_exception(*info))
+            plain = True
+
+        if self.display:
+            if plain:
+                doc = doc.replace('&', '&amp;').replace('<', '&lt;')
+                self.file.write('<pre>' + doc + '</pre>\n')
+            else:
+                self.file.write(doc + '\n')
+        else:
+            self.file.write('<p>A problem occurred in a Python script.\n')
+
+        if self.logdir is not None:
+            suffix = ['.txt', '.html'][self.format=="html"]
+            (fd, path) = tempfile.mkstemp(suffix=suffix, dir=self.logdir)
+
+            try:
+                file = os.fdopen(fd, 'w')
+                file.write(doc)
+                file.close()
+                msg = '%s contains the description of this error.' % path
+            except:  # best effort: a logging failure is reported, not raised
+                msg = 'Tried to save traceback to %s, but failed.' % path
+
+            if self.format == 'html':
+                self.file.write('<p>%s</p>\n' % msg)
+            else:
+                self.file.write(msg + '\n')
+        try:
+            self.file.flush()
+        except: pass
+
+handler = Hook().handle
+def enable(display=1, logdir=None, context=5, format="html"):
+    """Install an exception handler that formats tracebacks as HTML.
+
+    The optional argument 'display' can be set to 0 to suppress sending the
+    traceback to the browser, and 'logdir' can be set to a directory to cause
+    tracebacks to be written to files there.  Replaces sys.excepthook."""
+    sys.excepthook = Hook(display=display, logdir=logdir,
+                          context=context, format=format)
diff --git a/cashew/Lib/chunk.py b/cashew/Lib/chunk.py
new file mode 100644
index 0000000..2d15abe
--- /dev/null
+++ b/cashew/Lib/chunk.py
@@ -0,0 +1,169 @@
+"""Simple class to read IFF chunks.
+
+An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File
+Format)) has the following structure:
+
++----------------+
+| ID (4 bytes) |
++----------------+
+| size (4 bytes) |
++----------------+
+| data |
+| ... |
++----------------+
+
+The ID is a 4-byte string which identifies the type of chunk.
+
+The size field (a 32-bit value, big-endian unless bigendian is false) gives
+the size of the chunk data (pass inclheader=True if it counts the header).
+
+Usually an IFF-type file consists of one or more chunks. The proposed
+usage of the Chunk class defined here is to instantiate an instance at
+the start of each chunk and read from the instance until it reaches
+the end, after which a new instance can be instantiated. At the end
+of the file, creating a new instance will fail with an EOFError
+exception.
+
+Usage:
+while True:
+    try:
+        chunk = Chunk(file)
+    except EOFError:
+        break
+    chunktype = chunk.getname()
+    while True:
+        data = chunk.read(nbytes)
+        if not data:
+            break
+        # do something with data
+
+The interface is file-like. The implemented methods are:
+read, close, seek, tell, isatty.
+Extra methods are: skip() (called by close, skips to the end of the chunk),
+getname() (returns the chunk ID) and getsize() (returns the chunk size).
+
+The __init__ method has one required argument, a file-like object
+(including a chunk instance), and three optional flags: align (chunks
+are padded to 2-byte boundaries; default True), bigendian (default True)
+and inclheader (the size field counts the 8-byte header; default False).
+"""
+
+class Chunk:
+    def __init__(self, file, align=True, bigendian=True, inclheader=False):
+        import struct
+        self.closed = False
+        self.align = align      # whether to align to word (2-byte) boundaries
+        if bigendian:
+            strflag = '>'
+        else:
+            strflag = '<'
+        self.file = file
+        self.chunkname = file.read(4)
+        if len(self.chunkname) < 4:
+            raise EOFError  # fewer than 4 ID bytes were available
+        try:
+            self.chunksize = struct.unpack(strflag+'L', file.read(4))[0]
+        except struct.error:
+            raise EOFError  # the size field could not be unpacked
+        if inclheader:
+            self.chunksize = self.chunksize - 8 # subtract header
+        self.size_read = 0
+        try:
+            self.offset = self.file.tell()  # file position of the first data byte
+        except (AttributeError, IOError):
+            self.seekable = False
+        else:
+            self.seekable = True
+
+    def getname(self):
+        """Return the name (ID) of the current chunk."""
+        return self.chunkname
+
+    def getsize(self):
+        """Return the size of the current chunk."""
+        return self.chunksize
+
+    def close(self):  # skip the rest of the chunk, then mark it closed; repeat calls do nothing
+        if not self.closed:
+            try:
+                self.skip()
+            finally:
+                self.closed = True  # closed even if skip() raised
+
+    def isatty(self):  # always False; ValueError once closed
+        if self.closed:
+            raise ValueError, "I/O operation on closed file"
+        return False
+
+    def seek(self, pos, whence=0):
+        """Seek to specified position into the chunk.
+        Default position is 0 (start of chunk).
+        Raises IOError if not seekable, RuntimeError if pos is outside the chunk.
+        """
+
+        if self.closed:
+            raise ValueError, "I/O operation on closed file"
+        if not self.seekable:
+            raise IOError, "cannot seek"
+        if whence == 1:
+            pos = pos + self.size_read  # relative to the current position
+        elif whence == 2:
+            pos = pos + self.chunksize  # relative to the end of the chunk
+        if pos < 0 or pos > self.chunksize:
+            raise RuntimeError
+        self.file.seek(self.offset + pos, 0)
+        self.size_read = pos
+
+    def tell(self):  # current position within the chunk data; ValueError once closed
+        if self.closed:
+            raise ValueError, "I/O operation on closed file"
+        return self.size_read
+
+    def read(self, size=-1):
+        """Read at most size bytes from the chunk.
+        If size is omitted or negative, read until the end
+        of the chunk.
+        """
+
+        if self.closed:
+            raise ValueError, "I/O operation on closed file"
+        if self.size_read >= self.chunksize:
+            return ''
+        if size < 0:
+            size = self.chunksize - self.size_read
+        if size > self.chunksize - self.size_read:
+            size = self.chunksize - self.size_read  # never read past the end of the chunk
+        data = self.file.read(size)
+        self.size_read = self.size_read + len(data)
+        if self.size_read == self.chunksize and \
+           self.align and \
+           (self.chunksize & 1):
+            dummy = self.file.read(1)  # consume the pad byte after an odd-sized chunk
+            self.size_read = self.size_read + len(dummy)
+        return data
+
+    def skip(self):
+        """Skip the rest of the chunk.
+        If you are not interested in the contents of the chunk,
+        this method should be called so that the file points to
+        the start of the next chunk.
+        """
+
+        if self.closed:
+            raise ValueError, "I/O operation on closed file"
+        if self.seekable:
+            try:
+                n = self.chunksize - self.size_read
+                # maybe fix alignment
+                if self.align and (self.chunksize & 1):
+                    n = n + 1
+                self.file.seek(n, 1)
+                self.size_read = self.size_read + n
+                return
+            except IOError:
+                pass  # fall back to reading the remaining bytes
+        while self.size_read < self.chunksize:
+            n = min(8192, self.chunksize - self.size_read)
+            dummy = self.read(n)
+            if not dummy:
+                raise EOFError
diff --git a/cashew/Lib/clrtype.py b/cashew/Lib/clrtype.py
new file mode 100644
index 0000000..d2c13be
--- /dev/null
+++ b/cashew/Lib/clrtype.py
@@ -0,0 +1,630 @@
+# Licensed to the .NET Foundation under one or more agreements.
+# The .NET Foundation licenses this file to you under the Apache 2.0 License.
+# See the LICENSE file in the project root for more information.
+
+
+__all__ = ["ClrClass", "ClrInterface", "accepts", "returns", "attribute", "propagate_attributes"]
+
+import clr
+clr.AddReference("Microsoft.Dynamic")
+clr.AddReference("Microsoft.Scripting")
+clr.AddReference("IronPython")
+
+if clr.IsNetCoreApp:
+ clr.AddReference("System.Reflection.Emit")
+
+import System
+from System import Char, Void, Boolean, Array, Type, AppDomain
+from System.Reflection import FieldAttributes, MethodAttributes, PropertyAttributes, ParameterAttributes
+from System.Reflection import CallingConventions, TypeAttributes, AssemblyName
+from System.Reflection.Emit import OpCodes, CustomAttributeBuilder, AssemblyBuilder, AssemblyBuilderAccess
+from System.Runtime.InteropServices import DllImportAttribute, CallingConvention, CharSet
+from Microsoft.Scripting.Generation import Snippets
+from Microsoft.Scripting.Runtime import DynamicOperations
+from Microsoft.Scripting.Utils import ReflectionUtils
+from IronPython.Runtime import NameType, PythonContext
+from IronPython.Runtime.Types import PythonType, ReflectedField, ReflectedProperty
+
+def validate_clr_types(signature_types, var_signature = False):
+ if not isinstance(signature_types, tuple):
+ signature_types = (signature_types,)
+ for t in signature_types:
+ if type(t) is type(System.IComparable): # type overloaded on generic arity, eg IComparable and IComparable[T]
+ t = t[()] # select non-generic version
+ clr_type = clr.GetClrType(t)
+ if t == Void:
+ raise TypeError("Void cannot be used in signature")
+ is_typed = clr.GetPythonType(clr_type) == t
+ # is_typed needs to be weakened until the generated type
+ # gets explicitly published as the underlying CLR type
+ is_typed = is_typed or (hasattr(t, "__metaclass__") and t.__metaclass__ in [ClrInterface, ClrClass])
+ if not is_typed:
+ raise Exception, "Invalid CLR type %s" % str(t)
+ if not var_signature:
+ if clr_type.IsByRef:
+ raise TypeError("Byref can only be used as arguments and locals")
+ # ArgIterator is not present in Silverlight
+ if hasattr(System, "ArgIterator") and t == System.ArgIterator:
+ raise TypeError("Stack-referencing types can only be used as arguments and locals")
+
+class TypedFunction(object):
+    """
+    Record describing one raw, type-decorated function, regardless of whether it
+    is exposed as a plain method, a staticmethod or a property accessor.
+    The type information itself (and any other metadata) lives as attributes on
+    the wrapped function object. See propagate_attributes
+    """
+    def __init__(self, function, is_static = False, prop_name_if_prop_get = None, prop_name_if_prop_set = None):
+        self.function, self.is_static = function, is_static
+        # Name of the property this function is the getter of / the setter of,
+        # or None when it does not play that role.
+        self.prop_name_if_prop_get, self.prop_name_if_prop_set = prop_name_if_prop_get, prop_name_if_prop_set
+
+class ClrType(type):
+ """
+ Base metaclass for creating strongly-typed CLR types
+ """
+
+ def is_typed_method(self, function):
+ if hasattr(function, "arg_types") != hasattr(function, "return_type"):
+ raise TypeError("One of @accepts and @returns is missing for %s" % function.func_name)
+
+ return hasattr(function, "arg_types")
+
+ def get_typed_properties(self):
+ for item_name, item in self.__dict__.items():
+ if isinstance(item, property):
+ if item.fget:
+ if not self.is_typed_method(item.fget): continue
+ prop_type = item.fget.return_type
+ else:
+ if not self.is_typed_method(item.fset): continue
+ prop_type = item.fset.arg_types[0]
+ validate_clr_types(prop_type)
+ clr_prop_type = clr.GetClrType(prop_type)
+ yield item, item_name, clr_prop_type
+
+ def emit_properties(self, typebld):
+ for prop, prop_name, clr_prop_type in self.get_typed_properties():
+ self.emit_property(typebld, prop, prop_name, clr_prop_type)
+
+ def emit_property(self, typebld, prop, name, clrtype):
+ prpbld = typebld.DefineProperty(name, PropertyAttributes.None, clrtype, None)
+ if prop.fget:
+ getter = self.emitted_methods[(prop.fget.func_name, prop.fget.arg_types)]
+ prpbld.SetGetMethod(getter)
+ if prop.fset:
+ setter = self.emitted_methods[(prop.fset.func_name, prop.fset.arg_types)]
+ prpbld.SetSetMethod(setter)
+
+ def dummy_function(self): raise RuntimeError("this should not get called")
+
+ def get_typed_methods(self):
+ """
+ Get all the methods with @accepts (and @returns) decorators
+ Functions are assumed to be instance methods, unless decorated with @staticmethod
+ """
+
+ # We avoid using the "types" library as it is not a builtin
+ FunctionType = type(ClrType.__dict__["dummy_function"])
+
+ for item_name, item in self.__dict__.items():
+ function = None
+ is_static = False
+ if isinstance(item, FunctionType):
+ function, is_static = item, False
+ elif isinstance(item, staticmethod):
+ function, is_static = getattr(self, item_name), True
+ elif isinstance(item, property):
+ if item.fget and self.is_typed_method(item.fget):
+ if item.fget.func_name == item_name:
+ # The property hides the getter. So yield the getter
+ yield TypedFunction(item.fget, False, item_name, None)
+ if item.fset and self.is_typed_method(item.fset):
+ if item.fset.func_name == item_name:
+ # The property hides the setter. So yield the setter
+ yield TypedFunction(item.fset, False, None, item_name)
+ continue
+ else:
+ continue
+ if self.is_typed_method(function):
+ yield TypedFunction(function, is_static)
+
+ def emit_methods(self, typebld):
+ # We need to track the generated methods so that we can emit properties
+ # referring these methods.
+ # Also, the hash is indexed by name *and signature*. Even though Python does
+ # not have method overloading, property getter and setter functions can have
+ # the same func_name attribute
+ self.emitted_methods = {}
+ for function_info in self.get_typed_methods():
+ method_builder = self.emit_method(typebld, function_info)
+ function = function_info.function
+ if self.emitted_methods.has_key((function.func_name, function.arg_types)):
+ raise TypeError("methods with clashing names")
+ self.emitted_methods[(function.func_name, function.arg_types)] = method_builder
+
+ def emit_classattribs(self, typebld):
+ if hasattr(self, '_clrclassattribs'):
+ for attrib_info in self._clrclassattribs:
+ if isinstance(attrib_info, type):
+ ci = clr.GetClrType(attrib_info).GetConstructor(())
+ cab = CustomAttributeBuilder(ci, ())
+ elif isinstance(attrib_info, CustomAttributeDecorator):
+ cab = attrib_info.GetBuilder()
+ else:
+ make_decorator = attrib_info()
+ cab = make_decorator.GetBuilder()
+ typebld.SetCustomAttribute(cab)
+
+ def get_clr_type_name(self):
+ if hasattr(self, "_clrnamespace"):
+ return self._clrnamespace + "." + self.__name__
+ else:
+ return self.__name__
+
+ def create_type(self, typebld):
+ self.emit_members(typebld)
+ new_type = typebld.CreateType()
+ self.map_members(new_type)
+ return new_type
+
+class ClrInterface(ClrType):
+ """
+ Set __metaclass__ in a Python class declaration to declare a
+ CLR interface type.
+ You need to specify object as the base-type if you do not specify any other
+ interfaces as the base interfaces
+ """
+
+ def __init__(self, *args):
+ return super(ClrInterface, self).__init__(*args)
+
+ def emit_method(self, typebld, function_info):
+ assert(not function_info.is_static)
+ function = function_info.function
+ attributes = MethodAttributes.Public | MethodAttributes.Virtual | MethodAttributes.Abstract
+ method_builder = typebld.DefineMethod(
+ function.func_name,
+ attributes,
+ function.return_type,
+ function.arg_types)
+
+ instance_offset = 0 if function_info.is_static else 1
+ arg_names = function.func_code.co_varnames
+ for i in xrange(len(function.arg_types)):
+ # TODO - set non-trivial ParameterAttributes, default value and custom attributes
+ p = method_builder.DefineParameter(i + 1, ParameterAttributes.None, arg_names[i + instance_offset])
+
+ if hasattr(function, "CustomAttributeBuilders"):
+ for cab in function.CustomAttributeBuilders:
+ method_builder.SetCustomAttribute(cab)
+
+ return method_builder
+
+ def emit_members(self, typebld):
+ self.emit_methods(typebld)
+ self.emit_properties(typebld)
+ self.emit_classattribs(typebld)
+
+ def map_members(self, new_type): pass
+
+ interface_module_builder = None
+
+ @staticmethod
+ def define_interface(typename, bases):
+ for b in bases:
+ validate_clr_types(b)
+ if not ClrInterface.interface_module_builder:
+ name = AssemblyName("interfaces")
+ access = AssemblyBuilderAccess.Run
+ assembly_builder = ReflectionUtils.DefineDynamicAssembly(name, access)
+ ClrInterface.interface_module_builder = assembly_builder.DefineDynamicModule("interfaces")
+ attrs = TypeAttributes.Public | TypeAttributes.Interface | TypeAttributes.Abstract
+ return ClrInterface.interface_module_builder.DefineType(typename, attrs, None, bases)
+
+ def map_clr_type(self, clr_type):
+ """
+ TODO - Currently "t = clr.GetPythonType(clr.GetClrType(C)); t == C" will be False
+ for C where C.__metaclass__ is ClrInterface, even though both t and C
+ represent the same CLR type. This can be fixed by publishing a mapping
+ between t and C in the IronPython runtime.
+ """
+ pass
+
+    def __clrtype__(self):
+        """Create and return the CLR interface type for this class.
+        Classes that merely inherit the metaclass defer to the base implementation."""
+        # CFoo below will use ClrInterface as its metaclass, but the user will not expect CFoo
+        # to be an interface in this case:
+        #
+        # class IFoo(object):
+        #     __metaclass__ = ClrInterface
+        # class CFoo(IFoo): pass
+        if "__metaclass__" not in self.__dict__:
+            return super(ClrInterface, self).__clrtype__()
+
+        # Every base except "object" becomes a base interface. Filtering (rather than
+        # list.remove) avoids a ValueError when "object" is not among the bases.
+        bases = tuple([b for b in self.__bases__ if b is not object])
+        # Snippets currently does not support creating interfaces, so the type is
+        # defined through ClrInterface.define_interface instead.
+        typebld = ClrInterface.define_interface(self.get_clr_type_name(), bases)
+        clr_type = self.create_type(typebld)
+        self.map_clr_type(clr_type)
+        return clr_type
+
+# Note that ClrClass inherits from ClrInterface to satisfy Python requirements of metaclasses.
+# A metaclass of a subtype has to be subtype of the metaclass of a base type. As a result,
+# if you define a type hierarchy as shown below, it requires ClrClass to be a subtype
+# of ClrInterface:
+#
+# class IFoo(object):
+# __metaclass__ = ClrInterface
+# class CFoo(IFoo):
+# __metaclass__ = ClrClass
+class ClrClass(ClrInterface):
+ """
+ Set __metaclass__ in a Python class declaration to specify strong-type
+ information for the class or its attributes. The Python class
+ retains its Python attributes, like being able to add or remove methods.
+ """
+
+ # Holds the FieldInfo for a static CLR field which points to a
+ # Microsoft.Scripting.Runtime.DynamicOperations corresponding to the current ScriptEngine
+ dynamic_operations_field = None
+
+ def emit_fields(self, typebld):
+ if hasattr(self, "_clrfields"):
+ for fldname in self._clrfields:
+ field_type = self._clrfields[fldname]
+ validate_clr_types(field_type)
+ typebld.DefineField(
+ fldname,
+ clr.GetClrType(field_type),
+ FieldAttributes.Public)
+
+ def map_fields(self, new_type):
+ if hasattr(self, "_clrfields"):
+ for fldname in self._clrfields:
+ fldinfo = new_type.GetField(fldname)
+ setattr(self, fldname, ReflectedField(fldinfo))
+
+ @staticmethod
+ def get_dynamic_operations_field():
+ if ClrClass.dynamic_operations_field:
+ return ClrClass.dynamic_operations_field
+ python_context = clr.GetCurrentRuntime().GetLanguage(PythonContext)
+ dynamic_operations = DynamicOperations(python_context)
+
+ typegen = Snippets.Shared.DefineType(
+ "DynamicOperationsHolder" + str(hash(python_context)),
+ object,
+ True,
+ False)
+ typebld = typegen.TypeBuilder
+ typebld.DefineField(
+ "DynamicOperations",
+ DynamicOperations,
+ FieldAttributes.Public | FieldAttributes.Static)
+ new_type = typebld.CreateType()
+ ClrClass.dynamic_operations_field = new_type.GetField("DynamicOperations")
+
+ ClrClass.dynamic_operations_field.SetValue(None, dynamic_operations)
+
+ return ClrClass.dynamic_operations_field
+
+ def emit_typed_stub_to_python_method(self, typebld, function_info):
+ function = function_info.function
+ """
+ Generate a stub method that repushes all the arguments and
+ dispatches to DynamicOperations.InvokeMember
+ """
+ invoke_member = clr.GetClrType(DynamicOperations).GetMethod(
+ "InvokeMember",
+ Array[Type]((object, str, Array[object])))
+
+ # Type.GetMethod raises an AmbiguousMatchException if there is a generic and a non-generic method
+ # (like DynamicOperations.GetMember) with the same name and signature. So we have to do things
+ # the hard way
+ get_member_search = [m for m in clr.GetClrType(DynamicOperations).GetMethods() if m.Name == "GetMember" and not m.IsGenericMethod and m.GetParameters().Length == 2]
+ assert(len(get_member_search) == 1)
+ get_member = get_member_search[0]
+
+ set_member_search = [m for m in clr.GetClrType(DynamicOperations).GetMethods() if m.Name == "SetMember" and not m.IsGenericMethod and m.GetParameters().Length == 3]
+ assert(len(set_member_search) == 1)
+ set_member = set_member_search[0]
+
+ convert_to = clr.GetClrType(DynamicOperations).GetMethod(
+ "ConvertTo",
+ Array[Type]((object, Type)))
+ get_type_from_handle = clr.GetClrType(Type).GetMethod("GetTypeFromHandle")
+
+ attributes = MethodAttributes.Public
+ if function_info.is_static: attributes |= MethodAttributes.Static
+ if function.func_name == "__new__":
+ if function_info.is_static: raise TypeError
+ method_builder = typebld.DefineConstructor(
+ attributes,
+ CallingConventions.HasThis,
+ function.arg_types)
+ raise NotImplementedError("Need to call self.baseType ctor passing in self.get_python_type_field()")
+ else:
+ method_builder = typebld.DefineMethod(
+ function.func_name,
+ attributes,
+ function.return_type,
+ function.arg_types)
+
+ instance_offset = 0 if function_info.is_static else 1
+ arg_names = function.func_code.co_varnames
+ for i in xrange(len(function.arg_types)):
+ # TODO - set non-trivial ParameterAttributes, default value and custom attributes
+ p = method_builder.DefineParameter(i + 1, ParameterAttributes.None, arg_names[i + instance_offset])
+
+ ilgen = method_builder.GetILGenerator()
+
+ args_array = ilgen.DeclareLocal(Array[object])
+ args_count = len(function.arg_types)
+ ilgen.Emit(OpCodes.Ldc_I4, args_count)
+ ilgen.Emit(OpCodes.Newarr, object)
+ ilgen.Emit(OpCodes.Stloc, args_array)
+ for i in xrange(args_count):
+ arg_type = function.arg_types[i]
+ if clr.GetClrType(arg_type).IsByRef:
+ raise NotImplementedError("byref params not supported")
+ ilgen.Emit(OpCodes.Ldloc, args_array)
+ ilgen.Emit(OpCodes.Ldc_I4, i)
+ ilgen.Emit(OpCodes.Ldarg, i + int(not function_info.is_static))
+ ilgen.Emit(OpCodes.Box, arg_type)
+ ilgen.Emit(OpCodes.Stelem_Ref)
+
+ has_return_value = True
+ if function_info.prop_name_if_prop_get:
+ ilgen.Emit(OpCodes.Ldsfld, ClrClass.get_dynamic_operations_field())
+ ilgen.Emit(OpCodes.Ldarg, 0)
+ ilgen.Emit(OpCodes.Ldstr, function_info.prop_name_if_prop_get)
+ ilgen.Emit(OpCodes.Callvirt, get_member)
+ elif function_info.prop_name_if_prop_set:
+ ilgen.Emit(OpCodes.Ldsfld, ClrClass.get_dynamic_operations_field())
+ ilgen.Emit(OpCodes.Ldarg, 0)
+ ilgen.Emit(OpCodes.Ldstr, function_info.prop_name_if_prop_set)
+ ilgen.Emit(OpCodes.Ldarg, 1)
+ ilgen.Emit(OpCodes.Callvirt, set_member)
+ has_return_value = False
+ else:
+ ilgen.Emit(OpCodes.Ldsfld, ClrClass.get_dynamic_operations_field())
+ if function_info.is_static:
+ raise NotImplementedError("need to load Python class object from a CLR static field")
+ # ilgen.Emit(OpCodes.Ldsfld, class_object)
+ else:
+ ilgen.Emit(OpCodes.Ldarg, 0)
+
+ ilgen.Emit(OpCodes.Ldstr, function.func_name)
+ ilgen.Emit(OpCodes.Ldloc, args_array)
+ ilgen.Emit(OpCodes.Callvirt, invoke_member)
+
+ if has_return_value:
+ if function.return_type == Void:
+ ilgen.Emit(OpCodes.Pop)
+ else:
+ ret_val = ilgen.DeclareLocal(object)
+ ilgen.Emit(OpCodes.Stloc, ret_val)
+ ilgen.Emit(OpCodes.Ldsfld, ClrClass.get_dynamic_operations_field())
+ ilgen.Emit(OpCodes.Ldloc, ret_val)
+ ilgen.Emit(OpCodes.Ldtoken, clr.GetClrType(function.return_type))
+ ilgen.Emit(OpCodes.Call, get_type_from_handle)
+ ilgen.Emit(OpCodes.Callvirt, convert_to)
+ ilgen.Emit(OpCodes.Unbox_Any, function.return_type)
+ ilgen.Emit(OpCodes.Ret)
+ return method_builder
+
+ def emit_method(self, typebld, function_info):
+ function = function_info.function
+ if hasattr(function, "DllImportAttributeDecorator"):
+ dllImportAttributeDecorator = function.DllImportAttributeDecorator
+ name = function.func_name
+ dllName = dllImportAttributeDecorator.args[0]
+ entryName = function.func_name
+ attributes = MethodAttributes.Public | MethodAttributes.Static | MethodAttributes.PinvokeImpl
+ callingConvention = CallingConventions.Standard
+ returnType = function.return_type
+ returnTypeRequiredCustomModifiers = ()
+ returnTypeOptionalCustomModifiers = ()
+ parameterTypes = function.arg_types
+ parameterTypeRequiredCustomModifiers = None
+ parameterTypeOptionalCustomModifiers = None
+ nativeCallConv = CallingConvention.Winapi
+ nativeCharSet = CharSet.Auto
+ method_builder = typebld.DefinePInvokeMethod(
+ name,
+ dllName,
+ entryName,
+ attributes,
+ callingConvention,
+ returnType,
+ returnTypeRequiredCustomModifiers,
+ returnTypeOptionalCustomModifiers,
+ parameterTypes,
+ parameterTypeRequiredCustomModifiers,
+ parameterTypeOptionalCustomModifiers,
+ nativeCallConv,
+ nativeCharSet)
+ else:
+ method_builder = self.emit_typed_stub_to_python_method(typebld, function_info)
+
+ if hasattr(function, "CustomAttributeBuilders"):
+ for cab in function.CustomAttributeBuilders:
+ method_builder.SetCustomAttribute(cab)
+
+ return method_builder
+
+ def map_pinvoke_methods(self, new_type):
+ pythonType = clr.GetPythonType(new_type)
+ for function_info in self.get_typed_methods():
+ function = function_info.function
+ if hasattr(function, "DllImportAttributeDecorator"):
+ # Overwrite the Python function with the pinvoke_method
+ pinvoke_method = getattr(pythonType, function.func_name)
+ setattr(self, function.func_name, pinvoke_method)
+
+ def emit_python_type_field(self, typebld):
+ return typebld.DefineField(
+ "PythonType",
+ PythonType,
+ FieldAttributes.Public | FieldAttributes.Static)
+
+ def set_python_type_field(self, new_type):
+ self.PythonType = new_type.GetField("PythonType")
+ self.PythonType.SetValue(None, self)
+
+ def add_wrapper_ctors(self, baseType, typebld):
+ python_type_field = self.emit_python_type_field(typebld)
+ for ctor in baseType.GetConstructors():
+ ctorparams = ctor.GetParameters()
+
+ # leave out the PythonType argument
+ assert(ctorparams[0].ParameterType == clr.GetClrType(PythonType))
+ ctorparams = ctorparams[1:]
+
+ ctorbld = typebld.DefineConstructor(
+ ctor.Attributes,
+ ctor.CallingConvention,
+ tuple([p.ParameterType for p in ctorparams]))
+ ilgen = ctorbld.GetILGenerator()
+ ilgen.Emit(OpCodes.Ldarg, 0)
+ ilgen.Emit(OpCodes.Ldsfld, python_type_field)
+ for index in xrange(len(ctorparams)):
+ ilgen.Emit(OpCodes.Ldarg, index + 1)
+ ilgen.Emit(OpCodes.Call, ctor)
+ ilgen.Emit(OpCodes.Ret)
+
+ def emit_members(self, typebld):
+ self.emit_fields(typebld)
+ self.add_wrapper_ctors(self.baseType, typebld)
+ super(ClrClass, self).emit_members(typebld)
+
+ def map_members(self, new_type):
+ self.map_fields(new_type)
+ self.map_pinvoke_methods(new_type)
+ self.set_python_type_field(new_type)
+ super(ClrClass, self).map_members(new_type)
+
+ def __clrtype__(self):
+ # CDerived below will use ClrClass as its metaclass, but the user may not expect CDerived
+ # to be a typed .NET class in this case:
+ #
+ # class CBase(object):
+ # __metaclass__ = ClrClass
+ # class CDerived(CBase): pass
+ if not "__metaclass__" in self.__dict__:
+ return super(ClrClass, self).__clrtype__()
+
+ # Create a simple Python type first.
+ self.baseType = super(ClrType, self).__clrtype__()
+ # We will now subtype it to create a customized class with the
+ # CLR attributes as defined by the user
+ typegen = Snippets.Shared.DefineType(self.get_clr_type_name(), self.baseType, True, False)
+ typebld = typegen.TypeBuilder
+ return self.create_type(typebld)
+
+def make_cab(attrib_type, *args, **kwds):
+    """
+    Build a CustomAttributeBuilder for attrib_type. Positional arguments select
+    and feed the attribute constructor; each keyword argument must name a
+    property or, failing that, a field of the attribute type (else TypeError).
+    """
+    clrtype = clr.GetClrType(attrib_type)
+    ctor_arg_types = tuple([clr.GetClrType(type(a)) for a in args])
+    ci = clrtype.GetConstructor(ctor_arg_types)
+
+    prop_infos, prop_values = [], []
+    field_infos, field_values = [], []
+    for name, value in kwds.items():
+        member = clrtype.GetProperty(name)
+        if member is not None:
+            infos, values = prop_infos, prop_values
+        else:
+            member = clrtype.GetField(name)
+            if member is None:
+                raise TypeError("No %s Member found on %s" % (name, clrtype.Name))
+            infos, values = field_infos, field_values
+        infos.append(member)
+        values.append(value)
+    return CustomAttributeBuilder(ci, args, tuple(prop_infos), tuple(prop_values), tuple(field_infos), tuple(field_values))
+
+def accepts(*args):
+    """
+    Record the CLR argument types on a function. TODO - needs to be merged with clr.accepts
+    """
+    validate_clr_types(args, True)
+    def set_arg_types(function):
+        function.arg_types = args
+        return function
+    return set_arg_types
+
+def returns(return_type = Void):
+    """
+    Record the CLR return type on a function. TODO - needs to be merged with clr.returns
+    """
+    if return_type != Void:  # validate_clr_types rejects Void outright
+        validate_clr_types(return_type)
+    def set_return_type(function):
+        function.return_type = return_type
+        return function
+    return set_return_type
+
+class CustomAttributeDecorator(object):
+    """
+    Captures a custom attribute (its type plus constructor arguments) that is to
+    be applied to a type or a method.
+    An instance of System.Attribute cannot serve this purpose: there is no way to
+    turn one into a System.Reflection.Emit.CustomAttributeBuilder, since the
+    builder has to know how to encode the information in metadata so that a
+    similar System.Attribute instance can be *recreated* later.
+
+    A CustomAttributeBuilder cannot be queried once created either. The arguments
+    needed to create it are therefore kept here, where pseudo-custom-attributes
+    (such as DllImport) can still get to them.
+    """
+    def __init__(self, attrib_type, *args, **kwargs):
+        self.attrib_type, self.args, self.kwargs = attrib_type, args, kwargs
+
+    def __call__(self, function):
+        if self.attrib_type == DllImportAttribute:
+            # pseudo-custom-attribute: keep the decorator itself, no builder
+            function.DllImportAttributeDecorator = self
+            return function
+        if not hasattr(function, "CustomAttributeBuilders"):
+            function.CustomAttributeBuilders = []
+        function.CustomAttributeBuilders.append(self.GetBuilder())
+        return function
+
+    def GetBuilder(self):
+        # DllImport is handled via DefinePInvokeMethod, never through a builder
+        assert self.attrib_type not in [DllImportAttribute]
+        return make_cab(self.attrib_type, *self.args, **self.kwargs)
+
+def attribute(attrib_type):
+    """
+    Return a factory which, called with the attribute's constructor arguments,
+    yields the CustomAttributeDecorator applying attrib_type to a type or method.
+    """
+    # attrib_type is bound here; the attribute's own arguments are supplied later
+    return lambda *args, **kwargs: CustomAttributeDecorator(attrib_type, *args, **kwargs)
+
+def propagate_attributes(old_function, new_function):
+    """
+    Copy the clrtype metadata (name, return_type, arg_types, custom attribute builders and
+    DllImport decorator) from old_function to new_function. Needed when a decorator replaces a
+    function of a ClrInterface/ClrClass type with a wrapper that must keep working with clrtype.
+    """
+    if hasattr(old_function, "return_type"):
+        new_function.func_name = old_function.func_name
+        new_function.return_type = old_function.return_type
+        new_function.arg_types = old_function.arg_types
+    if hasattr(old_function, "CustomAttributeBuilders"):
+        new_function.CustomAttributeBuilders = old_function.CustomAttributeBuilders
+    if hasattr(old_function, "DllImportAttributeDecorator"):  # each attribute is guarded by its own name
+        new_function.DllImportAttributeDecorator = old_function.DllImportAttributeDecorator
+
diff --git a/cashew/Lib/cmd.py b/cashew/Lib/cmd.py
new file mode 100644
index 0000000..05ba7e3
--- /dev/null
+++ b/cashew/Lib/cmd.py
@@ -0,0 +1,404 @@
+"""A generic class to build line-oriented command interpreters.
+
+Interpreters constructed with this class obey the following conventions:
+
+1. End of file on input is processed as the command 'EOF'.
+2. A command is parsed out of each line by collecting the prefix composed
+ of characters in the identchars member.
+3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method
+ is passed a single argument consisting of the remainder of the line.
+4. Typing an empty line repeats the last command. (Actually, it calls the
+ method `emptyline', which may be overridden in a subclass.)
+5. There is a predefined `help' method. Given an argument `topic', it
+ calls the command `help_topic'. With no arguments, it lists all topics
+ with defined help_ functions, broken into up to three topics; documented
+ commands, miscellaneous help topics, and undocumented commands.
+6. The command '?' is a synonym for `help'. The command '!' is a synonym
+ for `shell', if a do_shell method exists.
+7. If completion is enabled, completing commands will be done automatically,
+ and completing of commands args is done by calling complete_foo() with
+ arguments text, line, begidx, endidx. text is string we are matching
+ against, all returned matches must begin with it. line is the current
+ input line (lstripped), begidx and endidx are the beginning and end
+ indexes of the text being matched, which could be used to provide
+ different completion depending upon which position the argument is in.
+
+The `default' method may be overridden to intercept commands for which there
+is no do_ method.
+
+The `completedefault' method may be overridden to intercept completions for
+commands that have no complete_ method.
+
+The data member `self.ruler' sets the character used to draw separator lines
+in the help messages. If empty, no ruler line is drawn. It defaults to "=".
+
+If the value of `self.intro' is nonempty when the cmdloop method is called,
+it is printed out on interpreter startup. This value may be overridden
+via an optional argument to the cmdloop() method.
+
+The data members `self.doc_header', `self.misc_header', and
+`self.undoc_header' set the headers used for the help function's
+listings of documented functions, miscellaneous topics, and undocumented
+functions respectively.
+
+These interpreters use raw_input; thus, if the readline module is loaded,
+they automatically support Emacs-like command history and editing features.
+"""
+
+import string
+
+__all__ = ["Cmd"]
+
+PROMPT = '(Cmd) '
+IDENTCHARS = string.ascii_letters + string.digits + '_'
+
+class Cmd:
+ """A simple framework for writing line-oriented command interpreters.
+
+ These are often useful for test harnesses, administrative tools, and
+ prototypes that will later be wrapped in a more sophisticated interface.
+
+ A Cmd instance or subclass instance is a line-oriented interpreter
+ framework. There is no good reason to instantiate Cmd itself; rather,
+ it's useful as a superclass of an interpreter class you define yourself
+ in order to inherit Cmd's methods and encapsulate action methods.
+
+ """
+    prompt = PROMPT  # issued by cmdloop() before each line is read
+    identchars = IDENTCHARS  # characters collected into the command name by parseline()
+    ruler = '='  # drawn under help headers; no ruler line if empty
+    lastcmd = ''  # last nonempty command line, repeated by emptyline()
+    intro = None  # written once by cmdloop() on startup when nonempty
+    doc_leader = ""
+    doc_header = "Documented commands (type help <topic>):"
+    misc_header = "Miscellaneous help topics:"
+    undoc_header = "Undocumented commands:"
+    nohelp = "*** No help on %s"
+    use_rawinput = 1  # true: read with raw_input(); false: self.stdin.readline()
+
+ def __init__(self, completekey='tab', stdin=None, stdout=None):
+ """Instantiate a line-oriented interpreter framework.
+
+ The optional argument 'completekey' is the readline name of a
+ completion key; it defaults to the Tab key. If completekey is
+ not None and the readline module is available, command completion
+ is done automatically. The optional arguments stdin and stdout
+ specify alternate input and output file objects; if not specified,
+ sys.stdin and sys.stdout are used.
+
+ """
+ import sys
+ if stdin is not None:
+ self.stdin = stdin
+ else:
+ self.stdin = sys.stdin
+ if stdout is not None:
+ self.stdout = stdout
+ else:
+ self.stdout = sys.stdout
+ self.cmdqueue = []
+ self.completekey = completekey
+
+ def cmdloop(self, intro=None):
+ """Repeatedly issue a prompt, accept input, parse an initial prefix
+ off the received input, and dispatch to action methods, passing them
+ the remainder of the line as argument.
+
+ """
+
+ self.preloop()
+ if self.use_rawinput and self.completekey:
+ try:
+ import readline
+ self.old_completer = readline.get_completer()
+ readline.set_completer(self.complete)
+ readline.parse_and_bind(self.completekey+": complete")
+ except ImportError:
+ pass
+ try:
+ if intro is not None:
+ self.intro = intro
+ if self.intro:
+ self.stdout.write(str(self.intro)+"\n")
+ stop = None
+ while not stop:
+ if self.cmdqueue:
+ line = self.cmdqueue.pop(0)
+ else:
+ if self.use_rawinput:
+ try:
+ line = raw_input(self.prompt)
+ except EOFError:
+ line = 'EOF'
+ else:
+ self.stdout.write(self.prompt)
+ self.stdout.flush()
+ line = self.stdin.readline()
+ if not len(line):
+ line = 'EOF'
+ else:
+ line = line.rstrip('\r\n')
+ line = self.precmd(line)
+ stop = self.onecmd(line)
+ stop = self.postcmd(stop, line)
+ self.postloop()
+ finally:
+ if self.use_rawinput and self.completekey:
+ try:
+ import readline
+ readline.set_completer(self.old_completer)
+ except ImportError:
+ pass
+
+
+ def precmd(self, line):
+ """Hook method executed just before the command line is
+ interpreted, but after the input prompt is generated and issued.
+
+ """
+ return line
+
+ def postcmd(self, stop, line):
+ """Hook method executed just after a command dispatch is finished."""
+ return stop
+
+ def preloop(self):
+ """Hook method executed once when the cmdloop() method is called."""
+ pass
+
+ def postloop(self):
+ """Hook method executed once when the cmdloop() method is about to
+ return.
+
+ """
+ pass
+
+ def parseline(self, line):
+ """Parse the line into a command name and a string containing
+ the arguments. Returns a tuple containing (command, args, line).
+ 'command' and 'args' may be None if the line couldn't be parsed.
+ """
+ line = line.strip()
+ if not line:
+ return None, None, line
+ elif line[0] == '?':
+ line = 'help ' + line[1:]
+ elif line[0] == '!':
+ if hasattr(self, 'do_shell'):
+ line = 'shell ' + line[1:]
+ else:
+ return None, None, line
+ i, n = 0, len(line)
+ while i < n and line[i] in self.identchars: i = i+1
+ cmd, arg = line[:i], line[i:].strip()
+ return cmd, arg, line
+
+ def onecmd(self, line):
+ """Interpret the argument as though it had been typed in response
+ to the prompt.
+
+ This may be overridden, but should not normally need to be;
+ see the precmd() and postcmd() methods for useful execution hooks.
+ The return value is a flag indicating whether interpretation of
+ commands by the interpreter should stop.
+
+ """
+ cmd, arg, line = self.parseline(line)
+ if not line:
+ return self.emptyline()
+ if cmd is None:
+ return self.default(line)
+ self.lastcmd = line
+ if line == 'EOF' :
+ self.lastcmd = ''
+ if cmd == '':
+ return self.default(line)
+ else:
+ try:
+ func = getattr(self, 'do_' + cmd)
+ except AttributeError:
+ return self.default(line)
+ return func(arg)
+
+ def emptyline(self):
+ """Called when an empty line is entered in response to the prompt.
+
+ If this method is not overridden, it repeats the last nonempty
+ command entered.
+
+ """
+ if self.lastcmd:
+ return self.onecmd(self.lastcmd)
+
+ def default(self, line):
+ """Called on an input line when the command prefix is not recognized.
+
+ If this method is not overridden, it prints an error message and
+ returns.
+
+ """
+ self.stdout.write('*** Unknown syntax: %s\n'%line)
+
+ def completedefault(self, *ignored):
+ """Method called to complete an input line when no command-specific
+ complete_*() method is available.
+
+ By default, it returns an empty list.
+
+ """
+ return []
+
+ def completenames(self, text, *ignored):
+ dotext = 'do_'+text
+ return [a[3:] for a in self.get_names() if a.startswith(dotext)]
+
+ def complete(self, text, state):
+ """Return the next possible completion for 'text'.
+
+ If a command has not been entered, then complete against command list.
+ Otherwise try to call complete_<command> to get list of completions.
+ """
+ if state == 0:
+ import readline
+ origline = readline.get_line_buffer()
+ line = origline.lstrip()
+ stripped = len(origline) - len(line)
+ begidx = readline.get_begidx() - stripped
+ endidx = readline.get_endidx() - stripped
+ if begidx>0:
+ cmd, args, foo = self.parseline(line)
+ if cmd == '':
+ compfunc = self.completedefault
+ else:
+ try:
+ compfunc = getattr(self, 'complete_' + cmd)
+ except AttributeError:
+ compfunc = self.completedefault
+ else:
+ compfunc = self.completenames
+ self.completion_matches = compfunc(text, line, begidx, endidx)
+ try:
+ return self.completion_matches[state]
+ except IndexError:
+ return None
+
+ def get_names(self):
+ # This method used to pull in base class attributes
+ # at a time dir() didn't do it yet.
+ return dir(self.__class__)
+
+ def complete_help(self, *args):
+ commands = set(self.completenames(*args))
+ topics = set(a[5:] for a in self.get_names()
+ if a.startswith('help_' + args[0]))
+ return list(commands | topics)
+
+ def do_help(self, arg):
+ 'List available commands with "help" or detailed help with "help cmd".'
+ if arg:
+ # XXX check arg syntax
+ try:
+ func = getattr(self, 'help_' + arg)
+ except AttributeError:
+ try:
+ doc=getattr(self, 'do_' + arg).__doc__
+ if doc:
+ self.stdout.write("%s\n"%str(doc))
+ return
+ except AttributeError:
+ pass
+ self.stdout.write("%s\n"%str(self.nohelp % (arg,)))
+ return
+ func()
+ else:
+ names = self.get_names()
+ cmds_doc = []
+ cmds_undoc = []
+ help = {}
+ for name in names:
+ if name[:5] == 'help_':
+ help[name[5:]]=1
+ names.sort()
+ # There can be duplicates if routines overridden
+ prevname = ''
+ for name in names:
+ if name[:3] == 'do_':
+ if name == prevname:
+ continue
+ prevname = name
+ cmd=name[3:]
+ if cmd in help:
+ cmds_doc.append(cmd)
+ del help[cmd]
+ elif getattr(self, name).__doc__:
+ cmds_doc.append(cmd)
+ else:
+ cmds_undoc.append(cmd)
+ self.stdout.write("%s\n"%str(self.doc_leader))
+ self.print_topics(self.doc_header, cmds_doc, 15,80)
+ self.print_topics(self.misc_header, help.keys(),15,80)
+ self.print_topics(self.undoc_header, cmds_undoc, 15,80)
+
+ def print_topics(self, header, cmds, cmdlen, maxcol):
+ if cmds:
+ self.stdout.write("%s\n"%str(header))
+ if self.ruler:
+ self.stdout.write("%s\n"%str(self.ruler * len(header)))
+ self.columnize(cmds, maxcol-1)
+ self.stdout.write("\n")
+
+ def columnize(self, list, displaywidth=80):
+ """Display a list of strings as a compact set of columns.
+
+ Each column is only as wide as necessary.
+ Columns are separated by two spaces (one was not legible enough).
+ """
+ if not list:
+ self.stdout.write("\n")
+ return
+ nonstrings = [i for i in range(len(list))
+ if not isinstance(list[i], str)]
+ if nonstrings:
+ raise TypeError, ("list[i] not a string for i in %s" %
+ ", ".join(map(str, nonstrings)))
+ size = len(list)
+ if size == 1:
+ self.stdout.write('%s\n'%str(list[0]))
+ return
+ # Try every row count from 1 upwards
+ for nrows in range(1, len(list)):
+ ncols = (size+nrows-1) // nrows
+ colwidths = []
+ totwidth = -2
+ for col in range(ncols):
+ colwidth = 0
+ for row in range(nrows):
+ i = row + nrows*col
+ if i >= size:
+ break
+ x = list[i]
+ colwidth = max(colwidth, len(x))
+ colwidths.append(colwidth)
+ totwidth += colwidth + 2
+ if totwidth > displaywidth:
+ break
+ if totwidth <= displaywidth:
+ break
+ else:
+ nrows = len(list)
+ ncols = 1
+ colwidths = [0]
+ for row in range(nrows):
+ texts = []
+ for col in range(ncols):
+ i = row + nrows*col
+ if i >= size:
+ x = ""
+ else:
+ x = list[i]
+ texts.append(x)
+ while texts and not texts[-1]:
+ del texts[-1]
+ for col in range(len(texts)):
+ texts[col] = texts[col].ljust(colwidths[col])
+ self.stdout.write("%s\n"%str(" ".join(texts)))
diff --git a/cashew/Lib/code.py b/cashew/Lib/code.py
new file mode 100644
index 0000000..3b39d1b
--- /dev/null
+++ b/cashew/Lib/code.py
@@ -0,0 +1,310 @@
+"""Utilities needed to emulate Python's interactive interpreter.
+
+"""
+
+# Inspired by similar code by Jeff Epler and Fredrik Lundh.
+
+
+import sys
+import traceback
+from codeop import CommandCompiler, compile_command
+
+__all__ = ["InteractiveInterpreter", "InteractiveConsole", "interact",
+ "compile_command"]
+
+def softspace(file, newvalue):
+ """Set file.softspace to newvalue and return its previous value.
+
+ Returns 0 when file has no softspace attribute. A failure to set
+ the attribute (AttributeError or TypeError) is silently ignored.
+ """
+ oldvalue = 0
+ try:
+ oldvalue = file.softspace
+ except AttributeError:
+ pass
+ try:
+ file.softspace = newvalue
+ except (AttributeError, TypeError):
+ # "attribute-less object" or "read-only attributes"
+ pass
+ return oldvalue
+
+class InteractiveInterpreter:
+ """Base class for InteractiveConsole.
+
+ This class deals with parsing and interpreter state (the user's
+ namespace); it doesn't deal with input buffering or prompting or
+ input file naming (the filename is always passed in explicitly).
+
+ """
+
+ def __init__(self, locals=None):
+ """Constructor.
+
+ The optional 'locals' argument specifies the dictionary in
+ which code will be executed; it defaults to a newly created
+ dictionary with key "__name__" set to "__console__" and key
+ "__doc__" set to None.
+
+ """
+ if locals is None:
+ locals = {"__name__": "__console__", "__doc__": None}
+ self.locals = locals
+ self.compile = CommandCompiler()
+
+ def runsource(self, source, filename="<input>", symbol="single"):
+ """Compile and run some source in the interpreter.
+
+ Arguments are as for compile_command(); filename defaults to
+ "<input>".
+
+ One of several things can happen:
+
+ 1) The input is incorrect; compile_command() raised an
+ exception (SyntaxError or OverflowError). A syntax traceback
+ will be printed by calling the showsyntaxerror() method.
+
+ 2) The input is incomplete, and more input is required;
+ compile_command() returned None. Nothing happens.
+
+ 3) The input is complete; compile_command() returned a code
+ object. The code is executed by calling self.runcode() (which
+ also handles run-time exceptions, except for SystemExit).
+
+ The return value is True in case 2, False in the other cases (unless
+ an exception is raised). The return value can be used to
+ decide whether to use sys.ps1 or sys.ps2 to prompt the next
+ line.
+
+ """
+ try:
+ code = self.compile(source, filename, symbol)
+ except (OverflowError, SyntaxError, ValueError):
+ # Case 1
+ self.showsyntaxerror(filename)
+ return False
+
+ if code is None:
+ # Case 2
+ return True
+
+ # Case 3
+ self.runcode(code)
+ return False
+
+ def runcode(self, code):
+ """Execute a code object.
+
+ When an exception occurs, self.showtraceback() is called to
+ display a traceback. All exceptions are caught except
+ SystemExit, which is reraised.
+
+ A note about KeyboardInterrupt: this exception may occur
+ elsewhere in this code, and may not always be caught. The
+ caller should be prepared to deal with it.
+
+ """
+ try:
+ exec code in self.locals
+ except SystemExit:
+ raise
+ except:
+ self.showtraceback()
+ else:
+ if softspace(sys.stdout, 0):
+ print
+
+ def showsyntaxerror(self, filename=None):
+ """Display the syntax error that just occurred.
+
+ This doesn't display a stack trace because there isn't one.
+
+ If a filename is given, it is stuffed in the exception instead
+ of what was there before (because Python's parser always uses
+ "<string>" when reading from a string).
+
+ The exception details are also recorded in sys.last_type,
+ sys.last_value and sys.last_traceback.
+
+ The output is written by self.write(), below.
+
+ """
+ type, value, sys.last_traceback = sys.exc_info()
+ sys.last_type = type
+ sys.last_value = value
+ if filename and type is SyntaxError:
+ # Work hard to stuff the correct filename in the exception
+ try:
+ msg, (dummy_filename, lineno, offset, line) = value
+ except:
+ # Not the format we expect; leave it alone
+ pass
+ else:
+ # Stuff in the right filename
+ value = SyntaxError(msg, (filename, lineno, offset, line))
+ sys.last_value = value
+ list = traceback.format_exception_only(type, value)
+ map(self.write, list)
+
+ def showtraceback(self):
+ """Display the exception that just occurred.
+
+ We remove the first stack item because it is our own code.
+
+ The output is written by self.write(), below.
+
+ """
+ try:
+ type, value, tb = sys.exc_info()
+ sys.last_type = type
+ sys.last_value = value
+ sys.last_traceback = tb
+ tblist = traceback.extract_tb(tb)
+ del tblist[:1]
+ list = traceback.format_list(tblist)
+ if list:
+ list.insert(0, "Traceback (most recent call last):\n")
+ list[len(list):] = traceback.format_exception_only(type, value)
+ finally:
+ tblist = tb = None
+ map(self.write, list)
+
+ def write(self, data):
+ """Write a string.
+
+ The base implementation writes to sys.stderr; a subclass may
+ replace this with a different implementation.
+
+ """
+ sys.stderr.write(data)
+
+
+class InteractiveConsole(InteractiveInterpreter):
+ """Closely emulate the behavior of the interactive Python interpreter.
+
+ This class builds on InteractiveInterpreter and adds prompting
+ using the familiar sys.ps1 and sys.ps2, and input buffering.
+
+ """
+
+ def __init__(self, locals=None, filename="<console>"):
+ """Constructor.
+
+ The optional locals argument will be passed to the
+ InteractiveInterpreter base class.
+
+ The optional filename argument should specify the (file)name
+ of the input stream; it will show up in tracebacks. It
+ defaults to "<console>".
+
+ """
+ InteractiveInterpreter.__init__(self, locals)
+ self.filename = filename
+ self.resetbuffer()
+
+ def resetbuffer(self):
+ """Reset the input buffer."""
+ self.buffer = []
+
+ def interact(self, banner=None):
+ """Closely emulate the interactive Python console.
+
+ The optional banner argument specifies the banner to print
+ before the first interaction; by default it prints a banner
+ similar to the one printed by the real Python interpreter,
+ followed by the current class name in parentheses (so as not
+ to confuse this with the real interpreter -- since it's so
+ close!).
+
+ The loop ends when raw_input() raises EOFError. A
+ KeyboardInterrupt discards the buffered input and starts over
+ with the primary prompt.
+
+ """
+ try:
+ sys.ps1
+ except AttributeError:
+ sys.ps1 = ">>> "
+ try:
+ sys.ps2
+ except AttributeError:
+ sys.ps2 = "... "
+ cprt = 'Type "help", "copyright", "credits" or "license" for more information.'
+ if banner is None:
+ self.write("Python %s on %s\n%s\n(%s)\n" %
+ (sys.version, sys.platform, cprt,
+ self.__class__.__name__))
+ else:
+ self.write("%s\n" % str(banner))
+ more = 0
+ while 1:
+ try:
+ if more:
+ prompt = sys.ps2
+ else:
+ prompt = sys.ps1
+ try:
+ line = self.raw_input(prompt)
+ # Can be None if sys.stdin was redefined
+ encoding = getattr(sys.stdin, "encoding", None)
+ # Decode byte strings with the stdin encoding, when one is known
+ if encoding and not isinstance(line, unicode):
+ line = line.decode(encoding)
+ except EOFError:
+ self.write("\n")
+ break
+ else:
+ more = self.push(line)
+ except KeyboardInterrupt:
+ self.write("\nKeyboardInterrupt\n")
+ self.resetbuffer()
+ more = 0
+
+ def push(self, line):
+ """Push a line to the interpreter.
+
+ The line should not have a trailing newline; it may have
+ internal newlines. The line is appended to a buffer and the
+ interpreter's runsource() method is called with the
+ concatenated contents of the buffer as source. If this
+ indicates that the command was executed or invalid, the buffer
+ is reset; otherwise, the command is incomplete, and the buffer
+ is left as it was after the line was appended. The return
+ value is whatever runsource() returned: true if more input is
+ required, false if the line was dealt with in some way.
+
+ """
+ self.buffer.append(line)
+ source = "\n".join(self.buffer)
+ more = self.runsource(source, self.filename)
+ if not more:
+ self.resetbuffer()
+ return more
+
+ def raw_input(self, prompt=""):
+ """Write a prompt and read a line.
+
+ The returned line does not include the trailing newline.
+ When the user enters the EOF key sequence, EOFError is raised.
+
+ The base implementation uses the built-in function
+ raw_input(); a subclass may replace this with a different
+ implementation.
+
+ """
+ return raw_input(prompt)
+
+
+def interact(banner=None, readfunc=None, local=None):
+ """Closely emulate the interactive Python interpreter.
+
+ This is a backwards compatible interface to the InteractiveConsole
+ class. When readfunc is not specified, it attempts to import the
+ readline module to enable GNU readline if it is available.
+
+ Arguments (all optional, all default to None):
+
+ banner -- passed to InteractiveConsole.interact()
+ readfunc -- if not None, replaces InteractiveConsole.raw_input()
+ local -- passed to InteractiveInterpreter.__init__()
+
+ """
+ console = InteractiveConsole(local)
+ if readfunc is not None:
+ console.raw_input = readfunc
+ else:
+ try:
+ # Imported for its side effect only; the name is not used here,
+ # and a missing module is not an error
+ import readline
+ except ImportError:
+ pass
+ console.interact(banner)
+
+
+if __name__ == "__main__":
+ interact()
diff --git a/cashew/Lib/codecs.py b/cashew/Lib/codecs.py
new file mode 100644
index 0000000..e120d63
--- /dev/null
+++ b/cashew/Lib/codecs.py
@@ -0,0 +1,1115 @@
+""" codecs -- Python Codec Registry, API and helpers.
+
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""#"
+
+import __builtin__, sys
+
+### Registry and builtin stateless codec functions
+
+try:
+ from _codecs import *
+except ImportError, why:
+ raise SystemError('Failed to load the builtin codecs: %s' % why)
+
+__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
+ "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
+ "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
+ "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
+ "CodecInfo", "Codec", "IncrementalEncoder", "IncrementalDecoder",
+ "StreamReader", "StreamWriter",
+ "StreamReaderWriter", "StreamRecoder",
+ "getencoder", "getdecoder", "getincrementalencoder",
+ "getincrementaldecoder", "getreader", "getwriter",
+ "encode", "decode", "iterencode", "iterdecode",
+ "strict_errors", "ignore_errors", "replace_errors",
+ "xmlcharrefreplace_errors", "backslashreplace_errors",
+ "register_error", "lookup_error"]
+
+### Constants
+
+#
+# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
+# and its possible byte string values
+# for UTF8/UTF16/UTF32 output and little/big endian machines
+#
+
+# UTF-8
+BOM_UTF8 = '\xef\xbb\xbf'
+
+# UTF-16, little endian
+BOM_LE = BOM_UTF16_LE = '\xff\xfe'
+
+# UTF-16, big endian
+BOM_BE = BOM_UTF16_BE = '\xfe\xff'
+
+# UTF-32, little endian
+BOM_UTF32_LE = '\xff\xfe\x00\x00'
+
+# UTF-32, big endian
+BOM_UTF32_BE = '\x00\x00\xfe\xff'
+
+if sys.byteorder == 'little':
+
+ # UTF-16, native endianness
+ BOM = BOM_UTF16 = BOM_UTF16_LE
+
+ # UTF-32, native endianness
+ BOM_UTF32 = BOM_UTF32_LE
+
+else:
+
+ # UTF-16, native endianness
+ BOM = BOM_UTF16 = BOM_UTF16_BE
+
+ # UTF-32, native endianness
+ BOM_UTF32 = BOM_UTF32_BE
+
+# Old broken names (don't use in new code)
+BOM32_LE = BOM_UTF16_LE
+BOM32_BE = BOM_UTF16_BE
+BOM64_LE = BOM_UTF32_LE
+BOM64_BE = BOM_UTF32_BE
+
+
+### Codec base classes (defining the API)
+
+class CodecInfo(tuple):
+ """Codec details when looking up the codec registry"""
+
+ # Private API to allow Python to blacklist the known non-Unicode
+ # codecs in the standard library. A more general mechanism to
+ # reliably distinguish text encodings from other codecs will hopefully
+ # be defined for Python 3.5
+ #
+ # See http://bugs.python.org/issue19619
+ _is_text_encoding = True # Assume codecs are text encodings by default
+
+ def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
+ incrementalencoder=None, incrementaldecoder=None, name=None,
+ _is_text_encoding=None):
+ # The tuple itself holds (encode, decode, streamreader, streamwriter);
+ # every argument is additionally stored as an attribute of the same name.
+ self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
+ self.name = name
+ self.encode = encode
+ self.decode = decode
+ self.incrementalencoder = incrementalencoder
+ self.incrementaldecoder = incrementaldecoder
+ self.streamwriter = streamwriter
+ self.streamreader = streamreader
+ # Only override the class-level default when a value was given
+ if _is_text_encoding is not None:
+ self._is_text_encoding = _is_text_encoding
+ return self
+
+ def __repr__(self):
+ return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))
+
+class Codec:
+
+ """ Defines the interface for stateless encoders/decoders.
+
+ The .encode()/.decode() methods may use different error
+ handling schemes by providing the errors argument. These
+ string values are predefined:
+
+ 'strict' - raise a ValueError error (or a subclass)
+ 'ignore' - ignore the character and continue with the next
+ 'replace' - replace with a suitable replacement character;
+ Python will use the official U+FFFD REPLACEMENT
+ CHARACTER for the builtin Unicode codecs on
+ decoding and '?' on encoding.
+ 'xmlcharrefreplace' - Replace with the appropriate XML
+ character reference (only for encoding).
+ 'backslashreplace' - Replace with backslashed escape sequences
+ (only for encoding).
+
+ The set of allowed values can be extended via register_error.
+
+ """
+ def encode(self, input, errors='strict'):
+
+ """ Encodes the object input and returns a tuple (output
+ object, length consumed).
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling.
+
+ The method may not store state in the Codec instance. Use
+ StreamWriter for codecs which have to keep state in order to
+ make encoding efficient.
+
+ The encoder must be able to handle zero length input and
+ return an empty object of the output object type in this
+ situation.
+
+ """
+ raise NotImplementedError
+
+ def decode(self, input, errors='strict'):
+
+ """ Decodes the object input and returns a tuple (output
+ object, length consumed).
+
+ input must be an object which provides the bf_getreadbuf
+ buffer slot. Python strings, buffer objects and memory
+ mapped files are examples of objects providing this slot.
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling.
+
+ The method may not store state in the Codec instance. Use
+ StreamReader for codecs which have to keep state in order to
+ make decoding efficient.
+
+ The decoder must be able to handle zero length input and
+ return an empty object of the output object type in this
+ situation.
+
+ """
+ raise NotImplementedError
+
+class IncrementalEncoder(object):
+ """
+ An IncrementalEncoder encodes an input in multiple steps. The input can be
+ passed piece by piece to the encode() method. The IncrementalEncoder remembers
+ the state of the Encoding process between calls to encode().
+ """
+ def __init__(self, errors='strict'):
+ """
+ Creates an IncrementalEncoder instance.
+
+ The IncrementalEncoder may use different error handling schemes by
+ providing the errors keyword argument. See the Codec class
+ docstring for a list of possible values.
+ """
+ self.errors = errors
+ self.buffer = ""
+
+ def encode(self, input, final=False):
+ """
+ Encodes input and returns the resulting object.
+ """
+ raise NotImplementedError
+
+ def reset(self):
+ """
+ Resets the encoder to the initial state.
+ """
+
+ def getstate(self):
+ """
+ Return the current state of the encoder.
+ """
+ return 0
+
+ def setstate(self, state):
+ """
+ Set the current state of the encoder. state must have been
+ returned by getstate().
+ """
+
+class BufferedIncrementalEncoder(IncrementalEncoder):
+ """
+ This subclass of IncrementalEncoder can be used as the baseclass for an
+ incremental encoder if the encoder must keep some of the output in a
+ buffer between calls to encode().
+ """
+ def __init__(self, errors='strict'):
+ IncrementalEncoder.__init__(self, errors)
+ self.buffer = "" # unencoded input that is kept between calls to encode()
+
+ def _buffer_encode(self, input, errors, final):
+ # Override this method in subclasses: It must encode input
+ # and return an (output, length consumed) tuple
+ raise NotImplementedError
+
+ def encode(self, input, final=False):
+ # encode input (taking the buffer into account)
+ data = self.buffer + input
+ (result, consumed) = self._buffer_encode(data, self.errors, final)
+ # keep unencoded input until the next call
+ self.buffer = data[consumed:]
+ return result
+
+ def reset(self):
+ IncrementalEncoder.reset(self)
+ self.buffer = ""
+
+ def getstate(self):
+ return self.buffer or 0
+
+ def setstate(self, state):
+ self.buffer = state or ""
+
+class IncrementalDecoder(object):
+ """
+ An IncrementalDecoder decodes an input in multiple steps. The input can be
+ passed piece by piece to the decode() method. The IncrementalDecoder
+ remembers the state of the decoding process between calls to decode().
+ """
+ def __init__(self, errors='strict'):
+ """
+ Creates an IncrementalDecoder instance.
+
+ The IncrementalDecoder may use different error handling schemes by
+ providing the errors keyword argument. See the Codec class
+ docstring for a list of possible values.
+ """
+ self.errors = errors
+
+ def decode(self, input, final=False):
+ """
+ Decodes input and returns the resulting object.
+ """
+ raise NotImplementedError
+
+ def reset(self):
+ """
+ Resets the decoder to the initial state.
+ """
+
+ def getstate(self):
+ """
+ Return the current state of the decoder.
+
+ This must be a (buffered_input, additional_state_info) tuple.
+ buffered_input must be a bytes object containing bytes that
+ were passed to decode() that have not yet been converted.
+ additional_state_info must be a non-negative integer
+ representing the state of the decoder WITHOUT yet having
+ processed the contents of buffered_input. In the initial state
+ and after reset(), getstate() must return (b"", 0).
+ """
+ return (b"", 0)
+
+ def setstate(self, state):
+ """
+ Set the current state of the decoder.
+
+ state must have been returned by getstate(). The effect of
+ setstate((b"", 0)) must be equivalent to reset().
+ """
+
+class BufferedIncrementalDecoder(IncrementalDecoder):
+ """
+ This subclass of IncrementalDecoder can be used as the baseclass for an
+ incremental decoder if the decoder must be able to handle incomplete byte
+ sequences.
+ """
+ def __init__(self, errors='strict'):
+ IncrementalDecoder.__init__(self, errors)
+ self.buffer = "" # undecoded input that is kept between calls to decode()
+
+ def _buffer_decode(self, input, errors, final):
+ # Override this method in subclasses: It must decode input
+ # and return an (output, length consumed) tuple
+ raise NotImplementedError
+
+ def decode(self, input, final=False):
+ # decode input (taking the buffer into account)
+ data = self.buffer + input
+ (result, consumed) = self._buffer_decode(data, self.errors, final)
+ # keep undecoded input until the next call
+ self.buffer = data[consumed:]
+ return result
+
+ def reset(self):
+ IncrementalDecoder.reset(self)
+ self.buffer = ""
+
+ def getstate(self):
+ # additional state info is always 0
+ return (self.buffer, 0)
+
+ def setstate(self, state):
+ # ignore additional state info
+ self.buffer = state[0]
+
+#
+# The StreamWriter and StreamReader classes provide generic working
+# interfaces which can be used to implement new encoding submodules
+# very easily. See encodings/utf_8.py for an example on how this is
+# done.
+#
+
+class StreamWriter(Codec):
+
+ def __init__(self, stream, errors='strict'):
+
+ """ Creates a StreamWriter instance.
+
+ stream must be a file-like object open for writing
+ (binary) data.
+
+ The StreamWriter may use different error handling
+ schemes by providing the errors keyword argument. These
+ parameters are predefined:
+
+ 'strict' - raise a ValueError (or a subclass)
+ 'ignore' - ignore the character and continue with the next
+ 'replace'- replace with a suitable replacement character
+ 'xmlcharrefreplace' - Replace with the appropriate XML
+ character reference.
+ 'backslashreplace' - Replace with backslashed escape
+ sequences (only for encoding).
+
+ The set of allowed parameter values can be extended via
+ register_error.
+ """
+ self.stream = stream
+ self.errors = errors
+
+ def write(self, object):
+
+ """ Writes the object's contents encoded to self.stream.
+ """
+ data, consumed = self.encode(object, self.errors)
+ self.stream.write(data)
+
+ def writelines(self, list):
+
+ """ Writes the concatenated list of strings to the stream
+ using .write().
+ """
+ self.write(''.join(list))
+
+ def reset(self):
+
+ """ Flushes and resets the codec buffers used for keeping state.
+
+ Calling this method should ensure that the data on the
+ output is put into a clean state, that allows appending
+ of new fresh data without having to rescan the whole
+ stream to recover state.
+
+ """
+ pass
+
+ def seek(self, offset, whence=0):
+ """ Set the underlying stream's current position.
+
+ The writer is reset only when seeking to offset 0 with
+ whence 0.
+ """
+ self.stream.seek(offset, whence)
+ if whence == 0 and offset == 0:
+ self.reset()
+
+ def __getattr__(self, name,
+ getattr=getattr):
+
+ """ Inherit all other methods from the underlying stream.
+ """
+ return getattr(self.stream, name)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, tb):
+ self.stream.close()
+
+###
+
+class StreamReader(Codec):
+
+ def __init__(self, stream, errors='strict'):
+
+ """ Creates a StreamReader instance.
+
+ stream must be a file-like object open for reading
+ (binary) data.
+
+ The StreamReader may use different error handling
+ schemes by providing the errors keyword argument. These
+ parameters are predefined:
+
+ 'strict' - raise a ValueError (or a subclass)
+ 'ignore' - ignore the character and continue with the next
+ 'replace'- replace with a suitable replacement character;
+
+ The set of allowed parameter values can be extended via
+ register_error.
+ """
+ self.stream = stream
+ self.errors = errors
+ self.bytebuffer = ""
+ # For str->str decoding this will stay a str
+ # For str->unicode decoding the first read will promote it to unicode
+ self.charbuffer = ""
+ self.linebuffer = None
+
+ def decode(self, input, errors='strict'):
+ raise NotImplementedError
+
+ def read(self, size=-1, chars=-1, firstline=False):
+
+ """ Decodes data from the stream self.stream and returns the
+ resulting object.
+
+ chars indicates the number of characters to read from the
+ stream. read() will never return more than chars
+ characters, but it might return less, if there are not enough
+ characters available.
+
+ size indicates the approximate maximum number of bytes to
+ read from the stream for decoding purposes. The decoder
+ can modify this setting as appropriate. The default value
+ -1 indicates to read and decode as much as possible. size
+ is intended to prevent having to decode huge files in one
+ step.
+
+ If firstline is true, and a UnicodeDecodeError happens
+ after the first line terminator in the input only the first line
+ will be returned, the rest of the input will be kept until the
+ next call to read().
+
+ The method should use a greedy read strategy meaning that
+ it should read as much data as is allowed within the
+ definition of the encoding and the given size, e.g. if
+ optional encoding endings or state markers are available
+ on the stream, these should be read too.
+ """
+ # If we have lines cached, first merge them back into characters
+ if self.linebuffer:
+ self.charbuffer = "".join(self.linebuffer)
+ self.linebuffer = None
+
+ if chars < 0:
+ # For compatibility with other read() methods that take a
+ # single argument
+ chars = size
+
+ # read until we get the required number of characters (if available)
+ while True:
+ # can the request be satisfied from the character buffer?
+ if chars >= 0:
+ if len(self.charbuffer) >= chars:
+ break
+ # we need more data
+ if size < 0:
+ newdata = self.stream.read()
+ else:
+ newdata = self.stream.read(size)
+ # decode bytes (those remaining from the last call included)
+ data = self.bytebuffer + newdata
+ try:
+ newchars, decodedbytes = self.decode(data, self.errors)
+ except UnicodeDecodeError, exc:
+ if firstline:
+ newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
+ lines = newchars.splitlines(True)
+ if len(lines)<=1:
+ raise
+ else:
+ raise
+ # keep undecoded bytes until the next call
+ self.bytebuffer = data[decodedbytes:]
+ # put new characters in the character buffer
+ self.charbuffer += newchars
+ # there was no data available
+ if not newdata:
+ break
+ if chars < 0:
+ # Return everything we've got
+ result = self.charbuffer
+ self.charbuffer = ""
+ else:
+ # Return the first chars characters
+ result = self.charbuffer[:chars]
+ self.charbuffer = self.charbuffer[chars:]
+ return result
+
+ def readline(self, size=None, keepends=True):
+
+ """ Read one line from the input stream and return the
+ decoded data.
+
+ size, if given, is passed as size argument to the
+ read() method.
+
+ """
+ # If we have lines cached from an earlier read, return
+ # them unconditionally
+ if self.linebuffer:
+ line = self.linebuffer[0]
+ del self.linebuffer[0]
+ if len(self.linebuffer) == 1:
+ # revert to charbuffer mode; we might need more data
+ # next time
+ self.charbuffer = self.linebuffer[0]
+ self.linebuffer = None
+ if not keepends:
+ line = line.splitlines(False)[0]
+ return line
+
+ readsize = size or 72
+ line = ""
+ # If size is given, we call read() only once
+ while True:
+ data = self.read(readsize, firstline=True)
+ if data:
+ # If we're at a "\r" read one extra character (which might
+ # be a "\n") to get a proper line ending. If the stream is
+ # temporarily exhausted we return the wrong line ending.
+ if data.endswith("\r"):
+ data += self.read(size=1, chars=1)
+
+ line += data
+ lines = line.splitlines(True)
+ if lines:
+ if len(lines) > 1:
+ # More than one line result; the first line is a full line
+ # to return
+ line = lines[0]
+ del lines[0]
+ if len(lines) > 1:
+ # cache the remaining lines
+ lines[-1] += self.charbuffer
+ self.linebuffer = lines
+ self.charbuffer = None
+ else:
+ # only one remaining line, put it back into charbuffer
+ self.charbuffer = lines[0] + self.charbuffer
+ if not keepends:
+ line = line.splitlines(False)[0]
+ break
+ line0withend = lines[0]
+ line0withoutend = lines[0].splitlines(False)[0]
+ if line0withend != line0withoutend: # We really have a line end
+ # Put the rest back together and keep it until the next call
+ self.charbuffer = "".join(lines[1:]) + self.charbuffer
+ if keepends:
+ line = line0withend
+ else:
+ line = line0withoutend
+ break
+ # we didn't get anything or this was our only try
+ if not data or size is not None:
+ if line and not keepends:
+ line = line.splitlines(False)[0]
+ break
+ if readsize<8000:
+ readsize *= 2
+ return line
+
+ def readlines(self, sizehint=None, keepends=True):
+
+ """ Read all lines available on the input stream
+ and return them as list of lines.
+
+ Line breaks are implemented using the codec's decoder
+ method and are included in the list entries when keepends
+ is true.
+
+ sizehint, if given, is ignored since there is no efficient
+ way of finding the true end-of-line.
+
+ """
+ data = self.read()
+ return data.splitlines(keepends)
+
+ def reset(self):
+
+ """ Resets the codec buffers used for keeping state.
+
+ Note that no stream repositioning should take place.
+ This method is primarily intended to be able to recover
+ from decoding errors.
+
+ """
+ self.bytebuffer = ""
+ self.charbuffer = u""
+ self.linebuffer = None
+
+ def seek(self, offset, whence=0):
+ """ Set the input stream's current position.
+
+ Resets the codec buffers used for keeping state.
+ """
+ self.stream.seek(offset, whence)
+ self.reset()
+
+ def next(self):
+
+ """ Return the next decoded line from the input stream."""
+ line = self.readline()
+ if line:
+ return line
+ raise StopIteration
+
+ def __iter__(self):
+ return self
+
+ def __getattr__(self, name,
+ getattr=getattr):
+
+ """ Inherit all other methods from the underlying stream.
+ """
+ return getattr(self.stream, name)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, tb):
+ self.stream.close()
+
+###
+
+class StreamReaderWriter:
+
+ """ StreamReaderWriter instances allow wrapping streams which
+ work in both read and write modes.
+
+ The design is such that one can use the factory functions
+ returned by the codec.lookup() function to construct the
+ instance.
+
+ """
+ # Optional attributes set by the file wrappers below
+ encoding = 'unknown'
+
+ def __init__(self, stream, Reader, Writer, errors='strict'):
+
+ """ Creates a StreamReaderWriter instance.
+
+ stream must be a Stream-like object.
+
+ Reader, Writer must be factory functions or classes
+ providing the StreamReader, StreamWriter interface resp.
+
+ Error handling is done in the same way as defined for the
+ StreamWriter/Readers.
+
+ """
+ self.stream = stream
+ self.reader = Reader(stream, errors)
+ self.writer = Writer(stream, errors)
+ self.errors = errors
+
+ def read(self, size=-1):
+
+ return self.reader.read(size)
+
+ def readline(self, size=None):
+
+ return self.reader.readline(size)
+
+ def readlines(self, sizehint=None):
+
+ return self.reader.readlines(sizehint)
+
+ def next(self):
+
+ """ Return the next decoded line from the input stream."""
+ return self.reader.next()
+
+ def __iter__(self):
+ return self
+
+ def write(self, data):
+
+ return self.writer.write(data)
+
+ def writelines(self, list):
+
+ return self.writer.writelines(list)
+
+ def reset(self):
+
+ self.reader.reset()
+ self.writer.reset()
+
+ def seek(self, offset, whence=0):
+ """ Set the underlying stream's current position.
+
+ The reader is always reset; the writer is reset only when
+ seeking to offset 0 with whence 0.
+ """
+ self.stream.seek(offset, whence)
+ self.reader.reset()
+ if whence == 0 and offset == 0:
+ self.writer.reset()
+
+ def __getattr__(self, name,
+ getattr=getattr):
+
+ """ Inherit all other methods from the underlying stream.
+ """
+ return getattr(self.stream, name)
+
+ # these are needed to make "with codecs.open(...)" work properly
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, tb):
+ self.stream.close()
+
+###
+
+class StreamRecoder:
+
+ """ StreamRecoder instances provide a frontend - backend
+ view of encoding data.
+
+ They use the complete set of APIs returned by the
+ codecs.lookup() function to implement their task.
+
+ Data written to the stream is first decoded into an
+ intermediate format (which is dependent on the given codec
+ combination) and then written to the stream using an instance
+ of the provided Writer class.
+
+ In the other direction, data is read from the stream using a
+ Reader instance and then return encoded data to the caller.
+
+ """
+ # Optional attributes set by the file wrappers below
+ data_encoding = 'unknown'
+ file_encoding = 'unknown'
+
+ def __init__(self, stream, encode, decode, Reader, Writer,
+ errors='strict'):
+
+ """ Creates a StreamRecoder instance which implements a two-way
+ conversion: encode and decode work on the frontend (the
+ data returned by .read() and passed to .write()) while
+ Reader and Writer work on the backend (reading and
+ writing to the stream).
+
+ You can use these objects to do transparent direct
+ recodings from e.g. latin-1 to utf-8 and back.
+
+ stream must be a file-like object.
+
+ encode, decode must adhere to the Codec interface, Reader,
+ Writer must be factory functions or classes providing the
+ StreamReader, StreamWriter interface resp.
+
+ encode and decode are needed for the frontend translation,
+ Reader and Writer for the backend translation. Unicode is
+ used as intermediate encoding.
+
+ Error handling is done in the same way as defined for the
+ StreamWriter/Readers.
+
+ """
+ self.stream = stream
+ self.encode = encode
+ self.decode = decode
+ self.reader = Reader(stream, errors)
+ self.writer = Writer(stream, errors)
+ self.errors = errors
+
+ def read(self, size=-1):
+
+ data = self.reader.read(size)
+ data, bytesencoded = self.encode(data, self.errors)
+ return data
+
+ def readline(self, size=None):
+
+ if size is None:
+ data = self.reader.readline()
+ else:
+ data = self.reader.readline(size)
+ data, bytesencoded = self.encode(data, self.errors)
+ return data
+
+ def readlines(self, sizehint=None):
+
+ """ Read everything self.reader.read() returns, convert it with
+ self.encode and return it split into lines, line ends kept.
+
+ sizehint is accepted but not used.
+ """
+ data = self.reader.read()
+ data, bytesencoded = self.encode(data, self.errors)
+ return data.splitlines(1)
+
+ def next(self):
+
+ """ Return the next line from the input stream, as read by
+ self.reader and then converted with self.encode."""
+ data = self.reader.next()
+ data, bytesencoded = self.encode(data, self.errors)
+ return data
+
+ def __iter__(self):
+ return self
+
+ def write(self, data):
+
+ data, bytesdecoded = self.decode(data, self.errors)
+ return self.writer.write(data)
+
+ def writelines(self, list):
+
+ data = ''.join(list)
+ data, bytesdecoded = self.decode(data, self.errors)
+ return self.writer.write(data)
+
+ def reset(self):
+ """ Reset the backend reader first, then the backend writer."""
+ self.reader.reset()
+ self.writer.reset()
+
+ def __getattr__(self, name,
+ getattr=getattr):
+
+ """ Delegate attributes not found on the recoder to the
+ underlying stream (getattr is bound as a default argument)."""
+ return getattr(self.stream, name)
+
+ def __enter__(self):
+ return self # the recoder itself is the value bound by "with ... as"
+
+ def __exit__(self, type, value, tb):
+ self.stream.close() # always close; returns None, so exceptions propagate
+
+### Shortcuts
+
+def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):
+
+ """ Open an encoded file using the given mode and return
+ a wrapped version providing transparent encoding/decoding.
+
+ Note: The wrapped version will only accept the object format
+ defined by the codecs, i.e. Unicode objects for most builtin
+ codecs. Output is also codec dependent and will usually be
+ Unicode as well.
+
+ Files are always opened in binary mode, even if no binary mode
+ was specified. This is done to avoid data loss due to encodings
+ using 8-bit values. The default file mode is 'rb' meaning to
+ open the file in binary read mode.
+
+ encoding specifies the encoding which is to be used for the
+ file.
+
+ errors may be given to define the error handling. It defaults
+ to 'strict' which causes ValueErrors to be raised in case an
+ encoding error occurs.
+
+ buffering has the same meaning as for the builtin open() API.
+ It defaults to line buffered.
+
+ The returned wrapped file object provides an extra attribute
+ .encoding which allows querying the used encoding. This
+ attribute is only available if an encoding was specified as
+ parameter.
+
+ """
+ if encoding is not None:
+ if 'U' in mode:
+ # No automatic conversion of '\n' is done on reading and writing
+ mode = mode.strip().replace('U', '')
+ if mode[:1] not in set('rwa'):
+ mode = 'r' + mode
+ if 'b' not in mode:
+ # Force opening of the file in binary mode
+ mode = mode + 'b'
+ file = __builtin__.open(filename, mode, buffering)
+ if encoding is None:
+ return file
+ info = lookup(encoding)
+ srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
+ # Add attributes to simplify introspection
+ srw.encoding = encoding
+ return srw
+
+def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
+
+ """ Return a wrapped version of file which provides transparent
+ encoding translation.
+
+ Strings written to the wrapped file are interpreted according
+ to the given data_encoding and then written to the original
+ file as string using file_encoding. The intermediate encoding
+ will usually be Unicode but depends on the specified codecs.
+
+ Strings are read from the file using file_encoding and then
+ passed back to the caller as string using data_encoding.
+
+ If file_encoding is not given, it defaults to data_encoding.
+
+ errors may be given to define the error handling. It defaults
+ to 'strict' which causes ValueErrors to be raised in case an
+ encoding error occurs.
+
+ The returned wrapped file object provides two extra attributes
+ .data_encoding and .file_encoding which reflect the given
+ parameters of the same name. The attributes can be used for
+ introspection by Python programs.
+
+ """
+ if file_encoding is None:
+ file_encoding = data_encoding
+ data_info = lookup(data_encoding)
+ file_info = lookup(file_encoding)
+ sr = StreamRecoder(file, data_info.encode, data_info.decode,
+ file_info.streamreader, file_info.streamwriter, errors)
+ # Add attributes to simplify introspection
+ sr.data_encoding = data_encoding
+ sr.file_encoding = file_encoding
+ return sr
+
+### Helpers for codec lookup
+
+def getencoder(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its encoder function.
+
+ Raises a LookupError in case the encoding cannot be found.
+
+ """
+ return lookup(encoding).encode
+
+def getdecoder(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its decoder function.
+
+ Raises a LookupError in case the encoding cannot be found.
+
+ """
+ return lookup(encoding).decode
+
+def getincrementalencoder(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its IncrementalEncoder class or factory function.
+
+ Raises a LookupError in case the encoding cannot be found
+ or the codecs doesn't provide an incremental encoder.
+
+ """
+ encoder = lookup(encoding).incrementalencoder
+ if encoder is None:
+ raise LookupError(encoding)
+ return encoder
+
+def getincrementaldecoder(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its IncrementalDecoder class or factory function.
+
+ Raises a LookupError in case the encoding cannot be found
+ or the codecs doesn't provide an incremental decoder.
+
+ """
+ decoder = lookup(encoding).incrementaldecoder
+ if decoder is None:
+ raise LookupError(encoding)
+ return decoder
+
+def getreader(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its StreamReader class or factory function.
+
+ Raises a LookupError in case the encoding cannot be found.
+
+ """
+ return lookup(encoding).streamreader
+
+def getwriter(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its StreamWriter class or factory function.
+
+ Raises a LookupError in case the encoding cannot be found.
+
+ """
+ return lookup(encoding).streamwriter
+
+def iterencode(iterator, encoding, errors='strict', **kwargs):
+ """
+ Encoding iterator.
+
+ Encodes the input strings from the iterator using an IncrementalEncoder.
+
+ errors and kwargs are passed through to the IncrementalEncoder
+ constructor.
+ """
+ encoder = getincrementalencoder(encoding)(errors, **kwargs)
+ for input in iterator:
+ output = encoder.encode(input)
+ if output:
+ yield output
+ output = encoder.encode("", True)
+ if output:
+ yield output
+
+def iterdecode(iterator, encoding, errors='strict', **kwargs):
+ """
+ Decoding iterator.
+
+ Decodes the input strings from the iterator using an IncrementalDecoder.
+
+ errors and kwargs are passed through to the IncrementalDecoder
+ constructor.
+ """
+ decoder = getincrementaldecoder(encoding)(errors, **kwargs)
+ for input in iterator:
+ output = decoder.decode(input)
+ if output:
+ yield output
+ output = decoder.decode("", True)
+ if output:
+ yield output
+
+### Helpers for charmap-based codecs
+
+def make_identity_dict(rng):
+
+ """ make_identity_dict(rng) -> dict
+
+ Return a dictionary where elements of the rng sequence are
+ mapped to themselves.
+
+ """
+ res = {}
+ for i in rng:
+ res[i]=i
+ return res
+
+def make_encoding_map(decoding_map):
+
+ """ Creates an encoding map from a decoding map.
+
+ If a target mapping in the decoding map occurs multiple
+ times, then that target is mapped to None (undefined mapping),
+ causing an exception when encountered by the charmap codec
+ during translation.
+
+ One example where this happens is cp875.py which decodes
+ multiple character to \\u001a.
+
+ """
+ m = {}
+ for k,v in decoding_map.items():
+ if not v in m:
+ m[v] = k
+ else:
+ m[v] = None
+ return m
+
+### error handlers
+
+try:
+ strict_errors = lookup_error("strict")
+ ignore_errors = lookup_error("ignore")
+ replace_errors = lookup_error("replace")
+ xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
+ backslashreplace_errors = lookup_error("backslashreplace")
+except LookupError:
+ # In --disable-unicode builds these error handlers are missing; bind all five names to None
+ strict_errors = None
+ ignore_errors = None
+ replace_errors = None
+ xmlcharrefreplace_errors = None
+ backslashreplace_errors = None
+
+# Tell modulefinder that using codecs probably needs the encodings
+# package; the guard is always false, so the import never actually runs
+_false = 0
+if _false:
+ import encodings
+
+### Tests
+
+if __name__ == '__main__':
+ # Demo only: rebinds sys.stdout and sys.stdin to recoding wrappers.
+ # Make stdout translate Latin-1 output into UTF-8 output
+ sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')
+
+ # Have stdin translate Latin-1 input into UTF-8 input
+ sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')
diff --git a/cashew/Lib/codeop.py b/cashew/Lib/codeop.py
new file mode 100644
index 0000000..5616d92
--- /dev/null
+++ b/cashew/Lib/codeop.py
@@ -0,0 +1,168 @@
+r"""Utilities to compile possibly incomplete Python source code.
+
+This module provides two interfaces, broadly similar to the builtin
+function compile(), which take program text, a filename and a 'mode'
+and:
+
+- Return code object if the command is complete and valid
+- Return None if the command is incomplete
+- Raise SyntaxError, ValueError or OverflowError if the command is a
+ syntax error (OverflowError and ValueError can be produced by
+ malformed literals).
+
+Approach:
+
+First, check if the source consists entirely of blank lines and
+comments; if so, replace it with 'pass', because the built-in
+parser doesn't always do the right thing for these.
+
+Compile three times: as is, with \n, and with \n\n appended. If it
+compiles as is, it's complete. If it compiles with one \n appended,
+we expect more. If it doesn't compile either way, we compare the
+error we get when compiling with \n or \n\n appended. If the errors
+are the same, the code is broken. But if the errors are different, we
+expect more. Not intuitive; not even guaranteed to hold in future
+releases; but this matches the compiler's behavior from Python 1.4
+through 2.2, at least.
+
+Caveat:
+
+It is possible (but not likely) that the parser stops parsing with a
+successful outcome before reaching the end of the source; in this
+case, trailing symbols may be ignored instead of causing an error.
+For example, a backslash followed by two newlines may be followed by
+arbitrary garbage. This will be fixed once the API for the parser is
+better.
+
+The two interfaces are:
+
+compile_command(source, filename, symbol):
+
+ Compiles a single command in the manner described above.
+
+CommandCompiler():
+
+ Instances of this class have __call__ methods identical in
+ signature to compile_command; the difference is that if the
+ instance compiles program text containing a __future__ statement,
+ the instance 'remembers' and compiles all subsequent program texts
+ with the statement in force.
+
+The module also provides another class:
+
+Compile():
+
+ Instances of this class act like the built-in function compile,
+ but with 'memory' in the sense described above.
+"""
+
+import __future__
+# One feature object per name in __future__.all_feature_names; Compile tests their compiler_flag bits.
+_features = [getattr(__future__, fname)
+ for fname in __future__.all_feature_names]
+
+__all__ = ["compile_command", "Compile", "CommandCompiler"]
+
+PyCF_DONT_IMPLY_DEDENT = 0x200 # Matches pythonrun.h
+
+def _maybe_compile(compiler, source, filename, symbol):
+ # Check for source consisting of only blank lines and comments
+ for line in source.split("\n"):
+ line = line.strip()
+ if line and line[0] != '#':
+ break # Leave it alone
+ else:
+ if symbol != "eval":
+ source = "pass" # Replace it with a 'pass' statement
+ # Slots for the three attempts: source as is, plus "\n", plus "\n\n".
+ err = err1 = err2 = None
+ code = code1 = code2 = None
+ # Only SyntaxError is recorded below; any other exception propagates.
+ try:
+ code = compiler(source, filename, symbol)
+ except SyntaxError, err:
+ pass
+
+ try:
+ code1 = compiler(source + "\n", filename, symbol)
+ except SyntaxError, err1:
+ pass
+
+ try:
+ code2 = compiler(source + "\n\n", filename, symbol)
+ except SyntaxError, err2:
+ pass
+ # Complete -> code; same error both times -> raise; else fall through (None).
+ if code:
+ return code
+ if not code1 and repr(err1) == repr(err2):
+ raise SyntaxError, err1
+
+def _compile(source, filename, symbol):
+ return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT) # always the same flags
+
+def compile_command(source, filename=" ", symbol="single"):
+ r"""Compile a command and determine whether it is incomplete.
+
+ Arguments:
+
+ source -- the source string; may contain \n characters
+ filename -- optional filename from which source was read; default
+ " "
+ symbol -- optional grammar start symbol; "single" (default) or "eval"
+
+ Return value / exceptions raised:
+
+ - Return a code object if the command is complete and valid
+ - Return None if the command is incomplete
+ - Raise SyntaxError, ValueError or OverflowError if the command is a
+ syntax error (OverflowError and ValueError can be produced by
+ malformed literals).
+ """
+ return _maybe_compile(_compile, source, filename, symbol)
+
+class Compile:
+ """Instances of this class behave much like the built-in compile
+ function, but if one is used to compile text containing a future
+ statement, it "remembers" and compiles all subsequent program texts
+ with the statement in force."""
+ def __init__(self):
+ self.flags = PyCF_DONT_IMPLY_DEDENT
+
+ def __call__(self, source, filename, symbol):
+ codeob = compile(source, filename, symbol, self.flags, 1)
+ for feature in _features:
+ if codeob.co_flags & feature.compiler_flag:
+ self.flags |= feature.compiler_flag
+ return codeob
+
+class CommandCompiler:
+ """Instances of this class have __call__ methods identical in
+ signature to compile_command; the difference is that if the
+ instance compiles program text containing a __future__ statement,
+ the instance 'remembers' and compiles all subsequent program texts
+ with the statement in force."""
+
+ def __init__(self,):
+ self.compiler = Compile()
+
+ def __call__(self, source, filename=" ", symbol="single"):
+ r"""Compile a command and determine whether it is incomplete.
+
+ Arguments:
+
+ source -- the source string; may contain \n characters
+ filename -- optional filename from which source was read;
+ default " "
+ symbol -- optional grammar start symbol; "single" (default) or
+ "eval"
+
+ Return value / exceptions raised:
+
+ - Return a code object if the command is complete and valid
+ - Return None if the command is incomplete
+ - Raise SyntaxError, ValueError or OverflowError if the command is a
+ syntax error (OverflowError and ValueError can be produced by
+ malformed literals).
+ """
+ return _maybe_compile(self.compiler, source, filename, symbol)
diff --git a/cashew/Lib/collections.py b/cashew/Lib/collections.py
new file mode 100644
index 0000000..f2ad972
--- /dev/null
+++ b/cashew/Lib/collections.py
@@ -0,0 +1,742 @@
+'''This module implements specialized container datatypes providing
+alternatives to Python's general purpose built-in containers, dict,
+list, set, and tuple.
+
+* namedtuple factory function for creating tuple subclasses with named fields
+* deque list-like container with fast appends and pops on either end
+* Counter dict subclass for counting hashable objects
+* OrderedDict dict subclass that remembers the order entries were added
+* defaultdict dict subclass that calls a factory function to supply missing values
+
+'''
+
+__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
+# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
+# They should however be considered an integral part of collections.py.
+from _abcoll import *
+import _abcoll
+__all__ += _abcoll.__all__
+
+from _collections import deque, defaultdict
+from operator import itemgetter as _itemgetter, eq as _eq
+from keyword import iskeyword as _iskeyword
+import sys as _sys
+import heapq as _heapq
+from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
+from itertools import imap as _imap
+
+try:
+ from thread import get_ident as _get_ident
+except ImportError:
+ from dummy_thread import get_ident as _get_ident
+
+
+################################################################################
+### OrderedDict
+################################################################################
+
+class OrderedDict(dict):
+ 'Dictionary that remembers insertion order'
+ # An inherited dict maps keys to values.
+ # The inherited dict provides __getitem__, __len__, __contains__, and get.
+ # The remaining methods are order-aware.
+ # Big-O running times for all methods are the same as regular dictionaries.
+
+ # The internal self.__map dict maps keys to links in a doubly linked list.
+ # The circular doubly linked list starts and ends with a sentinel element.
+ # The sentinel element never gets deleted (this simplifies the algorithm).
+ # Each link is stored as a list of length three: [PREV, NEXT, KEY].
+
+ def __init__(*args, **kwds):
+ '''Initialize an ordered dictionary. The signature is the same as
+ regular dictionaries, but keyword arguments are not recommended because
+ their insertion order is arbitrary.
+
+ '''
+ if not args:
+ raise TypeError("descriptor '__init__' of 'OrderedDict' object "
+ "needs an argument")
+ self = args[0]
+ args = args[1:]
+ if len(args) > 1:
+ raise TypeError('expected at most 1 arguments, got %d' % len(args))
+ try:
+ self.__root
+ except AttributeError:
+ self.__root = root = [] # sentinel node
+ root[:] = [root, root, None] # empty list: the sentinel links to itself
+ self.__map = {}
+ self.__update(*args, **kwds)
+
+ def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
+ 'od.__setitem__(i, y) <==> od[i]=y'
+ # Setting a new item creates a new link at the end of the linked list,
+ # and the inherited dictionary is updated with the new key/value pair.
+ if key not in self:
+ root = self.__root
+ last = root[0]
+ last[1] = root[0] = self.__map[key] = [last, root, key]
+ return dict_setitem(self, key, value)
+
+ def __delitem__(self, key, dict_delitem=dict.__delitem__):
+ 'od.__delitem__(y) <==> del od[y]'
+ # Deleting an existing item uses self.__map to find the link which gets
+ # removed by updating the links in the predecessor and successor nodes.
+ dict_delitem(self, key)
+ link_prev, link_next, _ = self.__map.pop(key)
+ link_prev[1] = link_next # update link_prev[NEXT]
+ link_next[0] = link_prev # update link_next[PREV]
+
+ def __iter__(self):
+ 'od.__iter__() <==> iter(od)'
+ # Traverse the linked list in order.
+ root = self.__root
+ curr = root[1] # start at the first node
+ while curr is not root:
+ yield curr[2] # yield the curr[KEY]
+ curr = curr[1] # move to next node
+
+ def __reversed__(self):
+ 'od.__reversed__() <==> reversed(od)'
+ # Traverse the linked list in reverse order.
+ root = self.__root
+ curr = root[0] # start at the last node
+ while curr is not root:
+ yield curr[2] # yield the curr[KEY]
+ curr = curr[0] # move to previous node
+
+ def clear(self):
+ 'od.clear() -> None. Remove all items from od.'
+ root = self.__root
+ root[:] = [root, root, None]
+ self.__map.clear()
+ dict.clear(self)
+
+ # -- the following methods do not depend on the internal structure --
+
+ def keys(self):
+ 'od.keys() -> list of keys in od'
+ return list(self)
+
+ def values(self):
+ 'od.values() -> list of values in od'
+ return [self[key] for key in self]
+
+ def items(self):
+ 'od.items() -> list of (key, value) pairs in od'
+ return [(key, self[key]) for key in self]
+
+ def iterkeys(self):
+ 'od.iterkeys() -> an iterator over the keys in od'
+ return iter(self)
+
+ def itervalues(self):
+ 'od.itervalues() -> an iterator over the values in od'
+ for k in self:
+ yield self[k]
+
+ def iteritems(self):
+ 'od.iteritems() -> an iterator over the (key, value) pairs in od'
+ for k in self:
+ yield (k, self[k])
+
+ update = MutableMapping.update
+
+ __update = update # let subclasses override update without breaking __init__
+
+ __marker = object() # unique sentinel: pop() was called without a default
+
+ def pop(self, key, default=__marker):
+ '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
+ value. If key is not found, d is returned if given, otherwise KeyError
+ is raised.
+
+ '''
+ if key in self:
+ result = self[key]
+ del self[key]
+ return result
+ if default is self.__marker:
+ raise KeyError(key)
+ return default
+
+ def setdefault(self, key, default=None):
+ 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
+ if key in self:
+ return self[key]
+ self[key] = default
+ return default
+
+ def popitem(self, last=True):
+ '''od.popitem() -> (k, v), return and remove a (key, value) pair.
+ Pairs are returned in LIFO order if last is true or FIFO order if false.
+
+ '''
+ if not self:
+ raise KeyError('dictionary is empty')
+ key = next(reversed(self) if last else iter(self))
+ value = self.pop(key)
+ return key, value
+
+ def __repr__(self, _repr_running={}):
+ 'od.__repr__() <==> repr(od)'
+ call_key = id(self), _get_ident() # per-object, per-thread recursion guard
+ if call_key in _repr_running:
+ return '...'
+ _repr_running[call_key] = 1
+ try:
+ if not self:
+ return '%s()' % (self.__class__.__name__,)
+ return '%s(%r)' % (self.__class__.__name__, self.items())
+ finally:
+ del _repr_running[call_key]
+
+ def __reduce__(self):
+ 'Return state information for pickling'
+ items = [[k, self[k]] for k in self]
+ inst_dict = vars(self).copy()
+ for k in vars(OrderedDict()): # drop the attrs every fresh instance has
+ inst_dict.pop(k, None)
+ if inst_dict:
+ return (self.__class__, (items,), inst_dict)
+ return self.__class__, (items,)
+
+ def copy(self):
+ 'od.copy() -> a shallow copy of od'
+ return self.__class__(self)
+
+ @classmethod
+ def fromkeys(cls, iterable, value=None):
+ '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
+ If not specified, the value defaults to None.
+
+ '''
+ self = cls()
+ for key in iterable:
+ self[key] = value
+ return self
+
+ def __eq__(self, other):
+ '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
+ while comparison to a regular mapping is order-insensitive.
+
+ '''
+ if isinstance(other, OrderedDict):
+ return dict.__eq__(self, other) and all(_imap(_eq, self, other))
+ return dict.__eq__(self, other)
+
+ def __ne__(self, other):
+ 'od.__ne__(y) <==> od!=y'
+ return not self == other
+
+ # -- the following methods support python 3.x style dictionary views --
+
+ def viewkeys(self):
+ "od.viewkeys() -> a set-like object providing a view on od's keys"
+ return KeysView(self)
+
+ def viewvalues(self):
+ "od.viewvalues() -> an object providing a view on od's values"
+ return ValuesView(self)
+
+ def viewitems(self):
+ "od.viewitems() -> a set-like object providing a view on od's items"
+ return ItemsView(self)
+
+
+################################################################################
+### namedtuple
+################################################################################
+# Class source filled in by namedtuple() through str.format(); the text below is runtime data.
+_class_template = '''\
+class {typename}(tuple):
+ '{typename}({arg_list})'
+
+ __slots__ = ()
+
+ _fields = {field_names!r}
+
+ def __new__(_cls, {arg_list}):
+ 'Create new instance of {typename}({arg_list})'
+ return _tuple.__new__(_cls, ({arg_list}))
+
+ @classmethod
+ def _make(cls, iterable, new=tuple.__new__, len=len):
+ 'Make a new {typename} object from a sequence or iterable'
+ result = new(cls, iterable)
+ if len(result) != {num_fields:d}:
+ raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
+ return result
+
+ def __repr__(self):
+ 'Return a nicely formatted representation string'
+ return '{typename}({repr_fmt})' % self
+
+ def _asdict(self):
+ 'Return a new OrderedDict which maps field names to their values'
+ return OrderedDict(zip(self._fields, self))
+
+ def _replace(_self, **kwds):
+ 'Return a new {typename} object replacing specified fields with new values'
+ result = _self._make(map(kwds.pop, {field_names!r}, _self))
+ if kwds:
+ raise ValueError('Got unexpected field names: %r' % kwds.keys())
+ return result
+
+ def __getnewargs__(self):
+ 'Return self as a plain tuple. Used by copy and pickle.'
+ return tuple(self)
+
+ __dict__ = _property(_asdict)
+
+ def __getstate__(self):
+ 'Exclude the OrderedDict from pickling'
+ pass
+
+{field_defs}
+'''
+# One "name=%r" fragment per field; namedtuple() joins them with ', ' into {repr_fmt}.
+_repr_template = '{name}=%r'
+# One property definition per field; namedtuple() joins them with newlines into {field_defs}.
+_field_template = '''\
+ {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
+'''
+
+def namedtuple(typename, field_names, verbose=False, rename=False):
+ """Returns a new subclass of tuple with named fields.
+
+ >>> Point = namedtuple('Point', ['x', 'y'])
+ >>> Point.__doc__ # docstring for the new class
+ 'Point(x, y)'
+ >>> p = Point(11, y=22) # instantiate with positional args or keywords
+ >>> p[0] + p[1] # indexable like a plain tuple
+ 33
+ >>> x, y = p # unpack like a regular tuple
+ >>> x, y
+ (11, 22)
+ >>> p.x + p.y # fields also accessible by name
+ 33
+ >>> d = p._asdict() # convert to a dictionary
+ >>> d['x']
+ 11
+ >>> Point(**d) # convert from a dictionary
+ Point(x=11, y=22)
+ >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
+ Point(x=100, y=22)
+
+ """
+
+ # Validate the field names. At the user's option, either generate an error
+ # message or automatically replace the field name with a valid name.
+ if isinstance(field_names, basestring):
+ field_names = field_names.replace(',', ' ').split()
+ field_names = map(str, field_names) # a list in Python 2; the rename loop assigns into it
+ typename = str(typename)
+ if rename:
+ seen = set()
+ for index, name in enumerate(field_names):
+ if (not all(c.isalnum() or c=='_' for c in name)
+ or _iskeyword(name)
+ or not name
+ or name[0].isdigit()
+ or name.startswith('_')
+ or name in seen):
+ field_names[index] = '_%d' % index
+ seen.add(name)
+ for name in [typename] + field_names:
+ if type(name) != str:
+ raise TypeError('Type names and field names must be strings')
+ if not all(c.isalnum() or c=='_' for c in name):
+ raise ValueError('Type names and field names can only contain '
+ 'alphanumeric characters and underscores: %r' % name)
+ if _iskeyword(name):
+ raise ValueError('Type names and field names cannot be a '
+ 'keyword: %r' % name)
+ if name[0].isdigit():
+ raise ValueError('Type names and field names cannot start with '
+ 'a number: %r' % name)
+ seen = set()
+ for name in field_names:
+ if name.startswith('_') and not rename:
+ raise ValueError('Field names cannot start with an underscore: '
+ '%r' % name)
+ if name in seen:
+ raise ValueError('Encountered duplicate field name: %r' % name)
+ seen.add(name)
+
+ # Fill-in the class template
+ class_definition = _class_template.format(
+ typename = typename,
+ field_names = tuple(field_names),
+ num_fields = len(field_names),
+ arg_list = repr(tuple(field_names)).replace("'", "")[1:-1], # tuple repr minus quotes and parens
+ repr_fmt = ', '.join(_repr_template.format(name=name)
+ for name in field_names),
+ field_defs = '\n'.join(_field_template.format(index=index, name=name)
+ for index, name in enumerate(field_names))
+ )
+ if verbose:
+ print class_definition
+
+ # Execute the template string in a temporary namespace and support
+ # tracing utilities by setting a value for frame.f_globals['__name__']
+ namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
+ OrderedDict=OrderedDict, _property=property, _tuple=tuple)
+ try:
+ exec class_definition in namespace
+ except SyntaxError as e:
+ raise SyntaxError(e.message + ':\n' + class_definition)
+ result = namespace[typename]
+
+ # For pickling to work, the __module__ variable needs to be set to the frame
+ # where the named tuple is created. Bypass this step in environments where
+ # sys._getframe is not defined (Jython for example) or sys._getframe is not
+ # defined for arguments greater than 0 (IronPython).
+ try:
+ result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
+ except (AttributeError, ValueError):
+ pass
+
+ return result
+
+
+########################################################################
+### Counter
+########################################################################
+
+class Counter(dict):
+ '''Dict subclass for counting hashable items. Sometimes called a bag
+ or multiset. Elements are stored as dictionary keys and their counts
+ are stored as dictionary values.
+
+ >>> c = Counter('abcdeabcdabcaba') # count elements from a string
+
+ >>> c.most_common(3) # three most common elements
+ [('a', 5), ('b', 4), ('c', 3)]
+ >>> sorted(c) # list all unique elements
+ ['a', 'b', 'c', 'd', 'e']
+ >>> ''.join(sorted(c.elements())) # list elements with repetitions
+ 'aaaaabbbbcccdde'
+ >>> sum(c.values()) # total of all counts
+ 15
+
+ >>> c['a'] # count of letter 'a'
+ 5
+ >>> for elem in 'shazam': # update counts from an iterable
+ ... c[elem] += 1 # by adding 1 to each element's count
+ >>> c['a'] # now there are seven 'a'
+ 7
+ >>> del c['b'] # remove all 'b'
+ >>> c['b'] # now there are zero 'b'
+ 0
+
+ >>> d = Counter('simsalabim') # make another counter
+ >>> c.update(d) # add in the second counter
+ >>> c['a'] # now there are nine 'a'
+ 9
+
+ >>> c.clear() # empty the counter
+ >>> c
+ Counter()
+
+ Note: If a count is set to zero or reduced to zero, it will remain
+ in the counter until the entry is deleted or the counter is cleared:
+
+ >>> c = Counter('aaabbc')
+ >>> c['b'] -= 2 # reduce the count of 'b' by two
+ >>> c.most_common() # 'b' is still in, but its count is zero
+ [('a', 3), ('c', 1), ('b', 0)]
+
+ '''
+ # References:
+ # http://en.wikipedia.org/wiki/Multiset
+ # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
+ # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
+ # http://code.activestate.com/recipes/259174/
+ # Knuth, TAOCP Vol. II section 4.6.3
+
+ def __init__(*args, **kwds):
+ '''Create a new, empty Counter object. And if given, count elements
+ from an input iterable. Or, initialize the count from another mapping
+ of elements to their counts.
+
+ >>> c = Counter() # a new, empty counter
+ >>> c = Counter('gallahad') # a new counter from an iterable
+ >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
+ >>> c = Counter(a=4, b=2) # a new counter from keyword args
+
+ '''
+ if not args:
+ raise TypeError("descriptor '__init__' of 'Counter' object "
+ "needs an argument")
+ self = args[0]
+ args = args[1:]
+ if len(args) > 1:
+ raise TypeError('expected at most 1 arguments, got %d' % len(args))
+ super(Counter, self).__init__()
+ self.update(*args, **kwds)
+
+ def __missing__(self, key):
+ 'The count of elements not in the Counter is zero.'
+ # Needed so that self[missing_item] does not raise KeyError
+ return 0
+
+ def most_common(self, n=None):
+ '''List the n most common elements and their counts from the most
+ common to the least. If n is None, then list all element counts.
+
+ >>> Counter('abcdeabcdabcaba').most_common(3)
+ [('a', 5), ('b', 4), ('c', 3)]
+
+ '''
+ # Emulate Bag.sortedByCount from Smalltalk
+ if n is None:
+ return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
+ return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
+
+ def elements(self):
+ '''Iterator over elements repeating each as many times as its count.
+
+ >>> c = Counter('ABCABC')
+ >>> sorted(c.elements())
+ ['A', 'A', 'B', 'B', 'C', 'C']
+
+ # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
+ >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
+ >>> product = 1
+ >>> for factor in prime_factors.elements(): # loop over factors
+ ... product *= factor # and multiply them
+ >>> product
+ 1836
+
+ Note, if an element's count has been set to zero or is a negative
+ number, elements() will ignore it.
+
+ '''
+ # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
+ return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
+
+ # Override dict methods where necessary
+
+ @classmethod
+ def fromkeys(cls, iterable, v=None):
+ # There is no equivalent method for counters because setting v=1
+ # means that no element can have a count greater than one.
+ raise NotImplementedError(
+ 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
+
+    def update(*args, **kwds):
+        '''Like dict.update() but add counts instead of replacing them.
+
+        Source can be an iterable, a dictionary, or another Counter instance.
+
+        >>> c = Counter('which')
+        >>> c.update('witch') # add elements from another iterable
+        >>> d = Counter('watch')
+        >>> c.update(d) # add elements from another counter
+        >>> c['h'] # four 'h' in which, witch, and watch
+        4
+
+        '''
+        # dict.update() would overwrite existing counts, leaving a mixture of
+        # replaced and untouched values with no sensible meaning for counting.
+        # Counts are therefore added.  Inputs and outputs may both contain
+        # zero and negative counts.
+
+        # The instance is taken from *args by hand rather than being a named
+        # parameter.
+        if not args:
+            raise TypeError("descriptor 'update' of 'Counter' object "
+                            "needs an argument")
+        self, extras = args[0], args[1:]
+        if len(extras) > 1:
+            raise TypeError('expected at most 1 arguments, got %d' % len(extras))
+        source = extras[0] if extras else None
+        if source is not None:
+            if not isinstance(source, Mapping):
+                lookup = self.get
+                for item in source:
+                    self[item] = lookup(item, 0) + 1
+            elif self:
+                lookup = self.get
+                for item, amount in source.iteritems():
+                    self[item] = lookup(item, 0) + amount
+            else:
+                # fast path when counter is empty
+                super(Counter, self).update(source)
+        if kwds:
+            self.update(kwds)
+
+    def subtract(*args, **kwds):
+        '''Like dict.update() but subtract counts instead of replacing them.
+        Counts may drop below zero.  Inputs and outputs may both contain
+        zero and negative counts.
+
+        Source can be an iterable, a dictionary, or another Counter instance.
+
+        >>> c = Counter('which')
+        >>> c.subtract('witch') # subtract elements from another iterable
+        >>> c.subtract(Counter('watch')) # subtract elements from another counter
+        >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
+        0
+        >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
+        -1
+
+        '''
+        # The instance is taken from *args by hand rather than being a named
+        # parameter.
+        if not args:
+            raise TypeError("descriptor 'subtract' of 'Counter' object "
+                            "needs an argument")
+        self, extras = args[0], args[1:]
+        if len(extras) > 1:
+            raise TypeError('expected at most 1 arguments, got %d' % len(extras))
+        source = extras[0] if extras else None
+        if source is not None:
+            lookup = self.get
+            if not isinstance(source, Mapping):
+                for item in source:
+                    self[item] = lookup(item, 0) - 1
+            else:
+                for item, amount in source.items():
+                    self[item] = lookup(item, 0) - amount
+        if kwds:
+            self.subtract(kwds)
+
+    def copy(self):
+        'Return a shallow copy, built by passing self to its own class.'
+        cls = self.__class__
+        return cls(self)
+
+    def __reduce__(self):
+        # Pickle as "call the class with a plain dict of the counts".
+        counts = dict(self)
+        return (self.__class__, (counts,))
+
+    def __delitem__(self, elem):
+        'Like dict.__delitem__() but does not raise KeyError for missing values.'
+        if elem not in self:
+            # deleting an absent element is a silent no-op
+            return
+        super(Counter, self).__delitem__(elem)
+
+    def __repr__(self):
+        # An empty counter shows as "Name()"; otherwise the items are listed
+        # in most_common() order inside a dict-style literal.
+        name = self.__class__.__name__
+        if not self:
+            return '%s()' % name
+        items = ', '.join('%r: %r' % pair for pair in self.most_common())
+        return '%s({%s})' % (name, items)
+
+ # Multiset-style mathematical operations discussed in:
+ # Knuth TAOCP Volume II section 4.6.3 exercise 19
+ # and at http://en.wikipedia.org/wiki/Multiset
+ #
+ # Outputs guaranteed to only include positive counts.
+ #
+ # To strip negative and zero counts, add-in an empty counter:
+ # c += Counter()
+
+    def __add__(self, other):
+        '''Return a new Counter holding the element-wise sum of both counters.
+
+        Only elements whose resulting count is positive are kept.
+
+        >>> Counter('abbb') + Counter('bcc')
+        Counter({'b': 4, 'c': 2, 'a': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        total = Counter()
+        for key, mine in self.items():
+            combined = mine + other[key]
+            if combined > 0:
+                total[key] = combined
+        # elements that appear only in the right-hand operand
+        for key, theirs in other.items():
+            if key not in self and theirs > 0:
+                total[key] = theirs
+        return total
+
+    def __sub__(self, other):
+        '''Return a new Counter of self's counts minus other's, positives only.
+
+        >>> Counter('abbbc') - Counter('bccd')
+        Counter({'b': 2, 'a': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        remainder = Counter()
+        for key, mine in self.items():
+            difference = mine - other[key]
+            if difference > 0:
+                remainder[key] = difference
+        # subtracting a negative count of an element absent from self
+        # yields a positive result
+        for key, theirs in other.items():
+            if key not in self and theirs < 0:
+                remainder[key] = 0 - theirs
+        return remainder
+
+    def __or__(self, other):
+        '''Union: keep, per element, the larger of the two counts.
+
+        Only positive counts appear in the result.
+
+        >>> Counter('abbb') | Counter('bcc')
+        Counter({'b': 3, 'c': 2, 'a': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        union = Counter()
+        for key, mine in self.items():
+            theirs = other[key]
+            if mine < theirs:
+                larger = theirs
+            else:
+                larger = mine
+            if larger > 0:
+                union[key] = larger
+        # elements that appear only in the right-hand operand
+        for key, theirs in other.items():
+            if key not in self and theirs > 0:
+                union[key] = theirs
+        return union
+
+    def __and__(self, other):
+        '''Intersection: keep, per element, the smaller of the two counts.
+
+        Only positive counts appear in the result.
+
+        >>> Counter('abbb') & Counter('bcc')
+        Counter({'b': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        common = Counter()
+        for key, mine in self.items():
+            theirs = other[key]
+            if mine < theirs:
+                smaller = mine
+            else:
+                smaller = theirs
+            if smaller > 0:
+                common[key] = smaller
+        return common
+
+
+if __name__ == '__main__':
+    # Self-test / demo, run only when the module is executed as a script.
+
+    # verify that instances can be pickled
+    from cPickle import loads, dumps
+    Point = namedtuple('Point', 'x, y', True)
+    p = Point(x=10, y=20)
+    assert p == loads(dumps(p))
+
+    # test and demonstrate ability to override methods
+    class Point(namedtuple('Point', 'x y')):
+        __slots__ = ()
+        @property
+        def hypot(self):
+            return (self.x ** 2 + self.y ** 2) ** 0.5
+        def __str__(self):
+            return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
+
+    for p in Point(3, 4), Point(14, 5/7.):
+        print p
+
+    # Point is rebound once more: a subclass that replaces _make() and
+    # _replace() with versions that skip error-checking.
+    class Point(namedtuple('Point', 'x y')):
+        'Point class with optimized _make() and _replace() without error-checking'
+        __slots__ = ()
+        _make = classmethod(tuple.__new__)
+        def _replace(self, _map=map, **kwds):
+            return self._make(_map(kwds.get, ('x', 'y'), self))
+
+    print Point(11, 22)._replace(x=100)
+
+    # derive a new tuple type by extending Point's field list with 'z'
+    Point3D = namedtuple('Point3D', Point._fields + ('z',))
+    print Point3D.__doc__
+
+    # run the module's doctests and print the result as a named tuple
+    import doctest
+    TestResults = namedtuple('TestResults', 'failed attempted')
+    print TestResults(*doctest.testmod())
diff --git a/cashew/Lib/colorsys.py b/cashew/Lib/colorsys.py
new file mode 100644
index 0000000..a6c0cf6
--- /dev/null
+++ b/cashew/Lib/colorsys.py
@@ -0,0 +1,156 @@
+"""Conversion functions between RGB and other color systems.
+
+This module provides two functions for each color system ABC:
+
+ rgb_to_abc(r, g, b) --> a, b, c
+ abc_to_rgb(a, b, c) --> r, g, b
+
+All inputs and outputs are triples of floats in the range [0.0...1.0]
+(with the exception of I and Q, which cover a slightly larger range).
+Inputs outside the valid range may cause exceptions or invalid outputs.
+
+Supported color systems:
+RGB: Red, Green, Blue components
+YIQ: Luminance, Chrominance (used by composite video signals)
+HLS: Hue, Luminance, Saturation
+HSV: Hue, Saturation, Value
+"""
+
+# References:
+# http://en.wikipedia.org/wiki/YIQ
+# http://en.wikipedia.org/wiki/HLS_color_space
+# http://en.wikipedia.org/wiki/HSV_color_space
+
+__all__ = ["rgb_to_yiq","yiq_to_rgb","rgb_to_hls","hls_to_rgb",
+ "rgb_to_hsv","hsv_to_rgb"]
+
+# Some floating point constants
+
+ONE_THIRD = 1.0/3.0
+ONE_SIXTH = 1.0/6.0
+TWO_THIRD = 2.0/3.0
+
+# YIQ: used by composite video signals (linear combinations of RGB)
+# Y: perceived grey level (0.0 == black, 1.0 == white)
+# I, Q: color components
+
+def rgb_to_yiq(r, g, b):
+    """Convert an (r, g, b) triple to (y, i, q) with fixed linear weights."""
+    y_val = 0.30*r + 0.59*g + 0.11*b
+    i_val = 0.60*r - 0.28*g - 0.32*b
+    q_val = 0.21*r - 0.52*g + 0.31*b
+    return (y_val, i_val, q_val)
+
+def yiq_to_rgb(y, i, q):
+    """Convert (y, i, q) to (r, g, b), clamping each channel to [0.0, 1.0]."""
+    channels = [
+        y + 0.948262*i + 0.624013*q,
+        y - 0.276066*i - 0.639810*q,
+        y - 1.105450*i + 1.729860*q,
+    ]
+    # the linear transform can overshoot; pin out-of-range channels
+    for idx, value in enumerate(channels):
+        if value < 0.0:
+            channels[idx] = 0.0
+        elif value > 1.0:
+            channels[idx] = 1.0
+    return tuple(channels)
+
+
+# HLS: Hue, Luminance, Saturation
+# H: position in the spectrum
+# L: color lightness
+# S: color saturation
+
+def rgb_to_hls(r, g, b):
+    """Convert an (r, g, b) triple to (h, l, s).
+
+    When all three channels are equal the result is (0.0, l, 0.0).
+    """
+    hi = max(r, g, b)
+    lo = min(r, g, b)
+    span = hi - lo
+    lightness = (lo + hi) / 2.0
+    if lo == hi:
+        return 0.0, lightness, 0.0
+    if lightness <= 0.5:
+        saturation = span / (hi + lo)
+    else:
+        saturation = span / (2.0 - hi - lo)
+    # distance of each channel from the maximum, relative to the spread
+    r_gap = (hi - r) / span
+    g_gap = (hi - g) / span
+    b_gap = (hi - b) / span
+    if r == hi:
+        hue = b_gap - g_gap
+    elif g == hi:
+        hue = 2.0 + r_gap - b_gap
+    else:
+        hue = 4.0 + g_gap - r_gap
+    hue = (hue / 6.0) % 1.0
+    return hue, lightness, saturation
+
+def hls_to_rgb(h, l, s):
+    """Convert (h, l, s) to (r, g, b); zero saturation returns (l, l, l)."""
+    if s == 0.0:
+        return l, l, l
+    m2 = l * (1.0+s) if l <= 0.5 else l+s-(l*s)
+    m1 = 2.0*l - m2
+    # each channel samples the same curve at a hue shifted by a third
+    red = _v(m1, m2, h+ONE_THIRD)
+    green = _v(m1, m2, h)
+    blue = _v(m1, m2, h-ONE_THIRD)
+    return (red, green, blue)
+
+def _v(m1, m2, hue):
+    """Return one channel value for hls_to_rgb, between m1 and m2.
+
+    hue is wrapped into [0.0, 1.0) first; the result ramps up from m1,
+    holds at m2, ramps back down, then holds at m1.
+    """
+    wrapped = hue % 1.0
+    if wrapped < ONE_SIXTH:
+        result = m1 + (m2-m1)*wrapped*6.0
+    elif wrapped < 0.5:
+        result = m2
+    elif wrapped < TWO_THIRD:
+        result = m1 + (m2-m1)*(TWO_THIRD-wrapped)*6.0
+    else:
+        result = m1
+    return result
+
+
+# HSV: Hue, Saturation, Value
+# H: position in the spectrum
+# S: color saturation ("purity")
+# V: color brightness
+
+def rgb_to_hsv(r, g, b):
+    """Convert an (r, g, b) triple to (h, s, v).
+
+    When all three channels are equal the result is (0.0, 0.0, v).
+    """
+    hi = max(r, g, b)
+    lo = min(r, g, b)
+    span = hi - lo
+    value = hi
+    if lo == hi:
+        return 0.0, 0.0, value
+    saturation = span / hi
+    # distance of each channel from the maximum, relative to the spread
+    r_gap = (hi - r) / span
+    g_gap = (hi - g) / span
+    b_gap = (hi - b) / span
+    if r == hi:
+        hue = b_gap - g_gap
+    elif g == hi:
+        hue = 2.0 + r_gap - b_gap
+    else:
+        hue = 4.0 + g_gap - r_gap
+    hue = (hue / 6.0) % 1.0
+    return hue, saturation, value
+
+def hsv_to_rgb(h, s, v):
+    """Convert (h, s, v) to (r, g, b); zero saturation returns (v, v, v)."""
+    if s == 0.0:
+        return v, v, v
+    scaled = h*6.0
+    sector = int(scaled) # XXX assume int() truncates!
+    f = scaled - sector
+    p = v*(1.0 - s)
+    q = v*(1.0 - s*f)
+    t = v*(1.0 - s*(1.0-f))
+    # one (r, g, b) arrangement per sixth of the hue circle
+    by_sector = (
+        (v, t, p),
+        (q, v, p),
+        (p, v, t),
+        (p, q, v),
+        (t, p, v),
+        (v, p, q),
+    )
+    return by_sector[sector % 6]
diff --git a/cashew/Lib/commands.py b/cashew/Lib/commands.py
new file mode 100644
index 0000000..d0e8dd5
--- /dev/null
+++ b/cashew/Lib/commands.py
@@ -0,0 +1,90 @@
+"""Execute shell commands via os.popen() and return status, output.
+
+Interface summary:
+
+ import commands
+
+ outtext = commands.getoutput(cmd)
+ (exitstatus, outtext) = commands.getstatusoutput(cmd)
+ outtext = commands.getstatus(file) # returns output of "ls -ld file"
+
+A trailing newline is removed from the output string.
+
+Encapsulates the basic operation:
+
+ pipe = os.popen('{ ' + cmd + '; } 2>&1', 'r')
+ text = pipe.read()
+ sts = pipe.close()
+
+ [Note: it would be nice to add functions to interpret the exit status.]
+"""
+from warnings import warnpy3k
+warnpy3k("the commands module has been removed in Python 3.0; "
+ "use the subprocess module instead", stacklevel=2)
+del warnpy3k
+
+__all__ = ["getstatusoutput","getoutput","getstatus"]
+
+# Module 'commands'
+#
+# Various tools for executing commands and looking at their output and status.
+#
+# NB This only works (and is only relevant) for UNIX.
+
+
+# Get 'ls -l' status for an object into a string
+#
+def getstatus(file):
+    """Return output of "ls -ld <file>" in a string."""
+    import warnings
+    # Deprecated entry point: a DeprecationWarning is issued on every call.
+    warnings.warn("commands.getstatus() is deprecated", DeprecationWarning, 2)
+    # mkarg() supplies the leading space and the shell quoting of the name.
+    return getoutput('ls -ld' + mkarg(file))
+
+
+# Get the output from a shell command into a string.
+# The exit status is ignored; a trailing newline is stripped.
+# Assume the command will work with '{ ... ; } 2>&1' around it..
+#
+def getoutput(cmd):
+    """Return output (stdout or stderr) of executing cmd in a shell."""
+    # the exit status is deliberately discarded
+    _status, text = getstatusoutput(cmd)
+    return text
+
+
+# Ditto but preserving the exit status.
+# Returns a pair (sts, output)
+#
+def getstatusoutput(cmd):
+    """Return (status, output) of executing cmd in a shell."""
+    import os
+    # run inside a brace group so stderr of the whole command joins stdout
+    wrapped = '{ ' + cmd + '; } 2>&1'
+    pipe = os.popen(wrapped, 'r')
+    output = pipe.read()
+    status = pipe.close()
+    if status is None:
+        # close() reports a clean exit as None; callers get 0 instead
+        status = 0
+    if output.endswith('\n'):
+        # drop exactly one trailing newline
+        output = output[:-1]
+    return status, output
+
+
+# Make command argument from directory and pathname (prefix space, add quotes).
+#
+def mk2arg(head, x):
+    """Join head and x into one path and return it as a quoted shell argument."""
+    import os
+    joined = os.path.join(head, x)
+    return mkarg(joined)
+
+
+# Make a shell command argument from a string.
+# Return a string beginning with a space followed by a shell-quoted
+# version of the argument.
+# Two strategies: enclose in single quotes if it contains none;
+# otherwise, enclose in double quotes and prefix quotable characters
+# with backslash.
+#
+def mkarg(x):
+    """Return x as a shell-quoted argument preceded by a single space.
+
+    Strings without a single quote are wrapped in single quotes; otherwise
+    double quotes are used and backslash, $, " and ` are backslash-escaped.
+    """
+    if "'" not in x:
+        return " '" + x + "'"
+    pieces = [' "']
+    for ch in x:
+        if ch in '\\$"`':
+            pieces.append('\\')
+        pieces.append(ch)
+    pieces.append('"')
+    return ''.join(pieces)
diff --git a/cashew/Lib/compileall.py b/cashew/Lib/compileall.py
new file mode 100644
index 0000000..5cfa8be
--- /dev/null
+++ b/cashew/Lib/compileall.py
@@ -0,0 +1,227 @@
+"""Module/script to byte-compile all .py files to .pyc (or .pyo) files.
+
+When called as a script with arguments, this compiles the directories
+given as arguments recursively; the -l option prevents it from
+recursing into directories.
+
+Without arguments, it compiles all modules on sys.path, without
+recursing into subdirectories. (Even though it should do so for
+packages -- for now, you'll have to deal with packages separately.)
+
+See module py_compile for details of the actual byte-compilation.
+"""
+import os
+import sys
+import py_compile
+import struct
+import imp
+
+__all__ = ["compile_dir","compile_file","compile_path"]
+
+def compile_dir(dir, maxlevels=10, ddir=None,
+                force=0, rx=None, quiet=0):
+    """Byte-compile all modules in the given directory tree.
+
+    Arguments (only dir is required):
+
+    dir: the directory to byte-compile
+    maxlevels: maximum recursion level (default 10)
+    ddir: the directory that will be prepended to the path to the
+          file as it is compiled into each byte-code file.
+    force: if 1, force compilation, even if timestamps are up-to-date
+    rx: handed unchanged to compile_file() and to recursive calls
+    quiet: if 1, be quiet during compilation
+
+    Returns 1 if everything compiled, 0 if any file or subdirectory failed.
+    """
+    if not quiet:
+        print 'Listing', dir, '...'
+    try:
+        entries = os.listdir(dir)
+    except os.error:
+        print "Can't list", dir
+        entries = []
+    entries.sort()
+    all_ok = 1
+    for entry in entries:
+        path = os.path.join(dir, entry)
+        dfile = os.path.join(ddir, entry) if ddir is not None else None
+        if not os.path.isdir(path):
+            compiled = compile_file(path, ddir, force, rx, quiet)
+        elif (maxlevels > 0 and
+              entry != os.curdir and entry != os.pardir and
+              os.path.isdir(path) and
+              not os.path.islink(path)):
+            # recurse with one level less; symlinked directories are skipped
+            compiled = compile_dir(path, maxlevels - 1, dfile, force, rx,
+                                   quiet)
+        else:
+            continue
+        if not compiled:
+            all_ok = 0
+    return all_ok
+
+def compile_file(fullname, ddir=None, force=0, rx=None, quiet=0):
+    """Byte-compile one file.
+
+    Arguments (only fullname is required):
+
+    fullname: the file to byte-compile
+    ddir: if given, the directory name compiled in to the
+          byte-code file.
+    force: if 1, force compilation, even if timestamps are up-to-date
+    rx: if given, an object whose search() is applied to fullname; the
+        file is skipped (and counted as a success) when it matches
+    quiet: if 1, be quiet during compilation
+
+    Returns 1 on success or when there was nothing to do, 0 on failure.
+    """
+    success = 1
+    name = os.path.basename(fullname)
+    if ddir is not None:
+        dfile = os.path.join(ddir, name)
+    else:
+        dfile = None
+    if rx is not None:
+        mo = rx.search(fullname)
+        if mo:
+            return success
+    if os.path.isfile(fullname):
+        head, tail = name[:-3], name[-3:]
+        if tail == '.py':
+            if not force:
+                # Skip the compile when the first 8 bytes of the existing
+                # byte-code file equal the packed magic number plus the
+                # source's mtime.
+                try:
+                    mtime = int(os.stat(fullname).st_mtime)
+                    expect = struct.pack('<4sl', imp.get_magic(), mtime)
+                    cfile = fullname + (__debug__ and 'c' or 'o')
+                    with open(cfile, 'rb') as chandle:
+                        actual = chandle.read(8)
+                    if expect == actual:
+                        return success
+                except IOError:
+                    # unreadable or missing byte-code file: just compile
+                    pass
+            if not quiet:
+                print 'Compiling', fullname, '...'
+            try:
+                ok = py_compile.compile(fullname, None, dfile, True)
+            except py_compile.PyCompileError,err:
+                # in quiet mode the file name was not printed above, so
+                # print it now to give the error message some context
+                if quiet:
+                    print 'Compiling', fullname, '...'
+                print err.msg
+                success = 0
+            except IOError, e:
+                print "Sorry", e
+                success = 0
+            else:
+                if ok == 0:
+                    success = 0
+    return success
+
+def compile_path(skip_curdir=1, maxlevels=0, force=0, quiet=0):
+    """Byte-compile all modules on sys.path.
+
+    Arguments (all optional):
+
+    skip_curdir: if true, skip current directory (default true)
+    maxlevels: max recursion level (default 0)
+    force: as for compile_dir() (default 0)
+    quiet: as for compile_dir() (default 0)
+    """
+    result = 1
+    for entry in sys.path:
+        is_curdir = not entry or entry == os.curdir
+        if is_curdir and skip_curdir:
+            print 'Skipping current directory'
+            continue
+        # `and` short-circuits: once one directory has failed, the
+        # remaining entries are not compiled
+        result = result and compile_dir(entry, maxlevels, None,
+                                        force, quiet=quiet)
+    return result
+
+def expand_args(args, flist):
+    """Return a copy of args extended with the names listed in flist.
+
+    args: list of names; it is copied, never modified
+    flist: name of a file holding one name per line, '-' to read the
+           names from stdin, or a false value to add nothing
+
+    An IOError while opening or reading the list is reported and re-raised.
+    """
+    expanded = args[:]
+    if not flist:
+        return expanded
+    try:
+        if flist == '-':
+            fd, opened_here = sys.stdin, False
+        else:
+            fd, opened_here = open(flist), True
+        try:
+            while 1:
+                line = fd.readline()
+                if not line:
+                    break
+                # strip only a real line terminator, so a final line without
+                # a trailing newline keeps its last character
+                if line.endswith('\n'):
+                    line = line[:-1]
+                expanded.append(line)
+        finally:
+            # close the list file we opened, but never stdin
+            if opened_here:
+                fd.close()
+    except IOError:
+        print("Error reading file list %s" % flist)
+        raise
+    return expanded
+
+def main():
+    """Script main program.
+
+    Parses sys.argv, then compiles the named files and directories, or
+    sys.path when none are given.  Returns a true value on success and a
+    false one if anything failed or the run was interrupted.  Exits with
+    status 2 on a usage error.
+    """
+    import getopt
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], 'lfqd:x:i:')
+    except getopt.error as msg:
+        print(msg)
+        usage_lines = (
+            "usage: python compileall.py [-l] [-f] [-q] [-d destdir] "
+            "[-x regexp] [-i list] [directory|file ...]",
+            "",
+            "arguments: zero or more file and directory names to compile; "
+            "if no arguments given, ",
+            " defaults to the equivalent of -l sys.path",
+            "",
+            "options:",
+            "-l: don't recurse into subdirectories",
+            "-f: force rebuild even if timestamps are up-to-date",
+            "-q: output only error messages",
+            "-d destdir: directory to prepend to file paths for use in "
+            "compile-time tracebacks and in",
+            " runtime tracebacks in cases where the source "
+            "file is unavailable",
+            "-x regexp: skip files matching the regular expression regexp; "
+            "the regexp is searched for",
+            " in the full path of each file considered for "
+            "compilation",
+            "-i file: add all the files and directories listed in file to "
+            "the list considered for",
+            ' compilation; if "-", names are read from stdin',
+            "",
+        )
+        for usage_line in usage_lines:
+            print(usage_line)
+        sys.exit(2)
+    maxlevels = 10
+    ddir = None
+    force = 0
+    quiet = 0
+    rx = None
+    flist = None
+    for o, a in opts:
+        if o == '-l': maxlevels = 0
+        if o == '-d': ddir = a
+        if o == '-f': force = 1
+        if o == '-q': quiet = 1
+        if o == '-x':
+            import re
+            rx = re.compile(a)
+        if o == '-i': flist = a
+    if ddir:
+        # -d needs exactly one argument and it must be a directory.  `or`
+        # (not `and`) keeps args[0] from being read when there are no
+        # arguments and rejects several arguments as the message promises.
+        if len(args) != 1 or not os.path.isdir(args[0]):
+            print("-d destdir require exactly one directory argument")
+            sys.exit(2)
+    success = 1
+    try:
+        if args or flist:
+            try:
+                if flist:
+                    args = expand_args(args, flist)
+            except IOError:
+                success = 0
+            if success:
+                for arg in args:
+                    if os.path.isdir(arg):
+                        if not compile_dir(arg, maxlevels, ddir,
+                                           force, rx, quiet):
+                            success = 0
+                    else:
+                        if not compile_file(arg, ddir, force, rx, quiet):
+                            success = 0
+        else:
+            success = compile_path()
+    except KeyboardInterrupt:
+        print("\n[interrupted]")
+        success = 0
+    return success
+
+if __name__ == '__main__':
+    # main() returns a true value on success; invert it so that success
+    # becomes exit status 0 and failure becomes 1.
+    exit_status = int(not main())
+    sys.exit(exit_status)
diff --git a/cashew/Lib/contextlib.py b/cashew/Lib/contextlib.py
new file mode 100644
index 0000000..f05205b
--- /dev/null
+++ b/cashew/Lib/contextlib.py
@@ -0,0 +1,154 @@
+"""Utilities for with-statement contexts. See PEP 343."""
+
+import sys
+from functools import wraps
+from warnings import warn
+
+__all__ = ["contextmanager", "nested", "closing"]
+
+class GeneratorContextManager(object):
+    """Helper for @contextmanager decorator.
+
+    Wraps a generator object and drives it through the with-statement
+    protocol: __enter__ advances it to its yield, __exit__ resumes it.
+    """
+
+    def __init__(self, gen):
+        # gen: the generator object to drive
+        self.gen = gen
+
+    def __enter__(self):
+        """Advance the generator to its yield and return the yielded value.
+
+        Raises RuntimeError if the generator finishes without yielding.
+        """
+        try:
+            return self.gen.next()
+        except StopIteration:
+            raise RuntimeError("generator didn't yield")
+
+    def __exit__(self, type, value, traceback):
+        """Resume the generator once the with-block has ended.
+
+        With no exception the generator is advanced and must finish.  With
+        an exception, it is thrown into the generator; a true return value
+        tells the with statement to suppress that exception.
+        """
+        if type is None:
+            try:
+                self.gen.next()
+            except StopIteration:
+                return
+            else:
+                raise RuntimeError("generator didn't stop")
+        else:
+            if value is None:
+                # Need to force instantiation so we can reliably
+                # tell if we get the same exception back
+                value = type()
+            try:
+                self.gen.throw(type, value, traceback)
+                raise RuntimeError("generator didn't stop after throw()")
+            except StopIteration, exc:
+                # Suppress the exception *unless* it's the same exception that
+                # was passed to throw(). This prevents a StopIteration
+                # raised inside the "with" statement from being suppressed
+                return exc is not value
+            except:
+                # only re-raise if it's *not* the exception that was
+                # passed to throw(), because __exit__() must not raise
+                # an exception unless __exit__() itself failed. But throw()
+                # has to raise the exception to signal propagation, so this
+                # fixes the impedance mismatch between the throw() protocol
+                # and the __exit__() protocol.
+                #
+                if sys.exc_info()[1] is not value:
+                    raise
+
+
+def contextmanager(func):
+    """@contextmanager decorator.
+
+    Typical usage:
+
+        @contextmanager
+        def some_generator(<arguments>):
+            <setup>
+            try:
+                yield <value>
+            finally:
+                <cleanup>
+
+    This makes this:
+
+        with some_generator(<arguments>) as <variable>:
+            <body>
+
+    equivalent to this:
+
+        <setup>
+        try:
+            <variable> = <value>
+            <body>
+        finally:
+            <cleanup>
+
+    """
+    @wraps(func)
+    def helper(*args, **kwds):
+        # each call builds a fresh generator and wraps it in a manager
+        generator = func(*args, **kwds)
+        return GeneratorContextManager(generator)
+    return helper
+
+
+@contextmanager
+def nested(*managers):
+    """Combine multiple context managers into a single nested context manager.
+
+    This function has been deprecated in favour of the multiple manager form
+    of the with statement.
+
+    The one advantage of this function over the multiple manager form of the
+    with statement is that argument unpacking allows it to be
+    used with a variable number of context managers as follows:
+
+        with nested(*managers):
+            do_something()
+
+    """
+    warn("With-statements now directly support multiple context managers",
+         DeprecationWarning, 3)
+    exits = []
+    vars = []
+    exc = (None, None, None)
+    try:
+        for mgr in managers:
+            # look up both methods before entering, so a manager lacking
+            # __exit__ fails before its __enter__ has run
+            exit = mgr.__exit__
+            enter = mgr.__enter__
+            vars.append(enter())
+            exits.append(exit)
+        yield vars
+    except:
+        exc = sys.exc_info()
+    finally:
+        # unwind in reverse order of entry; each __exit__ sees the exception
+        # currently in flight and may suppress or replace it
+        while exits:
+            exit = exits.pop()
+            try:
+                if exit(*exc):
+                    exc = (None, None, None)
+            except:
+                exc = sys.exc_info()
+        if exc != (None, None, None):
+            # Don't rely on sys.exc_info() still containing
+            # the right information. Another exception may
+            # have been raised and caught by an exit method
+            raise exc[0], exc[1], exc[2]
+
+
+class closing(object):
+    """Context to automatically close something at the end of a block.
+
+    Code like this:
+
+        with closing(<module>.open(<arguments>)) as f:
+            <block>
+
+    is equivalent to this:
+
+        f = <module>.open(<arguments>)
+        try:
+            <block>
+        finally:
+            f.close()
+
+    """
+    def __init__(self, thing):
+        # thing: the wrapped object; only its close() method is used here
+        self.thing = thing
+
+    def __enter__(self):
+        wrapped = self.thing
+        return wrapped
+
+    def __exit__(self, *exc_info):
+        # returns None, so an exception raised in the block still propagates
+        self.thing.close()
diff --git a/cashew/Lib/cookielib.py b/cashew/Lib/cookielib.py
new file mode 100644
index 0000000..2dd7c48
--- /dev/null
+++ b/cashew/Lib/cookielib.py
@@ -0,0 +1,1810 @@
+r"""HTTP cookie handling for web clients.
+
+This module has (now fairly distant) origins in Gisle Aas' Perl module
+HTTP::Cookies, from the libwww-perl library.
+
+Docstrings, comments and debug strings in this code refer to the
+attributes of the HTTP cookie system as cookie-attributes, to distinguish
+them clearly from Python attributes.
+
+Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
+distributed with the Python standard library, but are available from
+http://wwwsearch.sf.net/):
+
+ CookieJar____
+ / \ \
+ FileCookieJar \ \
+ / | \ \ \
+ MozillaCookieJar | LWPCookieJar \ \
+ | | \
+ | ---MSIEBase | \
+ | / | | \
+ | / MSIEDBCookieJar BSDDBCookieJar
+ |/
+ MSIECookieJar
+
+"""
+
+__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
+ 'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
+ 'MozillaCookieJar']
+
+import re, urlparse, copy, time, urllib
+try:
+ import threading as _threading
+except ImportError:
+ import dummy_threading as _threading
+import httplib # only for the default HTTP port
+from calendar import timegm
+
+debug = False # set to True to enable debugging via the logging module
+logger = None
+
+def _debug(*args):
+    """Pass args to the "cookielib" logger's debug() when debug is true.
+
+    The logger is created on first use.  Does nothing while debug is false.
+    """
+    global logger
+    if debug:
+        if not logger:
+            import logging
+            logger = logging.getLogger("cookielib")
+        return logger.debug(*args)
+
+
+DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
+MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
+ "instance initialised with one)")
+
+def _warn_unhandled_exception():
+    """Issue a warning containing the traceback of the current exception."""
+    # The module has a few catch-all except: clauses that guard against
+    # input that is bad in unexpected ways; this reports whatever they
+    # caught instead of losing it silently.
+    import warnings, traceback, StringIO
+    buf = StringIO.StringIO()
+    traceback.print_exc(None, buf)
+    warnings.warn("cookielib bug!\n%s" % buf.getvalue(), stacklevel=2)
+
+
+# Date/time conversion
+# -----------------------------------------------------------------------------
+
+EPOCH_YEAR = 1970
+def _timegm(tt):
+    """Return timegm(tt), or None when a leading field of tt is out of range.
+
+    Only the first six fields (year, month, day, hour, minute, second)
+    are checked.
+    """
+    year, month, mday, hour, min, sec = tt[:6]
+    in_range = (
+        (year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
+        (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61))
+    if not in_range:
+        return None
+    return timegm(tt)
+
+# Weekday and month abbreviations shared by the date helpers below.
+DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
+MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
+# Lower-cased copy of MONTHS, used for case-insensitive month lookups.
+MONTHS_LOWER = []
+for month in MONTHS: MONTHS_LOWER.append(month.lower())
+
+def time2isoz(t=None):
+    """Format t (seconds since the epoch) as a UTC timestamp string.
+
+    When t is omitted the current time is used.
+
+    The result looks like "YYYY-MM-DD hh:mm:ssZ" and represents Universal
+    Time (UTC, aka GMT), for example:
+
+    1994-11-24 08:49:37Z
+
+    """
+    if t is None:
+        t = time.time()
+    fields = tuple(time.gmtime(t)[:6])
+    return "%04d-%02d-%02d %02d:%02d:%02dZ" % fields
+
+def time2netscape(t=None):
+    """Format t (seconds since the epoch) in the Netscape cookie date style.
+
+    When t is omitted the current time is used.
+
+    The result looks like this:
+
+    Wed, DD-Mon-YYYY HH:MM:SS GMT
+
+    """
+    if t is None:
+        t = time.time()
+    year, mon, mday, hour, minute, sec, wday = time.gmtime(t)[:7]
+    weekday = DAYS[wday]
+    # MONTHS is indexed from zero, gmtime() months start at one
+    month_name = MONTHS[mon-1]
+    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
+        weekday, mday, month_name, year, hour, minute, sec)
+
+
+UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
+
+TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
+def offset_from_tz_string(tz):
+    """Return the UTC offset in seconds named by tz, or None if unrecognized.
+
+    Names in UTC_ZONES give 0; numeric zones such as "+0100" or "-08:00"
+    are converted from hours and optional minutes.
+    """
+    if tz in UTC_ZONES:
+        return 0
+    m = TIMEZONE_RE.search(tz)
+    if not m:
+        return None
+    sign, hours, minutes = m.groups()
+    offset = 3600 * int(hours)
+    if minutes:
+        offset = offset + 60 * int(minutes)
+    if sign == '-':
+        offset = -offset
+    return offset
+
+def _str2time(day, mon, yr, hr, min, sec, tz):
+    """Convert parsed date fields to seconds since the epoch.
+
+    mon may be a month name or a month number; hr, min, sec and tz may be
+    None.  Returns None when the month, the resulting time tuple or the
+    timezone string is not acceptable.
+    """
+    # translate month name to number
+    # month numbers start with 1 (January)
+    try:
+        mon = MONTHS_LOWER.index(mon.lower())+1
+    except ValueError:
+        # maybe it's already a number
+        try:
+            imon = int(mon)
+        except ValueError:
+            return None
+        if 1 <= imon <= 12:
+            mon = imon
+        else:
+            return None
+
+    # make sure clock elements are defined
+    if hr is None: hr = 0
+    if min is None: min = 0
+    if sec is None: sec = 0
+
+    yr = int(yr)
+    day = int(day)
+    hr = int(hr)
+    min = int(min)
+    sec = int(sec)
+
+    if yr < 1000:
+        # find "obvious" year
+        # Start in the current century, then move one century either way
+        # when the result would be more than 50 years from the current year.
+        cur_yr = time.localtime(time.time())[0]
+        m = cur_yr % 100
+        tmp = yr
+        yr = yr + cur_yr - m
+        m = m - tmp
+        if abs(m) > 50:
+            if m > 0: yr = yr + 100
+            else: yr = yr - 100
+
+    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
+    t = _timegm((yr, mon, day, hr, min, sec, tz))
+
+    if t is not None:
+        # adjust time using timezone string, to get absolute time since epoch
+        if tz is None:
+            tz = "UTC"
+        tz = tz.upper()
+        offset = offset_from_tz_string(tz)
+        if offset is None:
+            return None
+        t = t - offset
+
+    return t
+
+STRICT_DATE_RE = re.compile(
+ r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
+ "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
+WEEKDAY_RE = re.compile(
+ r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
+LOOSE_HTTP_DATE_RE = re.compile(
+ r"""^
+ (\d\d?) # day
+ (?:\s+|[-\/])
+ (\w+) # month
+ (?:\s+|[-\/])
+ (\d+) # year
+ (?:
+ (?:\s+|:) # separator before clock
+ (\d\d?):(\d\d) # hour:min
+ (?::(\d\d))? # optional seconds
+ )? # optional clock
+ \s*
+ ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
+ \s*
+ (?:\(\w+\))? # ASCII representation of timezone in parens.
+ \s*$""", re.X)
+def http2time(text):
+    """Return the time, in seconds since the epoch, that text represents.
+
+    The return value is an integer.
+
+    None is returned if the format of text is unrecognized, the time is
+    outside the representable range, or the timezone string is not
+    recognized.  A string without a timezone is taken to be UTC.
+
+    The timezone may be numerical (like "-0800" or "+0100") or a name (like
+    "UTC", "GMT", "BST" or "EST").  Currently only the names equivalent to
+    UTC (zero offset) are known to the function.
+
+    The following formats are parsed loosely:
+
+    Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
+    Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
+    Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
+    09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
+    08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
+    08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
+
+    Leading and trailing whitespace is ignored, and the time of day may be
+    absent.
+
+    A two-digit year is placed in the century that brings it closest to
+    the current date.
+
+    """
+    # fast exit for strictly conforming string
+    strict = STRICT_DATE_RE.search(text)
+    if strict:
+        day_s, mon_s, yr_s, hr_s, min_s, sec_s = strict.groups()
+        mon = MONTHS_LOWER.index(mon_s.lower()) + 1
+        tt = (int(yr_s), mon, int(day_s),
+              int(hr_s), int(min_s), float(sec_s))
+        return _timegm(tt)
+
+    # No, we need some messy parsing...
+
+    # drop leading whitespace and the (useless) weekday
+    text = WEEKDAY_RE.sub("", text.lstrip(), 1)
+
+    # loose regexp parse
+    loose = LOOSE_HTTP_DATE_RE.search(text)
+    if loose is None:
+        return None # bad format
+
+    # tz is time zone specifier string
+    day, mon, yr, hr, minute, sec, tz = loose.groups()
+    return _str2time(day, mon, yr, hr, minute, sec, tz)
+
+ISO_DATE_RE = re.compile(
+ """^
+ (\d{4}) # year
+ [-\/]?
+ (\d\d?) # numerical month
+ [-\/]?
+ (\d\d?) # day
+ (?:
+ (?:\s+|[-:Tt]) # separator before clock
+ (\d\d?):?(\d\d) # hour:min
+ (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
+ )? # optional clock
+ \s*
+ ([-+]?\d\d?:?(:?\d\d)?
+ |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
+ \s*$""", re.X)
+def iso2time(text):
+    """
+    As for http2time, but parses the ISO 8601 formats:
+
+    1994-02-03 14:15:29 -0100 -- ISO 8601 format
+    1994-02-03 14:15:29 -- zone is optional
+    1994-02-03 -- only date
+    1994-02-03T14:15:29 -- Use T as separator
+    19940203T141529Z -- ISO 8601 compact format
+    19940203 -- only date
+
+    """
+    # loose regexp parse, after dropping leading whitespace
+    parsed = ISO_DATE_RE.search(text.lstrip())
+    if parsed is None:
+        return None # bad format
+
+    # tz is time zone specifier string
+    # XXX there's an extra bit of the timezone I'm ignoring here: is
+    # this the right thing to do?
+    yr, mon, day, hr, minute, sec, tz, _ = parsed.groups()
+    return _str2time(day, mon, yr, hr, minute, sec, tz)
+
+
+# Header parsing
+# -----------------------------------------------------------------------------
+
+def unmatched(match):
+    """Return the match's subject string with the matched span cut out."""
+    begin, finish = match.span(0)
+    subject = match.string
+    return subject[:begin] + subject[finish:]
+
+HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")
+HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
+HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")
+HEADER_ESCAPE_RE = re.compile(r"\\(.)")
+def split_header_words(header_values):
+    r"""Parse header values into a list of lists containing key,value pairs.
+
+    The function knows how to deal with ",", ";" and "=" as well as quoted
+    values after "=". A list of space separated tokens are parsed as if they
+    were separated by ";".
+
+    If the header_values passed as argument contains multiple values, then they
+    are treated as if they were a single value separated by comma ",".
+
+    This means that this function is useful for parsing header fields that
+    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
+    the requirement for tokens).
+
+      headers       = #header
+      header        = (token | parameter) *( [";"] (token | parameter))
+
+      token         = 1*<any CHAR except CTLs or separators>
+      separators    = "(" | ")" | "<" | ">" | "@"
+                    | "," | ";" | ":" | "\" | <">
+                    | "/" | "[" | "]" | "?" | "="
+                    | "{" | "}" | SP | HT
+
+      quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
+      qdtext        = <any TEXT except <">>
+      quoted-pair   = "\" CHAR
+
+      parameter     = attribute "=" value
+      attribute     = token
+      value         = token | quoted-string
+
+    Each header is represented by a list of key/value pairs. The value for a
+    simple token (not part of a parameter) is None. Syntactically incorrect
+    headers will not necessarily be parsed as you would want.
+
+    This is easier to describe with some examples:
+
+    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
+    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
+    >>> split_header_words(['text/html; charset="iso-8859-1"'])
+    [[('text/html', None), ('charset', 'iso-8859-1')]]
+    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
+    [[('Basic', None), ('realm', '"foobar"')]]
+
+    """
+    # a bare string would be iterated character by character; reject it
+    assert not isinstance(header_values, basestring)
+    result = []
+    for text in header_values:
+        orig_text = text
+        pairs = []
+        # each pass consumes a token (with optional value), a comma, or junk
+        # from the front of text until nothing is left
+        while text:
+            m = HEADER_TOKEN_RE.search(text)
+            if m:
+                text = unmatched(m)
+                name = m.group(1)
+                m = HEADER_QUOTED_VALUE_RE.search(text)
+                if m: # quoted value
+                    text = unmatched(m)
+                    value = m.group(1)
+                    # drop the backslash of every backslash-escaped character
+                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
+                else:
+                    m = HEADER_VALUE_RE.search(text)
+                    if m: # unquoted value
+                        text = unmatched(m)
+                        value = m.group(1)
+                        value = value.rstrip()
+                    else:
+                        # no value, a lone token
+                        value = None
+                pairs.append((name, value))
+            elif text.lstrip().startswith(","):
+                # concatenated headers, as per RFC 2616 section 4.2
+                text = text.lstrip()[1:]
+                if pairs: result.append(pairs)
+                pairs = []
+            else:
+                # skip junk
+                non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
+                assert nr_junk_chars > 0, (
+                    "split_header_words bug: '%s', '%s', %s" %
+                    (orig_text, text, pairs))
+                text = non_junk
+        if pairs: result.append(pairs)
+    return result
+
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Build one header value from lists of (key, value) pairs.

    This is (almost) the inverse of split_header_words.  Pairs inside one
    list are joined with "; ", the lists themselves with ", ".  A value of
    None yields the bare key; any other value that is not made up purely of
    word characters is backslash-escaped and wrapped in double quotes.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    joined = []
    for pairs in lists:
        words = []
        for key, val in pairs:
            if val is None:
                words.append(key)
                continue
            if re.search(r"^\w+$", val) is None:
                # escape " and \ before quoting
                val = '"%s"' % HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", val)
            words.append("%s=%s" % (key, val))
        if words:
            joined.append("; ".join(words))
    return ", ".join(joined)
+
+def _strip_quotes(text):
+ if text.startswith('"'):
+ text = text[1:]
+ if text.endswith('"'):
+ text = text[:-1]
+ return text
+
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    Netscape-style Set-Cookie values may contain an unquoted "," (in the
    expires field, for instance), so split_header_words cannot be used;
    each header is simply split on ";" here.  The same parser is also used
    for RFC 2109 cookies.

    Returns a list with one entry per non-empty header; each entry is a list
    of (key, value) pairs.  The first pair is the cookie name/value.  For the
    following pairs, known attribute names are lowercased, a "version" value
    has surrounding quotes stripped, and an "expires" value is converted with
    http2time (None if it cannot be parsed).  A value is None when the
    parameter had no "=" at all.  ("version", "0") is appended when the
    header carried no version attribute.

    XXX This is not a best-effort parser for everything that turns up in
    Netscape cookie headers.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False

        # XXX: Not strictly RFC-conformant: parameters with an empty name
        # are dropped (and an empty cookie name discards the whole header),
        # mostly for backwards compatibility.
        for index, chunk in enumerate(ns_header.split(';')):
            key, sep, val = chunk.strip().partition('=')
            key = key.strip()

            if not key:
                if index == 0:
                    break
                continue

            # distinguish "present but empty" ('') from "missing" (None)
            val = val.strip() if sep else None

            if index != 0:
                lowered = key.lower()
                if lowered in known_attrs:
                    key = lowered

                if key == "version":
                    # This is an RFC 2109 cookie.
                    if val is not None:
                        val = _strip_quotes(val)
                    version_set = True
                elif key == "expires":
                    # convert expires date to seconds since epoch
                    if val is not None:
                        val = http2time(_strip_quotes(val))  # None if invalid
            pairs.append((key, val))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
+
+
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        # ends in ".<digits>": treated as an IP address, not a name
        return False
    if text == "":
        return False
    if text[0] == "." or text[-1] == ".":
        return False
    return True

def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB" means B is a suffix of A.  A != B is already
    # established, so a suffix match implies N is non-empty.  (Searching for
    # B anywhere inside A would wrongly accept e.g. x.y.com.evil.org against
    # .y.com.)
    if not A.endswith(B):
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
+
def liberal_is_HDN(text):
    """Return True if text looks roughly like a host domain name.

    Anything that does not end in ".<digits>" (the IPV4_RE test) qualifies.
    Used for the user-supplied accept/block domain lists.

    """
    return IPV4_RE.search(text) is None
+
def user_domain_match(A, B):
    """Match host A against pattern B for the user block/allow lists.

    A and B may be host domain names or IP addresses; the comparison is
    case-insensitive.  If either side is not a (liberal) host domain name,
    only exact equality matches.  Otherwise a pattern with an initial dot
    matches any A that ends with it, and a pattern without one must equal A.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # at least one IP address: only identical strings match
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
+
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is the network-location part of the request's full URL, or the
    request's "Host" header when the URL has none.  A trailing ":<port>" is
    removed.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    netloc = urlparse.urlparse(request.get_full_url())[1]
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    return cut_port_re.sub("", netloc, 1).lower()
+
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the host contains no dot
    and does not look like an IP address; otherwise it is the request-host
    itself.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
+
def request_path(request):
    """Path component of request-URI, as defined by RFC 2965.

    The path of the request's full URL is passed through escape_path and is
    guaranteed to start with "/".

    """
    raw_path = urlparse.urlsplit(request.get_full_url()).path
    escaped = escape_path(raw_path)
    if escaped.startswith("/"):
        return escaped
    # fix bad RFC 2396 absoluteURI
    return "/" + escaped
+
def request_port(request):
    """Return the port of the request's host, as a string.

    The text after the first ":" in request.get_host() is returned if it is
    numeric; None is returned (and a debug message logged) if it is not.
    DEFAULT_HTTP_PORT is returned when the host contains no ":".

    """
    _name, colon, port = request.get_host().partition(':')
    if not colon:
        return DEFAULT_HTTP_PORT
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
+
+# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
+# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %XX escape captured by match with its hex digits uppercased."""
    return "%" + match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
+
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # Split at the first dot: h == A + "." + B when a dot is present.
    _a, dot, b = h.partition(".")
    if dot and is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
+
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(req_host, origin_reach)
+
+
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie attributes.

        version and expires are converted with int() unless they are None.
        domain is lowercased.  rest (the non-standard attributes) is
        shallow-copied.

        Raises ValueError if port is None while port_specified is True.

        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return True if the non-standard attribute name is present."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard attribute name, or default if absent."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the non-standard attribute name to value."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= now.

        now defaults to the current time.time().  A cookie whose expires is
        None never counts as expired.

        """
        if now is None:
            now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        """Return "<Cookie name[=value] for domain[:port]path>"."""
        if self.port is None:
            p = ""
        else:
            p = ":" + self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        # The format string must contain both placeholders; an empty one
        # raises TypeError as soon as the cookie is printed.
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        """Return a constructor-style representation listing every attribute."""
        args = []
        for name in ("version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ):
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
+
+
class CookiePolicy:
    """Interface deciding which cookies are accepted from and returned to
    a server.

    Implementations may also modify cookies, though this is probably a bad
    idea.

    DefaultCookiePolicy provides the standard rules for Netscape and
    RFC 2965 cookies -- subclass that one for a customised policy.

    """

    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation accepts every domain.

        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation accepts every path.

        """
        return True
+
+
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies.

    set_ok() and return_ok() each run a fixed sequence of set_ok_<check> /
    return_ok_<check> methods and reject the cookie as soon as one of them
    returns false.  The Domain* class constants are bit flags for the
    strict_ns_domain constructor argument.

    """

    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 ):
        """Constructor arguments should be passed as keyword arguments only."""
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path

        if blocked_domains is not None:
            self._blocked_domains = tuple(blocked_domains)
        else:
            self._blocked_domains = ()

        # None (rather than an empty tuple) means "no allow-list in force"
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains

    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains."""
        self._blocked_domains = tuple(blocked_domains)

    def is_blocked(self, domain):
        """Return True if domain matches any entry of the block-list."""
        for blocked_domain in self._blocked_domains:
            if user_domain_match(domain, blocked_domain):
                return True
        return False

    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple)."""
        return self._allowed_domains

    def set_allowed_domains(self, allowed_domains):
        """Set the sequence of allowed domains, or None."""
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def is_not_allowed(self, domain):
        """Return True if an allow-list is set and domain matches no entry."""
        if self._allowed_domains is None:
            return False
        for allowed_domain in self._allowed_domains:
            if user_domain_match(domain, allowed_domain):
                return False
        return True

    def set_ok(self, cookie, request):
        """
        If you override .set_ok(), be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to accept).

        """
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        assert cookie.name is not None

        for n in "version", "verifiability", "name", "path", "domain", "port":
            fn_name = "set_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False

        return True

    def set_ok_version(self, cookie, request):
        """Reject cookies without a version or of a switched-off protocol."""
        if cookie.version is None:
            # Version is always set to 0 by parse_ns_headers if it's a Netscape
            # cookie, so this must be an invalid RFC 2965 cookie.
            _debug(" Set-Cookie2 without version attribute (%s=%s)",
                   cookie.name, cookie.value)
            return False
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def set_ok_verifiability(self, cookie, request):
        """Reject third-party cookies set during unverifiable transactions,
        if the corresponding strict_*_unverifiable switch is on."""
        if request.is_unverifiable() and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug(" third-party RFC 2965 cookie during "
                       "unverifiable transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug(" third-party Netscape cookie during "
                       "unverifiable transaction")
                return False
        return True

    def set_ok_name(self, cookie, request):
        """Optionally reject Netscape cookies whose name starts with '$'."""
        # Try and stop servers setting V0 cookies designed to hack other
        # servers that know both V0 and V1 protocols.
        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
            cookie.name.startswith("$")):
            _debug(" illegal name (starts with '$'): '%s'", cookie.name)
            return False
        return True

    def set_ok_path(self, cookie, request):
        """Reject an explicit path that is not a prefix of the request path
        (always for RFC 2965 cookies, for Netscape only if strict_ns_set_path).
        """
        if cookie.path_specified:
            req_path = request_path(request)
            if ((cookie.version > 0 or
                 (cookie.version == 0 and self.strict_ns_set_path)) and
                not req_path.startswith(cookie.path)):
                _debug(" path attribute %s is not a prefix of request "
                       "path %s", cookie.path, req_path)
                return False
        return True

    def set_ok_domain(self, cookie, request):
        """Check the cookie domain against the block/allow lists and, when
        the domain was given explicitly, against the request host."""
        if self.is_blocked(cookie.domain):
            _debug(" domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            _debug(" domain %s is not in user allow-list", cookie.domain)
            return False
        if cookie.domain_specified:
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            if self.strict_domain and (domain.count(".") >= 2):
                # XXX This should probably be compared with the Konqueror
                # (kcookiejar.cpp) and Mozilla implementations, but it's a
                # losing battle.
                i = domain.rfind(".")
                j = domain.rfind(".", 0, i)
                if j == 0:  # domain like .foo.bar
                    tld = domain[i+1:]
                    sld = domain[j+1:i]
                    if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                       "gov", "mil", "int", "aero", "biz", "cat", "coop",
                       "info", "jobs", "mobi", "museum", "name", "pro",
                       "travel", "eu") and len(tld) == 2:
                        # domain like .co.uk
                        _debug(" country-code second level domain %s", domain)
                        return False
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            if not embedded_dots and domain != ".local":
                _debug(" non-local domain %s contains no embedded dot",
                       domain)
                return False
            if cookie.version == 0:
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    _debug(" effective request-host %s (even with added "
                           "initial dot) does not end with %s",
                           erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    _debug(" effective request-host %s does not domain-match "
                           "%s", erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    _debug(" host prefix %s for domain %s contains a dot",
                           host_prefix, domain)
                    return False
        return True

    def set_ok_port(self, cookie, request):
        """When the cookie specifies ports, require every listed port to be
        numeric and the request port to be among them."""
        if cookie.port_specified:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                req_port = str(req_port)
            for p in cookie.port.split(","):
                try:
                    int(p)
                except ValueError:
                    _debug(" bad port %s (not numeric)", p)
                    return False
                if p == req_port:
                    break
            else:
                # loop ended without break: request port is not listed
                _debug(" request port (%s) not found in %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok(self, cookie, request):
        """
        If you override .return_ok(), be sure to call this method.  If it
        returns false, so should your subclass (assuming your subclass wants to
        be more strict about which cookies to return).

        """
        # Path has already been checked by .path_return_ok(), and domain
        # blocking done by .domain_return_ok().
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        for n in "version", "verifiability", "secure", "expires", "port", "domain":
            fn_name = "return_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False
        return True

    def return_ok_version(self, cookie, request):
        """Refuse cookies of a protocol that is switched off."""
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def return_ok_verifiability(self, cookie, request):
        """Refuse third-party cookies during unverifiable transactions, if
        the corresponding strict_*_unverifiable switch is on."""
        if request.is_unverifiable() and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug(" third-party RFC 2965 cookie during unverifiable "
                       "transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug(" third-party Netscape cookie during unverifiable "
                       "transaction")
                return False
        return True

    def return_ok_secure(self, cookie, request):
        """Refuse secure cookies on requests whose type is not "https"."""
        if cookie.secure and request.get_type() != "https":
            _debug(" secure cookie with non-secure request")
            return False
        return True

    def return_ok_expires(self, cookie, request):
        """Refuse cookies that have expired as of self._now."""
        # self._now is not set in this class; CookieJar.add_cookie_header
        # assigns it on the policy before cookies are checked.
        if cookie.is_expired(self._now):
            _debug(" cookie expired")
            return False
        return True

    def return_ok_port(self, cookie, request):
        """When the cookie has a port list, require the request port in it."""
        if cookie.port:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            for p in cookie.port.split(","):
                if p == req_port:
                    break
            else:
                # loop ended without break: request port is not listed
                _debug(" request port %s does not match cookie port %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok_domain(self, cookie, request):
        """Check that the request host is covered by the cookie's domain."""
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain

        # Compare on a label boundary: without the leading dot, a cookie for
        # "example.com" would also be sent to "pythonicexample.com".
        if domain and not domain.startswith("."):
            dotdomain = "." + domain
        else:
            dotdomain = domain

        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            _debug(" cookie with unspecified domain does not string-compare "
                   "equal to request domain")
            return False

        if cookie.version > 0 and not domain_match(erhn, domain):
            _debug(" effective request-host name %s does not domain-match "
                   "RFC 2965 cookie domain %s", erhn, domain)
            return False
        if cookie.version == 0 and not ("."+erhn).endswith(dotdomain):
            _debug(" request-host %s does not match Netscape cookie domain "
                   "%s", req_host, domain)
            return False
        return True

    def domain_return_ok(self, domain, request):
        """Return false if no cookie stored under domain may go to request.

        The request host (or effective request host) must end with domain on
        a label boundary, and domain must pass the block/allow lists.

        """
        # Liberal check of.  This is here as an optimization to avoid
        # having to load lots of MSIE cookie files unless necessary.
        req_host, erhn = eff_request_host(request)
        if not req_host.startswith("."):
            req_host = "."+req_host
        if not erhn.startswith("."):
            erhn = "."+erhn
        # Compare on a label boundary: without the leading dot, cookies
        # stored for "example.com" would match host "pythonicexample.com".
        if domain and not domain.startswith("."):
            dotdomain = "." + domain
        else:
            dotdomain = domain
        if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
            #_debug(" request domain %s does not match cookie domain %s",
            #       req_host, domain)
            return False

        if self.is_blocked(domain):
            _debug(" domain %s is in user block-list", domain)
            return False
        if self.is_not_allowed(domain):
            _debug(" domain %s is not in user allow-list", domain)
            return False

        return True

    def path_return_ok(self, path, request):
        """Return false if cookies stored under path must not go to request.

        The request path matches when it equals path, or starts with path and
        the match ends on a "/" boundary (so "/foo" does not match
        "/foobar").

        """
        _debug("- checking cookie path=%s", path)
        req_path = request_path(request)
        pathlen = len(path)
        if req_path == path:
            return True
        if (req_path.startswith(path) and
            (path.endswith("/") or req_path[pathlen:pathlen+1] == "/")):
            return True

        _debug(" %s does not path-match %s", req_path, path)
        return False
+
+
def vals_sorted_by_key(adict):
    """Return the values of adict as a list, ordered by sorted key."""
    # sorted() works on any mapping's keys; it does not require keys() to
    # hand back a mutable list that can be sorted in place.
    return [adict[key] for key in sorted(adict)]
+
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    A value counts as a nested mapping if it has an `items` attribute; such
    values are descended into and never yielded themselves.

    """
    for value in vals_sorted_by_key(mapping):
        try:
            value.items
        except AttributeError:
            is_leaf = True
        else:
            is_leaf = False
        if is_leaf:
            yield value
        else:
            for nested in deepvalues(value):
                yield nested
+
+
class Absent:
    """Sentinel passed as second argument to dict.get().

    It distinguishes an absent dict key from one with a None value.

    """
+
+class CookieJar:
+ """Collection of HTTP cookies.
+
+ You may not need to know about this class: try
+ urllib2.build_opener(HTTPCookieProcessor).open(url).
+
+ """
+
+ non_word_re = re.compile(r"\W")
+ quote_re = re.compile(r"([\"\\])")
+ strict_domain_re = re.compile(r"\.?[^.]*")
+ domain_re = re.compile(r"[^.]*")
+ dots_re = re.compile(r"^\.+")
+
+ magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
+
+ def __init__(self, policy=None):
+ if policy is None:
+ policy = DefaultCookiePolicy()
+ self._policy = policy
+
+ self._cookies_lock = _threading.RLock()
+ self._cookies = {}
+
+ def set_policy(self, policy):
+ self._policy = policy
+
+ def _cookies_for_domain(self, domain, request):
+ cookies = []
+ if not self._policy.domain_return_ok(domain, request):
+ return []
+ _debug("Checking %s for cookies to return", domain)
+ cookies_by_path = self._cookies[domain]
+ for path in cookies_by_path.keys():
+ if not self._policy.path_return_ok(path, request):
+ continue
+ cookies_by_name = cookies_by_path[path]
+ for cookie in cookies_by_name.values():
+ if not self._policy.return_ok(cookie, request):
+ _debug(" not returning cookie")
+ continue
+ _debug(" it's a match")
+ cookies.append(cookie)
+ return cookies
+
+ def _cookies_for_request(self, request):
+ """Return a list of cookies to be returned to server."""
+ cookies = []
+ for domain in self._cookies.keys():
+ cookies.extend(self._cookies_for_domain(domain, request))
+ return cookies
+
+ def _cookie_attrs(self, cookies):
+ """Return a list of cookie-attributes to be returned to server.
+
+ like ['foo="bar"; $Path="/"', ...]
+
+ The $Version attribute is also added when appropriate (currently only
+ once per request).
+
+ """
+ # add cookies in order of most specific (ie. longest) path first
+ cookies.sort(key=lambda arg: len(arg.path), reverse=True)
+
+ version_set = False
+
+ attrs = []
+ for cookie in cookies:
+ # set version of Cookie header
+ # XXX
+ # What should it be if multiple matching Set-Cookie headers have
+ # different versions themselves?
+ # Answer: there is no answer; was supposed to be settled by
+ # RFC 2965 errata, but that may never appear...
+ version = cookie.version
+ if not version_set:
+ version_set = True
+ if version > 0:
+ attrs.append("$Version=%s" % version)
+
+ # quote cookie value if necessary
+ # (not for Netscape protocol, which already has any quotes
+ # intact, due to the poorly-specified Netscape Cookie: syntax)
+ if ((cookie.value is not None) and
+ self.non_word_re.search(cookie.value) and version > 0):
+ value = self.quote_re.sub(r"\\\1", cookie.value)
+ else:
+ value = cookie.value
+
+ # add cookie-attributes to be returned in Cookie header
+ if cookie.value is None:
+ attrs.append(cookie.name)
+ else:
+ attrs.append("%s=%s" % (cookie.name, value))
+ if version > 0:
+ if cookie.path_specified:
+ attrs.append('$Path="%s"' % cookie.path)
+ if cookie.domain.startswith("."):
+ domain = cookie.domain
+ if (not cookie.domain_initial_dot and
+ domain.startswith(".")):
+ domain = domain[1:]
+ attrs.append('$Domain="%s"' % domain)
+ if cookie.port is not None:
+ p = "$Port"
+ if cookie.port_specified:
+ p = p + ('="%s"' % cookie.port)
+ attrs.append(p)
+
+ return attrs
+
+ def add_cookie_header(self, request):
+ """Add correct Cookie: header to request (urllib2.Request object).
+
+ The Cookie2 header is also added unless policy.hide_cookie2 is true.
+
+ """
+ _debug("add_cookie_header")
+ self._cookies_lock.acquire()
+ try:
+
+ self._policy._now = self._now = int(time.time())
+
+ cookies = self._cookies_for_request(request)
+
+ attrs = self._cookie_attrs(cookies)
+ if attrs:
+ if not request.has_header("Cookie"):
+ request.add_unredirected_header(
+ "Cookie", "; ".join(attrs))
+
+ # if necessary, advertise that we know RFC 2965
+ if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
+ not request.has_header("Cookie2")):
+ for cookie in cookies:
+ if cookie.version != 1:
+ request.add_unredirected_header("Cookie2", '$Version="1"')
+ break
+
+ finally:
+ self._cookies_lock.release()
+
+ self.clear_expired_cookies()
+
+ def _normalized_cookie_tuples(self, attrs_set):
+ """Return list of tuples containing normalised cookie information.
+
+ attrs_set is the list of lists of key,value pairs extracted from
+ the Set-Cookie or Set-Cookie2 headers.
+
+ Tuples are name, value, standard, rest, where name and value are the
+ cookie name and value, standard is a dictionary containing the standard
+ cookie-attributes (discard, secure, version, expires or max-age,
+ domain, path and port) and rest is a dictionary containing the rest of
+ the cookie-attributes.
+
+ """
+ cookie_tuples = []
+
+ boolean_attrs = "discard", "secure"
+ value_attrs = ("version",
+ "expires", "max-age",
+ "domain", "path", "port",
+ "comment", "commenturl")
+
+ for cookie_attrs in attrs_set:
+ name, value = cookie_attrs[0]
+
+ # Build dictionary of standard cookie-attributes (standard) and
+ # dictionary of other cookie-attributes (rest).
+
+ # Note: expiry time is normalised to seconds since epoch. V0
+ # cookies should have the Expires cookie-attribute, and V1 cookies
+ # should have Max-Age, but since V1 includes RFC 2109 cookies (and
+ # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
+ # accept either (but prefer Max-Age).
+ max_age_set = False
+
+ bad_cookie = False
+
+ standard = {}
+ rest = {}
+ for k, v in cookie_attrs[1:]:
+ lc = k.lower()
+ # don't lose case distinction for unknown fields
+ if lc in value_attrs or lc in boolean_attrs:
+ k = lc
+ if k in boolean_attrs and v is None:
+ # boolean cookie-attribute is present, but has no value
+ # (like "discard", rather than "port=80")
+ v = True
+ if k in standard:
+ # only first value is significant
+ continue
+ if k == "domain":
+ if v is None:
+ _debug(" missing value for domain attribute")
+ bad_cookie = True
+ break
+ # RFC 2965 section 3.3.3
+ v = v.lower()
+ if k == "expires":
+ if max_age_set:
+ # Prefer max-age to expires (like Mozilla)
+ continue
+ if v is None:
+ _debug(" missing or invalid value for expires "
+ "attribute: treating as session cookie")
+ continue
+ if k == "max-age":
+ max_age_set = True
+ try:
+ v = int(v)
+ except ValueError:
+ _debug(" missing or invalid (non-numeric) value for "
+ "max-age attribute")
+ bad_cookie = True
+ break
+ # convert RFC 2965 Max-Age to seconds since epoch
+ # XXX Strictly you're supposed to follow RFC 2616
+ # age-calculation rules. Remember that zero Max-Age
+ # is a request to discard (old and new) cookie, though.
+ k = "expires"
+ v = self._now + v
+ if (k in value_attrs) or (k in boolean_attrs):
+ if (v is None and
+ k not in ("port", "comment", "commenturl")):
+ _debug(" missing value for %s attribute" % k)
+ bad_cookie = True
+ break
+ standard[k] = v
+ else:
+ rest[k] = v
+
+ if bad_cookie:
+ continue
+
+ cookie_tuples.append((name, value, standard, rest))
+
+ return cookie_tuples
+
+ def _cookie_from_cookie_tuple(self, tup, request):
+ # standard is dict of standard cookie-attributes, rest is dict of the
+ # rest of them
+ name, value, standard, rest = tup
+
+ domain = standard.get("domain", Absent)
+ path = standard.get("path", Absent)
+ port = standard.get("port", Absent)
+ expires = standard.get("expires", Absent)
+
+ # set the easy defaults
+ version = standard.get("version", None)
+ if version is not None:
+ try:
+ version = int(version)
+ except ValueError:
+ return None # invalid version, ignore cookie
+ secure = standard.get("secure", False)
+ # (discard is also set if expires is Absent)
+ discard = standard.get("discard", False)
+ comment = standard.get("comment", None)
+ comment_url = standard.get("commenturl", None)
+
+ # set default path
+ if path is not Absent and path != "":
+ path_specified = True
+ path = escape_path(path)
+ else:
+ path_specified = False
+ path = request_path(request)
+ i = path.rfind("/")
+ if i != -1:
+ if version == 0:
+ # Netscape spec parts company from reality here
+ path = path[:i]
+ else:
+ path = path[:i+1]
+ if len(path) == 0: path = "/"
+
+ # set default domain
+ domain_specified = domain is not Absent
+ # but first we have to remember whether it starts with a dot
+ domain_initial_dot = False
+ if domain_specified:
+ domain_initial_dot = bool(domain.startswith("."))
+ if domain is Absent:
+ req_host, erhn = eff_request_host(request)
+ domain = erhn
+ elif not domain.startswith("."):
+ domain = "."+domain
+
+ # set default port
+ port_specified = False
+ if port is not Absent:
+ if port is None:
+ # Port attr present, but has no value: default to request port.
+ # Cookie should then only be sent back on that port.
+ port = request_port(request)
+ else:
+ port_specified = True
+ port = re.sub(r"\s+", "", port)
+ else:
+ # No port attr present. Cookie can be sent back on any port.
+ port = None
+
+ # set default expires and discard
+ if expires is Absent:
+ expires = None
+ discard = True
+ elif expires <= self._now:
+ # Expiry date in past is request to delete cookie. This can't be
+ # in DefaultCookiePolicy, because can't delete cookies there.
+ try:
+ self.clear(domain, path, name)
+ except KeyError:
+ pass
+ _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
+ domain, path, name)
+ return None
+
+ return Cookie(version,
+ name, value,
+ port, port_specified,
+ domain, domain_specified, domain_initial_dot,
+ path, path_specified,
+ secure,
+ expires,
+ discard,
+ comment,
+ comment_url,
+ rest)
+
+ def _cookies_from_attrs_set(self, attrs_set, request):
+ cookie_tuples = self._normalized_cookie_tuples(attrs_set)
+
+ cookies = []
+ for tup in cookie_tuples:
+ cookie = self._cookie_from_cookie_tuple(tup, request)
+ if cookie: cookies.append(cookie)
+ return cookies
+
+ def _process_rfc2109_cookies(self, cookies):
+ rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
+ if rfc2109_as_ns is None:
+ rfc2109_as_ns = not self._policy.rfc2965
+ for cookie in cookies:
+ if cookie.version == 1:
+ cookie.rfc2109 = True
+ if rfc2109_as_ns:
+ # treat 2109 cookies as Netscape cookies rather than
+ # as RFC2965 cookies
+ cookie.version = 0
+
+ def make_cookies(self, response, request):
+ """Return sequence of Cookie objects extracted from response object."""
+ # get cookie-attributes for RFC 2965 and Netscape protocols
+ headers = response.info()
+ rfc2965_hdrs = headers.getheaders("Set-Cookie2")
+ ns_hdrs = headers.getheaders("Set-Cookie")
+
+ rfc2965 = self._policy.rfc2965
+ netscape = self._policy.netscape
+
+ if ((not rfc2965_hdrs and not ns_hdrs) or
+ (not ns_hdrs and not rfc2965) or
+ (not rfc2965_hdrs and not netscape) or
+ (not netscape and not rfc2965)):
+ return [] # no relevant cookie headers: quick exit
+
+ try:
+ cookies = self._cookies_from_attrs_set(
+ split_header_words(rfc2965_hdrs), request)
+ except Exception:
+ _warn_unhandled_exception()
+ cookies = []
+
+ if ns_hdrs and netscape:
+ try:
+ # RFC 2109 and Netscape cookies
+ ns_cookies = self._cookies_from_attrs_set(
+ parse_ns_headers(ns_hdrs), request)
+ except Exception:
+ _warn_unhandled_exception()
+ ns_cookies = []
+ self._process_rfc2109_cookies(ns_cookies)
+
+ # Look for Netscape cookies (from Set-Cookie headers) that match
+ # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
+ # For each match, keep the RFC 2965 cookie and ignore the Netscape
+ # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
+ # bundled in with the Netscape cookies for this purpose, which is
+ # reasonable behaviour.
+ if rfc2965:
+ lookup = {}
+ for cookie in cookies:
+ lookup[(cookie.domain, cookie.path, cookie.name)] = None
+
+ def no_matching_rfc2965(ns_cookie, lookup=lookup):
+ key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
+ return key not in lookup
+ ns_cookies = filter(no_matching_rfc2965, ns_cookies)
+
+ if ns_cookies:
+ cookies.extend(ns_cookies)
+
+ return cookies
+
+ def set_cookie_if_ok(self, cookie, request):
+ """Set a cookie if policy says it's OK to do so."""
+ self._cookies_lock.acquire()
+ try:
+ self._policy._now = self._now = int(time.time())
+
+ if self._policy.set_ok(cookie, request):
+ self.set_cookie(cookie)
+
+
+ finally:
+ self._cookies_lock.release()
+
+ def set_cookie(self, cookie):
+ """Set a cookie, without checking whether or not it should be set."""
+ c = self._cookies
+ self._cookies_lock.acquire()
+ try:
+ if cookie.domain not in c: c[cookie.domain] = {}
+ c2 = c[cookie.domain]
+ if cookie.path not in c2: c2[cookie.path] = {}
+ c3 = c2[cookie.path]
+ c3[cookie.name] = cookie
+ finally:
+ self._cookies_lock.release()
+
+ def extract_cookies(self, response, request):
+ """Extract cookies from response, where allowable given the request."""
+ _debug("extract_cookies: %s", response.info())
+ self._cookies_lock.acquire()
+ try:
+ self._policy._now = self._now = int(time.time())
+
+ for cookie in self.make_cookies(response, request):
+ if self._policy.set_ok(cookie, request):
+ _debug(" setting cookie: %s", cookie)
+ self.set_cookie(cookie)
+ finally:
+ self._cookies_lock.release()
+
+ def clear(self, domain=None, path=None, name=None):
+ """Clear some cookies.
+
+ Invoking this method without arguments will clear all cookies. If
+ given a single argument, only cookies belonging to that domain will be
+ removed. If given two arguments, cookies belonging to the specified
+ path within that domain are removed. If given three arguments, then
+ the cookie with the specified name, path and domain is removed.
+
+ Raises KeyError if no matching cookie exists.
+
+ """
+ if name is not None:
+ if (domain is None) or (path is None):
+ raise ValueError(
+ "domain and path must be given to remove a cookie by name")
+ del self._cookies[domain][path][name]
+ elif path is not None:
+ if domain is None:
+ raise ValueError(
+ "domain must be given to remove cookies by path")
+ del self._cookies[domain][path]
+ elif domain is not None:
+ del self._cookies[domain]
+ else:
+ self._cookies = {}
+
+ def clear_session_cookies(self):
+ """Discard all session cookies.
+
+ Note that the .save() method won't save session cookies anyway, unless
+ you ask otherwise by passing a true ignore_discard argument.
+
+ """
+ self._cookies_lock.acquire()
+ try:
+ for cookie in self:
+ if cookie.discard:
+ self.clear(cookie.domain, cookie.path, cookie.name)
+ finally:
+ self._cookies_lock.release()
+
+ def clear_expired_cookies(self):
+ """Discard all expired cookies.
+
+ You probably don't need to call this method: expired cookies are never
+ sent back to the server (provided you're using DefaultCookiePolicy),
+ this method is called by CookieJar itself every so often, and the
+ .save() method won't save expired cookies anyway (unless you ask
+ otherwise by passing a true ignore_expires argument).
+
+ """
+ self._cookies_lock.acquire()
+ try:
+ now = time.time()
+ for cookie in self:
+ if cookie.is_expired(now):
+ self.clear(cookie.domain, cookie.path, cookie.name)
+ finally:
+ self._cookies_lock.release()
+
+ def __iter__(self):
+ return deepvalues(self._cookies)
+
+ def __len__(self):
+ """Return number of contained cookies."""
+ i = 0
+ for cookie in self: i = i + 1
+ return i
+
+ def __repr__(self):
+ r = []
+ for cookie in self: r.append(repr(cookie))
+ return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r))
+
+ def __str__(self):
+ r = []
+ for cookie in self: r.append(str(cookie))
+ return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r))
+
+
+# derives from IOError for backwards-compatibility with Python 2.4.0
+class LoadError(IOError): pass
+
+class FileCookieJar(CookieJar):
+ """CookieJar that can be loaded from and saved to a file."""
+
+ def __init__(self, filename=None, delayload=False, policy=None):
+ """
+ Cookies are NOT loaded from the named file until either the .load() or
+ .revert() method is called.
+
+ """
+ CookieJar.__init__(self, policy)
+ if filename is not None:
+ try:
+ filename+""
+ except:
+ raise ValueError("filename must be string-like")
+ self.filename = filename
+ self.delayload = bool(delayload)
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Save cookies to a file."""
+ raise NotImplementedError()
+
+ def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Load cookies from a file."""
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename)
+ try:
+ self._really_load(f, filename, ignore_discard, ignore_expires)
+ finally:
+ f.close()
+
+ def revert(self, filename=None,
+ ignore_discard=False, ignore_expires=False):
+ """Clear all cookies and reload cookies from a saved file.
+
+ Raises LoadError (or IOError) if reversion is not successful; the
+ object's state will not be altered if this happens.
+
+ """
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ self._cookies_lock.acquire()
+ try:
+
+ old_state = copy.deepcopy(self._cookies)
+ self._cookies = {}
+ try:
+ self.load(filename, ignore_discard, ignore_expires)
+ except (LoadError, IOError):
+ self._cookies = old_state
+ raise
+
+ finally:
+ self._cookies_lock.release()
+
+from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
+from _MozillaCookieJar import MozillaCookieJar
diff --git a/cashew/Lib/copy.py b/cashew/Lib/copy.py
new file mode 100644
index 0000000..daf81a3
--- /dev/null
+++ b/cashew/Lib/copy.py
@@ -0,0 +1,433 @@
+"""Generic (shallow and deep) copying operations.
+
+Interface summary:
+
+ import copy
+
+ x = copy.copy(y) # make a shallow copy of y
+ x = copy.deepcopy(y) # make a deep copy of y
+
+For module specific errors, copy.Error is raised.
+
+The difference between shallow and deep copying is only relevant for
+compound objects (objects that contain other objects, like lists or
+class instances).
+
+- A shallow copy constructs a new compound object and then (to the
+ extent possible) inserts *the same objects* into it that the
+ original contains.
+
+- A deep copy constructs a new compound object and then, recursively,
+ inserts *copies* into it of the objects found in the original.
+
+Two problems often exist with deep copy operations that don't exist
+with shallow copy operations:
+
+ a) recursive objects (compound objects that, directly or indirectly,
+ contain a reference to themselves) may cause a recursive loop
+
+ b) because deep copy copies *everything* it may copy too much, e.g.
+ administrative data structures that should be shared even between
+ copies
+
+Python's deep copy operation avoids these problems by:
+
+ a) keeping a table of objects already copied during the current
+ copying pass
+
+ b) letting user-defined classes override the copying operation or the
+ set of components copied
+
+This version does not copy types like module, class, function, method,
+nor stack trace, stack frame, nor file, socket, window, nor array, nor
+any similar types.
+
+Classes can use the same interfaces to control copying that they use
+to control pickling: they can define methods called __getinitargs__(),
+__getstate__() and __setstate__(). See the documentation for module
+"pickle" for information on these methods.
+"""
+
+import types
+import weakref
+from copy_reg import dispatch_table
+
+class Error(Exception):
+    """Exception raised for copy-module specific errors."""
+error = Error # backward compatibility
+
+try:
+ from org.python.core import PyStringMap
+except ImportError:
+ PyStringMap = None
+
+__all__ = ["Error", "copy", "deepcopy"]
+
+def copy(x):
+ """Shallow copy operation on arbitrary Python objects.
+
+ See the module's __doc__ string for more info.
+ """
+
+ cls = type(x)
+
+ copier = _copy_dispatch.get(cls)
+ if copier:
+ return copier(x)
+
+ copier = getattr(cls, "__copy__", None)
+ if copier:
+ return copier(x)
+
+ reductor = dispatch_table.get(cls)
+ if reductor:
+ rv = reductor(x)
+ else:
+ reductor = getattr(x, "__reduce_ex__", None)
+ if reductor:
+ rv = reductor(2)
+ else:
+ reductor = getattr(x, "__reduce__", None)
+ if reductor:
+ rv = reductor()
+ else:
+ raise Error("un(shallow)copyable object of type %s" % cls)
+
+ return _reconstruct(x, rv, 0)
+
+
+_copy_dispatch = d = {}
+
+def _copy_immutable(x):
+ return x
+for t in (type(None), int, long, float, bool, str, tuple,
+ frozenset, type, xrange, types.ClassType,
+ types.BuiltinFunctionType, type(Ellipsis),
+ types.FunctionType, weakref.ref):
+ d[t] = _copy_immutable
+for name in ("ComplexType", "UnicodeType", "CodeType"):
+ t = getattr(types, name, None)
+ if t is not None:
+ d[t] = _copy_immutable
+
+def _copy_with_constructor(x):
+ return type(x)(x)
+for t in (list, dict, set):
+ d[t] = _copy_with_constructor
+
+def _copy_with_copy_method(x):
+ return x.copy()
+if PyStringMap is not None:
+ d[PyStringMap] = _copy_with_copy_method
+
+def _copy_inst(x):
+ if hasattr(x, '__copy__'):
+ return x.__copy__()
+ if hasattr(x, '__getinitargs__'):
+ args = x.__getinitargs__()
+ y = x.__class__(*args)
+ else:
+ y = _EmptyClass()
+ y.__class__ = x.__class__
+ if hasattr(x, '__getstate__'):
+ state = x.__getstate__()
+ else:
+ state = x.__dict__
+ if hasattr(y, '__setstate__'):
+ y.__setstate__(state)
+ else:
+ y.__dict__.update(state)
+ return y
+d[types.InstanceType] = _copy_inst
+
+del d
+
+def deepcopy(x, memo=None, _nil=[]):
+ """Deep copy operation on arbitrary Python objects.
+
+ See the module's __doc__ string for more info.
+ """
+
+ if memo is None:
+ memo = {}
+
+ d = id(x)
+ y = memo.get(d, _nil)
+ if y is not _nil:
+ return y
+
+ cls = type(x)
+
+ copier = _deepcopy_dispatch.get(cls)
+ if copier:
+ y = copier(x, memo)
+ else:
+ try:
+ issc = issubclass(cls, type)
+ except TypeError: # cls is not a class (old Boost; see SF #502085)
+ issc = 0
+ if issc:
+ y = _deepcopy_atomic(x, memo)
+ else:
+ copier = getattr(x, "__deepcopy__", None)
+ if copier:
+ y = copier(memo)
+ else:
+ reductor = dispatch_table.get(cls)
+ if reductor:
+ rv = reductor(x)
+ else:
+ reductor = getattr(x, "__reduce_ex__", None)
+ if reductor:
+ rv = reductor(2)
+ else:
+ reductor = getattr(x, "__reduce__", None)
+ if reductor:
+ rv = reductor()
+ else:
+ raise Error(
+ "un(deep)copyable object of type %s" % cls)
+ y = _reconstruct(x, rv, 1, memo)
+
+ memo[d] = y
+ _keep_alive(x, memo) # Make sure x lives at least as long as d
+ return y
+
+_deepcopy_dispatch = d = {}
+
+def _deepcopy_atomic(x, memo):
+ return x
+d[type(None)] = _deepcopy_atomic
+d[type(Ellipsis)] = _deepcopy_atomic
+d[int] = _deepcopy_atomic
+d[long] = _deepcopy_atomic
+d[float] = _deepcopy_atomic
+d[bool] = _deepcopy_atomic
+try:
+ d[complex] = _deepcopy_atomic
+except NameError:
+ pass
+d[str] = _deepcopy_atomic
+try:
+ d[unicode] = _deepcopy_atomic
+except NameError:
+ pass
+try:
+ d[types.CodeType] = _deepcopy_atomic
+except AttributeError:
+ pass
+d[type] = _deepcopy_atomic
+d[xrange] = _deepcopy_atomic
+d[types.ClassType] = _deepcopy_atomic
+d[types.BuiltinFunctionType] = _deepcopy_atomic
+d[types.FunctionType] = _deepcopy_atomic
+d[weakref.ref] = _deepcopy_atomic
+
+def _deepcopy_list(x, memo):
+ y = []
+ memo[id(x)] = y
+ for a in x:
+ y.append(deepcopy(a, memo))
+ return y
+d[list] = _deepcopy_list
+
+def _deepcopy_tuple(x, memo):
+ y = []
+ for a in x:
+ y.append(deepcopy(a, memo))
+ d = id(x)
+ try:
+ return memo[d]
+ except KeyError:
+ pass
+ for i in range(len(x)):
+ if x[i] is not y[i]:
+ y = tuple(y)
+ break
+ else:
+ y = x
+ memo[d] = y
+ return y
+d[tuple] = _deepcopy_tuple
+
+def _deepcopy_dict(x, memo):
+ y = {}
+ memo[id(x)] = y
+ for key, value in x.iteritems():
+ y[deepcopy(key, memo)] = deepcopy(value, memo)
+ return y
+d[dict] = _deepcopy_dict
+if PyStringMap is not None:
+ d[PyStringMap] = _deepcopy_dict
+
+def _deepcopy_method(x, memo): # Copy instance methods
+ return type(x)(x.im_func, deepcopy(x.im_self, memo), x.im_class)
+_deepcopy_dispatch[types.MethodType] = _deepcopy_method
+
+def _keep_alive(x, memo):
+ """Keeps a reference to the object x in the memo.
+
+ Because we remember objects by their id, we have
+ to assure that possibly temporary objects are kept
+ alive by referencing them.
+ We store a reference at the id of the memo, which should
+ normally not be used unless someone tries to deepcopy
+ the memo itself...
+ """
+ try:
+ memo[id(memo)].append(x)
+ except KeyError:
+ # aha, this is the first one :-)
+ memo[id(memo)]=[x]
+
+def _deepcopy_inst(x, memo):
+ if hasattr(x, '__deepcopy__'):
+ return x.__deepcopy__(memo)
+ if hasattr(x, '__getinitargs__'):
+ args = x.__getinitargs__()
+ args = deepcopy(args, memo)
+ y = x.__class__(*args)
+ else:
+ y = _EmptyClass()
+ y.__class__ = x.__class__
+ memo[id(x)] = y
+ if hasattr(x, '__getstate__'):
+ state = x.__getstate__()
+ else:
+ state = x.__dict__
+ state = deepcopy(state, memo)
+ if hasattr(y, '__setstate__'):
+ y.__setstate__(state)
+ else:
+ y.__dict__.update(state)
+ return y
+d[types.InstanceType] = _deepcopy_inst
+
+def _reconstruct(x, info, deep, memo=None):
+ if isinstance(info, str):
+ return x
+ assert isinstance(info, tuple)
+ if memo is None:
+ memo = {}
+ n = len(info)
+ assert n in (2, 3, 4, 5)
+ callable, args = info[:2]
+ if n > 2:
+ state = info[2]
+ else:
+ state = None
+ if n > 3:
+ listiter = info[3]
+ else:
+ listiter = None
+ if n > 4:
+ dictiter = info[4]
+ else:
+ dictiter = None
+ if deep:
+ args = deepcopy(args, memo)
+ y = callable(*args)
+ memo[id(x)] = y
+
+ if state is not None:
+ if deep:
+ state = deepcopy(state, memo)
+ if hasattr(y, '__setstate__'):
+ y.__setstate__(state)
+ else:
+ if isinstance(state, tuple) and len(state) == 2:
+ state, slotstate = state
+ else:
+ slotstate = None
+ if state is not None:
+ y.__dict__.update(state)
+ if slotstate is not None:
+ for key, value in slotstate.iteritems():
+ setattr(y, key, value)
+
+ if listiter is not None:
+ for item in listiter:
+ if deep:
+ item = deepcopy(item, memo)
+ y.append(item)
+ if dictiter is not None:
+ for key, value in dictiter:
+ if deep:
+ key = deepcopy(key, memo)
+ value = deepcopy(value, memo)
+ y[key] = value
+ return y
+
+del d
+
+del types
+
+# Helper for instance creation without calling __init__
+class _EmptyClass:
+ pass
+
+def _test():
+ l = [None, 1, 2L, 3.14, 'xyzzy', (1, 2L), [3.14, 'abc'],
+ {'abc': 'ABC'}, (), [], {}]
+ l1 = copy(l)
+ print l1==l
+ l1 = map(copy, l)
+ print l1==l
+ l1 = deepcopy(l)
+ print l1==l
+ class C:
+ def __init__(self, arg=None):
+ self.a = 1
+ self.arg = arg
+ if __name__ == '__main__':
+ import sys
+ file = sys.argv[0]
+ else:
+ file = __file__
+ self.fp = open(file)
+ self.fp.close()
+ def __getstate__(self):
+ return {'a': self.a, 'arg': self.arg}
+ def __setstate__(self, state):
+ for key, value in state.iteritems():
+ setattr(self, key, value)
+ def __deepcopy__(self, memo=None):
+ new = self.__class__(deepcopy(self.arg, memo))
+ new.a = self.a
+ return new
+ c = C('argument sketch')
+ l.append(c)
+ l2 = copy(l)
+ print l == l2
+ print l
+ print l2
+ l2 = deepcopy(l)
+ print l == l2
+ print l
+ print l2
+ l.append({l[1]: l, 'xyz': l[2]})
+ l3 = copy(l)
+ import repr
+ print map(repr.repr, l)
+ print map(repr.repr, l1)
+ print map(repr.repr, l2)
+ print map(repr.repr, l3)
+ l3 = deepcopy(l)
+ import repr
+ print map(repr.repr, l)
+ print map(repr.repr, l1)
+ print map(repr.repr, l2)
+ print map(repr.repr, l3)
+ class odict(dict):
+ def __init__(self, d = {}):
+ self.a = 99
+ dict.__init__(self, d)
+ def __setitem__(self, k, i):
+ dict.__setitem__(self, k, i)
+ self.a
+ o = odict({"A" : "B"})
+ x = deepcopy(o)
+ print(o, x)
+
+if __name__ == '__main__':
+ _test()
diff --git a/cashew/Lib/csv.py b/cashew/Lib/csv.py
new file mode 100644
index 0000000..70c53ae
--- /dev/null
+++ b/cashew/Lib/csv.py
@@ -0,0 +1,456 @@
+
+"""
+csv.py - read/write/investigate CSV files
+"""
+
+import re
+from functools import reduce
+from _csv import Error, __version__, writer, reader, register_dialect, \
+ unregister_dialect, get_dialect, list_dialects, \
+ field_size_limit, \
+ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \
+ __doc__
+from _csv import Dialect as _Dialect
+
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+
+__all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
+ "Error", "Dialect", "__doc__", "excel", "excel_tab",
+ "field_size_limit", "reader", "writer",
+ "register_dialect", "get_dialect", "list_dialects", "Sniffer",
+ "unregister_dialect", "__version__", "DictReader", "DictWriter" ]
+
+class Dialect:
+ """Describe an Excel dialect.
+
+ This must be subclassed (see csv.excel). Valid attributes are:
+ delimiter, quotechar, escapechar, doublequote, skipinitialspace,
+ lineterminator, quoting.
+
+ """
+ _name = ""
+ _valid = False
+ # placeholders
+ delimiter = None
+ quotechar = None
+ escapechar = None
+ doublequote = None
+ skipinitialspace = None
+ lineterminator = None
+ quoting = None
+
+ def __init__(self):
+ if self.__class__ != Dialect:
+ self._valid = True
+ self._validate()
+
+ def _validate(self):
+ try:
+ _Dialect(self)
+ except TypeError, e:
+ # We do this for compatibility with py2.3
+ raise Error(str(e))
+
+class excel(Dialect):
+ """Describe the usual properties of Excel-generated CSV files."""
+ delimiter = ','
+ quotechar = '"'
+ doublequote = True
+ skipinitialspace = False
+ lineterminator = '\r\n'
+ quoting = QUOTE_MINIMAL
+register_dialect("excel", excel)
+
+class excel_tab(excel):
+ """Describe the usual properties of Excel-generated TAB-delimited files."""
+ delimiter = '\t'
+register_dialect("excel-tab", excel_tab)
+
+
+class DictReader:
+ def __init__(self, f, fieldnames=None, restkey=None, restval=None,
+ dialect="excel", *args, **kwds):
+ self._fieldnames = fieldnames # list of keys for the dict
+ self.restkey = restkey # key to catch long rows
+ self.restval = restval # default value for short rows
+ self.reader = reader(f, dialect, *args, **kwds)
+ self.dialect = dialect
+ self.line_num = 0
+
+ def __iter__(self):
+ return self
+
+ @property
+ def fieldnames(self):
+ if self._fieldnames is None:
+ try:
+ self._fieldnames = self.reader.next()
+ except StopIteration:
+ pass
+ self.line_num = self.reader.line_num
+ return self._fieldnames
+
+ # Issue 20004: Because DictReader is a classic class, this setter is
+ # ignored. At this point in 2.7's lifecycle, it is too late to change the
+ # base class for fear of breaking working code. If you want to change
+ # fieldnames without overwriting the getter, set _fieldnames directly.
+ @fieldnames.setter
+ def fieldnames(self, value):
+ self._fieldnames = value
+
+ def next(self):
+ if self.line_num == 0:
+ # Used only for its side effect.
+ self.fieldnames
+ row = self.reader.next()
+ self.line_num = self.reader.line_num
+
+ # unlike the basic reader, we prefer not to return blanks,
+ # because we will typically wind up with a dict full of None
+ # values
+ while row == []:
+ row = self.reader.next()
+ d = dict(zip(self.fieldnames, row))
+ lf = len(self.fieldnames)
+ lr = len(row)
+ if lf < lr:
+ d[self.restkey] = row[lf:]
+ elif lf > lr:
+ for key in self.fieldnames[lr:]:
+ d[key] = self.restval
+ return d
+
+
+class DictWriter:
+ def __init__(self, f, fieldnames, restval="", extrasaction="raise",
+ dialect="excel", *args, **kwds):
+ self.fieldnames = fieldnames # list of keys for the dict
+ self.restval = restval # for writing short dicts
+ if extrasaction.lower() not in ("raise", "ignore"):
+ raise ValueError, \
+ ("extrasaction (%s) must be 'raise' or 'ignore'" %
+ extrasaction)
+ self.extrasaction = extrasaction
+ self.writer = writer(f, dialect, *args, **kwds)
+
+ def writeheader(self):
+ header = dict(zip(self.fieldnames, self.fieldnames))
+ self.writerow(header)
+
+ def _dict_to_list(self, rowdict):
+ if self.extrasaction == "raise":
+ wrong_fields = [k for k in rowdict if k not in self.fieldnames]
+ if wrong_fields:
+ raise ValueError("dict contains fields not in fieldnames: "
+ + ", ".join([repr(x) for x in wrong_fields]))
+ return [rowdict.get(key, self.restval) for key in self.fieldnames]
+
+ def writerow(self, rowdict):
+ return self.writer.writerow(self._dict_to_list(rowdict))
+
+ def writerows(self, rowdicts):
+ rows = []
+ for rowdict in rowdicts:
+ rows.append(self._dict_to_list(rowdict))
+ return self.writer.writerows(rows)
+
+# Guard Sniffer's type checking against builds that exclude complex()
+try:
+ complex
+except NameError:
+ complex = float
+
+class Sniffer:
+ '''
+ "Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
+ Returns a Dialect object.
+ '''
+ def __init__(self):
+ # in case there is more than one possible delimiter
+ self.preferred = [',', '\t', ';', ' ', ':']
+
+
+ def sniff(self, sample, delimiters=None):
+ """
+ Returns a dialect (or None) corresponding to the sample
+ """
+
+ quotechar, doublequote, delimiter, skipinitialspace = \
+ self._guess_quote_and_delimiter(sample, delimiters)
+ if not delimiter:
+ delimiter, skipinitialspace = self._guess_delimiter(sample,
+ delimiters)
+
+ if not delimiter:
+ raise Error, "Could not determine delimiter"
+
+ class dialect(Dialect):
+ _name = "sniffed"
+ lineterminator = '\r\n'
+ quoting = QUOTE_MINIMAL
+ # escapechar = ''
+
+ dialect.doublequote = doublequote
+ dialect.delimiter = delimiter
+ # _csv.reader won't accept a quotechar of ''
+ dialect.quotechar = quotechar or '"'
+ dialect.skipinitialspace = skipinitialspace
+
+ return dialect
+
+
+ def _guess_quote_and_delimiter(self, data, delimiters):
+ """
+ Looks for text enclosed between two identical quotes
+ (the probable quotechar) which are preceded and followed
+ by the same character (the probable delimiter).
+ For example:
+ ,'some text',
+ The quote with the most wins, same with the delimiter.
+ If there is no quotechar the delimiter can't be determined
+ this way.
+ """
+
+ matches = []
+ for restr in ('(?P[^\w\n"\'])(?P ?)(?P["\']).*?(?P=quote)(?P=delim)', # ,".*?",
+ '(?:^|\n)(?P["\']).*?(?P=quote)(?P[^\w\n"\'])(?P ?)', # ".*?",
+ '(?P[^\w\n"\'])(?P ?)(?P["\']).*?(?P=quote)(?:$|\n)', # ,".*?"
+ '(?:^|\n)(?P["\']).*?(?P=quote)(?:$|\n)'): # ".*?" (no delim, no space)
+ regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
+ matches = regexp.findall(data)
+ if matches:
+ break
+
+ if not matches:
+ # (quotechar, doublequote, delimiter, skipinitialspace)
+ return ('', False, None, 0)
+ quotes = {}
+ delims = {}
+ spaces = 0
+ for m in matches:
+ n = regexp.groupindex['quote'] - 1
+ key = m[n]
+ if key:
+ quotes[key] = quotes.get(key, 0) + 1
+ try:
+ n = regexp.groupindex['delim'] - 1
+ key = m[n]
+ except KeyError:
+ continue
+ if key and (delimiters is None or key in delimiters):
+ delims[key] = delims.get(key, 0) + 1
+ try:
+ n = regexp.groupindex['space'] - 1
+ except KeyError:
+ continue
+ if m[n]:
+ spaces += 1
+
+ quotechar = reduce(lambda a, b, quotes = quotes:
+ (quotes[a] > quotes[b]) and a or b, quotes.keys())
+
+ if delims:
+ delim = reduce(lambda a, b, delims = delims:
+ (delims[a] > delims[b]) and a or b, delims.keys())
+ skipinitialspace = delims[delim] == spaces
+ if delim == '\n': # most likely a file with a single column
+ delim = ''
+ else:
+ # there is *no* delimiter, it's a single column of quoted data
+ delim = ''
+ skipinitialspace = 0
+
+ # if we see an extra quote between delimiters, we've got a
+ # double quoted format
+ dq_regexp = re.compile(
+ r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
+ {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
+
+
+
+ if dq_regexp.search(data):
+ doublequote = True
+ else:
+ doublequote = False
+
+ return (quotechar, doublequote, delim, skipinitialspace)
+
+
+ def _guess_delimiter(self, data, delimiters):
+ """
+ The delimiter /should/ occur the same number of times on
+ each row. However, due to malformed data, it may not. We don't want
+ an all or nothing approach, so we allow for small variations in this
+ number.
+ 1) build a table of the frequency of each character on every line.
+ 2) build a table of frequencies of this frequency (meta-frequency?),
+ e.g. 'x occurred 5 times in 10 rows, 6 times in 1000 rows,
+ 7 times in 2 rows'
+ 3) use the mode of the meta-frequency to determine the /expected/
+ frequency for that character
+ 4) find out how often the character actually meets that goal
+ 5) the character that best meets its goal is the delimiter
+ For performance reasons, the data is evaluated in chunks, so it can
+ try and evaluate the smallest portion of the data possible, evaluating
+ additional chunks as necessary.
+ """
+
+ data = filter(None, data.split('\n'))
+
+ ascii = [chr(c) for c in range(127)] # 7-bit ASCII
+
+ # build frequency tables
+ chunkLength = min(10, len(data))
+ iteration = 0
+ charFrequency = {}
+ modes = {}
+ delims = {}
+ start, end = 0, min(chunkLength, len(data))
+ while start < len(data):
+ iteration += 1
+ for line in data[start:end]:
+ for char in ascii:
+ metaFrequency = charFrequency.get(char, {})
+ # must count even if frequency is 0
+ freq = line.count(char)
+ # value is the mode
+ metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
+ charFrequency[char] = metaFrequency
+
+ for char in charFrequency.keys():
+ items = charFrequency[char].items()
+ if len(items) == 1 and items[0][0] == 0:
+ continue
+ # get the mode of the frequencies
+ if len(items) > 1:
+ modes[char] = reduce(lambda a, b: a[1] > b[1] and a or b,
+ items)
+ # adjust the mode - subtract the sum of all
+ # other frequencies
+ items.remove(modes[char])
+ modes[char] = (modes[char][0], modes[char][1]
+ - reduce(lambda a, b: (0, a[1] + b[1]),
+ items)[1])
+ else:
+ modes[char] = items[0]
+
+ # build a list of possible delimiters
+ modeList = modes.items()
+ total = float(chunkLength * iteration)
+ # (rows of consistent data) / (number of rows) = 100%
+ consistency = 1.0
+ # minimum consistency threshold
+ threshold = 0.9
+ while len(delims) == 0 and consistency >= threshold:
+ for k, v in modeList:
+ if v[0] > 0 and v[1] > 0:
+ if ((v[1]/total) >= consistency and
+ (delimiters is None or k in delimiters)):
+ delims[k] = v
+ consistency -= 0.01
+
+ if len(delims) == 1:
+ delim = delims.keys()[0]
+ skipinitialspace = (data[0].count(delim) ==
+ data[0].count("%c " % delim))
+ return (delim, skipinitialspace)
+
+ # analyze another chunkLength lines
+ start = end
+ end += chunkLength
+
+ if not delims:
+ return ('', 0)
+
+ # if there's more than one, fall back to a 'preferred' list
+ if len(delims) > 1:
+ for d in self.preferred:
+ if d in delims.keys():
+ skipinitialspace = (data[0].count(d) ==
+ data[0].count("%c " % d))
+ return (d, skipinitialspace)
+
+ # nothing else indicates a preference, pick the character that
+ # dominates(?)
+ items = [(v,k) for (k,v) in delims.items()]
+ items.sort()
+ delim = items[-1][1]
+
+ skipinitialspace = (data[0].count(delim) ==
+ data[0].count("%c " % delim))
+ return (delim, skipinitialspace)
+
+
+ def has_header(self, sample):
+ # Creates a dictionary of types of data in each column. If any
+ # column is of a single type (say, integers), *except* for the first
+ # row, then the first row is presumed to be labels. If the type
+ # can't be determined, it is assumed to be a string in which case
+ # the length of the string is the determining factor: if all of the
+ # rows except for the first are the same length, it's a header.
+ # Finally, a 'vote' is taken at the end for each column, adding or
+ # subtracting from the likelihood of the first row being a header.
+
+ rdr = reader(StringIO(sample), self.sniff(sample))
+
+ header = rdr.next() # assume first row is header
+
+ columns = len(header)
+ columnTypes = {}
+ for i in range(columns): columnTypes[i] = None
+
+ checked = 0
+ for row in rdr:
+ # arbitrary number of rows to check, to keep it sane
+ if checked > 20:
+ break
+ checked += 1
+
+ if len(row) != columns:
+ continue # skip rows that have irregular number of columns
+
+ for col in columnTypes.keys():
+
+ for thisType in [int, long, float, complex]:
+ try:
+ thisType(row[col])
+ break
+ except (ValueError, OverflowError):
+ pass
+ else:
+ # fallback to length of string
+ thisType = len(row[col])
+
+ # treat longs as ints
+ if thisType == long:
+ thisType = int
+
+ if thisType != columnTypes[col]:
+ if columnTypes[col] is None: # add new column type
+ columnTypes[col] = thisType
+ else:
+ # type is inconsistent, remove column from
+ # consideration
+ del columnTypes[col]
+
+ # finally, compare results against first row and "vote"
+ # on whether it's a header
+ hasHeader = 0
+ for col, colType in columnTypes.items():
+ if type(colType) == type(0): # it's a length
+ if len(header[col]) != colType:
+ hasHeader += 1
+ else:
+ hasHeader -= 1
+ else: # attempt typecast
+ try:
+ colType(header[col])
+ except (ValueError, TypeError):
+ hasHeader += 1
+ else:
+ hasHeader -= 1
+
+ return hasHeader > 0
diff --git a/cashew/Lib/ctypes/__init__.py b/cashew/Lib/ctypes/__init__.py
new file mode 100644
index 0000000..d3855b2
--- /dev/null
+++ b/cashew/Lib/ctypes/__init__.py
@@ -0,0 +1,558 @@
+"""create and manipulate C data types in Python"""
+
+import os as _os, sys as _sys
+
+__version__ = "1.1.0"
+
+from _ctypes import Union, Structure, Array
+from _ctypes import _Pointer
+from _ctypes import CFuncPtr as _CFuncPtr
+from _ctypes import __version__ as _ctypes_version
+from _ctypes import RTLD_LOCAL, RTLD_GLOBAL
+from _ctypes import ArgumentError
+
+from struct import calcsize as _calcsize
+
+if __version__ != _ctypes_version:
+ raise Exception("Version number mismatch", __version__, _ctypes_version)
+
+if _os.name in ("nt", "ce"):
+ from _ctypes import FormatError
+
+DEFAULT_MODE = RTLD_LOCAL
+if _os.name == "posix" and _sys.platform == "darwin":
+ # On OS X 10.3, we use RTLD_GLOBAL as default mode
+ # because RTLD_LOCAL does not work at least on some
+ # libraries. OS X 10.3 is Darwin 7, so we check for
+ # that.
+
+ if int(_os.uname()[2].split('.')[0]) < 8:
+ DEFAULT_MODE = RTLD_GLOBAL
+
+from _ctypes import FUNCFLAG_CDECL as _FUNCFLAG_CDECL, \
+ FUNCFLAG_PYTHONAPI as _FUNCFLAG_PYTHONAPI, \
+ FUNCFLAG_USE_ERRNO as _FUNCFLAG_USE_ERRNO, \
+ FUNCFLAG_USE_LASTERROR as _FUNCFLAG_USE_LASTERROR
+
+"""
+WINOLEAPI -> HRESULT
+WINOLEAPI_(type)
+
+STDMETHODCALLTYPE
+
+STDMETHOD(name)
+STDMETHOD_(type, name)
+
+STDAPICALLTYPE
+"""
+
+def create_string_buffer(init, size=None):
+    """Build a c_char array from a string or from an item count.
+
+    create_string_buffer(aString) -> character array
+    create_string_buffer(anInteger) -> character array
+    create_string_buffer(aString, anInteger) -> character array
+
+    A str/unicode init becomes the array's value; the array length is
+    size, or len(init) + 1 when size is None.  An int/long init is the
+    array length itself and size is not consulted.  Any other init
+    raises TypeError(init).
+    """
+    if isinstance(init, (int, long)):
+        return (c_char * init)()
+    if not isinstance(init, (str, unicode)):
+        raise TypeError(init)
+    length = len(init) + 1 if size is None else size
+    array = (c_char * length)()
+    array.value = init
+    return array
+
+def c_buffer(init, size=None):
+    """Forward both arguments to create_string_buffer() and return its result.
+
+    The deprecation warning below is commented out, so the call is
+    forwarded silently.
+    """
+## "deprecated, use create_string_buffer instead"
+## import warnings
+## warnings.warn("c_buffer is deprecated, use create_string_buffer instead",
+## DeprecationWarning, stacklevel=2)
+    return create_string_buffer(init, size)
+
+_c_functype_cache = {}
+def CFUNCTYPE(restype, *argtypes, **kw):
+    """CFUNCTYPE(restype, *argtypes,
+                 use_errno=False, use_last_error=False) -> function prototype.
+
+    restype: the result type
+    argtypes: a sequence specifying the argument types
+
+    Prototypes are cached per (restype, argtypes, flags), so calling
+    again with the same arguments returns the same class.  Keywords
+    other than use_errno and use_last_error raise ValueError.
+
+    The function prototype can be called in different ways to create a
+    callable object:
+
+    prototype(integer address) -> foreign function
+    prototype(callable) -> create and return a C callable function from callable
+    prototype(integer index, method name[, paramflags]) -> foreign function calling a COM method
+    prototype((ordinal number, dll object)[, paramflags]) -> foreign function exported by ordinal
+    prototype((function name, dll object)[, paramflags]) -> foreign function exported by name
+    """
+    flags = _FUNCFLAG_CDECL
+    # Each recognised keyword switches on one extra flag bit.
+    for option, bit in (("use_errno", _FUNCFLAG_USE_ERRNO),
+                        ("use_last_error", _FUNCFLAG_USE_LASTERROR)):
+        if kw.pop(option, False):
+            flags |= bit
+    if kw:
+        raise ValueError("unexpected keyword argument(s) %s" % kw.keys())
+    key = (restype, argtypes, flags)
+    if key in _c_functype_cache:
+        return _c_functype_cache[key]
+    class CFunctionType(_CFuncPtr):
+        _argtypes_ = argtypes
+        _restype_ = restype
+        _flags_ = flags
+    _c_functype_cache[key] = CFunctionType
+    return CFunctionType
+
+if _os.name in ("nt", "ce"):
+ from _ctypes import LoadLibrary as _dlopen
+ from _ctypes import FUNCFLAG_STDCALL as _FUNCFLAG_STDCALL
+ if _os.name == "ce":
+ # 'ce' doesn't have the stdcall calling convention
+ _FUNCFLAG_STDCALL = _FUNCFLAG_CDECL
+
+ _win_functype_cache = {}
+ def WINFUNCTYPE(restype, *argtypes, **kw):
+     # docstring set later (very similar to CFUNCTYPE.__doc__)
+     # Same contract as CFUNCTYPE, but starting from _FUNCFLAG_STDCALL and
+     # using its own cache, _win_functype_cache.
+     flags = _FUNCFLAG_STDCALL
+     if kw.pop("use_errno", False):
+         flags |= _FUNCFLAG_USE_ERRNO
+     if kw.pop("use_last_error", False):
+         flags |= _FUNCFLAG_USE_LASTERROR
+     if kw:
+         raise ValueError("unexpected keyword argument(s) %s" % kw.keys())
+     try:
+         return _win_functype_cache[(restype, argtypes, flags)]
+     except KeyError:
+         class WinFunctionType(_CFuncPtr):
+             _argtypes_ = argtypes
+             _restype_ = restype
+             _flags_ = flags
+         _win_functype_cache[(restype, argtypes, flags)] = WinFunctionType
+         return WinFunctionType
+ # WINFUNCTYPE itself carries no docstring, so guarding on its own __doc__
+ # would never install one.  Guard on the source docstring instead; it is
+ # None when docstrings are stripped, in which case there is nothing to copy.
+ if CFUNCTYPE.__doc__:
+     WINFUNCTYPE.__doc__ = CFUNCTYPE.__doc__.replace("CFUNCTYPE", "WINFUNCTYPE")
+
+elif _os.name == "posix":
+ from _ctypes import dlopen as _dlopen
+
+from _ctypes import sizeof, byref, addressof, alignment, resize
+from _ctypes import get_errno, set_errno
+from _ctypes import _SimpleCData
+
+def _check_size(typ, typecode=None):
+    """Compare sizeof(typ) with struct.calcsize() for the matching type code.
+
+    typecode defaults to typ._type_.  Raises SystemError when the two
+    sizes differ; returns None otherwise.  This should protect somewhat
+    against a misconfigured libffi.
+    """
+    from struct import calcsize
+    # Most _type_ codes are the same as used in struct
+    code = typ._type_ if typecode is None else typecode
+    actual = sizeof(typ)
+    required = calcsize(code)
+    if actual == required:
+        return
+    raise SystemError("sizeof(%s) wrong: %d instead of %d" %
+                      (typ, actual, required))
+
+class py_object(_SimpleCData):
+ _type_ = "O"
+ def __repr__(self):
+ try:
+ return super(py_object, self).__repr__()
+ except ValueError:
+ return "%s()" % type(self).__name__
+_check_size(py_object, "P")
+
+class c_short(_SimpleCData):
+ _type_ = "h"
+_check_size(c_short)
+
+class c_ushort(_SimpleCData):
+ _type_ = "H"
+_check_size(c_ushort)
+
+class c_long(_SimpleCData):
+ _type_ = "l"
+_check_size(c_long)
+
+class c_ulong(_SimpleCData):
+ _type_ = "L"
+_check_size(c_ulong)
+
+if _calcsize("i") == _calcsize("l"):
+ # if int and long have the same size, make c_int an alias for c_long
+ c_int = c_long
+ c_uint = c_ulong
+else:
+ class c_int(_SimpleCData):
+ _type_ = "i"
+ _check_size(c_int)
+
+ class c_uint(_SimpleCData):
+ _type_ = "I"
+ _check_size(c_uint)
+
+class c_float(_SimpleCData):
+ _type_ = "f"
+_check_size(c_float)
+
+class c_double(_SimpleCData):
+ _type_ = "d"
+_check_size(c_double)
+
+class c_longdouble(_SimpleCData):
+ _type_ = "g"
+if sizeof(c_longdouble) == sizeof(c_double):
+ c_longdouble = c_double
+
+if _calcsize("l") == _calcsize("q"):
+ # if long and long long have the same size, make c_longlong an alias for c_long
+ c_longlong = c_long
+ c_ulonglong = c_ulong
+else:
+ class c_longlong(_SimpleCData):
+ _type_ = "q"
+ _check_size(c_longlong)
+
+ class c_ulonglong(_SimpleCData):
+ _type_ = "Q"
+ ## def from_param(cls, val):
+ ## return ('d', float(val), val)
+ ## from_param = classmethod(from_param)
+ _check_size(c_ulonglong)
+
+class c_ubyte(_SimpleCData):
+ _type_ = "B"
+c_ubyte.__ctype_le__ = c_ubyte.__ctype_be__ = c_ubyte
+# backward compatibility:
+##c_uchar = c_ubyte
+_check_size(c_ubyte)
+
+class c_byte(_SimpleCData):
+ _type_ = "b"
+c_byte.__ctype_le__ = c_byte.__ctype_be__ = c_byte
+_check_size(c_byte)
+
+class c_char(_SimpleCData):
+ _type_ = "c"
+c_char.__ctype_le__ = c_char.__ctype_be__ = c_char
+_check_size(c_char)
+
+class c_char_p(_SimpleCData):
+ _type_ = "z"
+ if _os.name == "nt":
+ def __repr__(self):
+ if not windll.kernel32.IsBadStringPtrA(self, -1):
+ return "%s(%r)" % (self.__class__.__name__, self.value)
+ return "%s(%s)" % (self.__class__.__name__, cast(self, c_void_p).value)
+ else:
+ def __repr__(self):
+ return "%s(%s)" % (self.__class__.__name__, cast(self, c_void_p).value)
+_check_size(c_char_p, "P")
+
+class c_void_p(_SimpleCData):
+ _type_ = "P"
+c_voidp = c_void_p # backwards compatibility (to a bug)
+_check_size(c_void_p)
+
+class c_bool(_SimpleCData):
+ _type_ = "?"
+
+from _ctypes import POINTER, pointer, _pointer_type_cache
+
+def _reset_cache():
+    """Clear the pointer and prototype caches, then reinstall the fixed entries.
+
+    NOTE(review): c_wchar and c_wchar_p are only defined when
+    set_conversion_mode can be imported from _ctypes; confirm this is
+    never called on a build without it.
+    """
+    _pointer_type_cache.clear()
+    _c_functype_cache.clear()
+    # _win_functype_cache only exists on the Windows platforms.
+    if _os.name in ("nt", "ce"):
+        _win_functype_cache.clear()
+    # _SimpleCData.c_wchar_p_from_param
+    POINTER(c_wchar).from_param = c_wchar_p.from_param
+    # _SimpleCData.c_char_p_from_param
+    POINTER(c_char).from_param = c_char_p.from_param
+    # The cache entry for None is c_void_p.
+    _pointer_type_cache[None] = c_void_p
+    # XXX for whatever reasons, creating the first instance of a callback
+    # function is needed for the unittests on Win64 to succeed. This MAY
+    # be a compiler bug, since the problem occurs only when _ctypes is
+    # compiled with the MS SDK compiler. Or an uninitialized variable?
+    CFUNCTYPE(c_int)(lambda: None)
+
+try:
+ from _ctypes import set_conversion_mode
+except ImportError:
+ pass
+else:
+ if _os.name in ("nt", "ce"):
+ set_conversion_mode("mbcs", "ignore")
+ else:
+ set_conversion_mode("ascii", "strict")
+
+ class c_wchar_p(_SimpleCData):
+ _type_ = "Z"
+
+ class c_wchar(_SimpleCData):
+ _type_ = "u"
+
+ def create_unicode_buffer(init, size=None):
+     """Build a c_wchar array from a string or from an item count.
+
+     create_unicode_buffer(aString) -> character array
+     create_unicode_buffer(anInteger) -> character array
+     create_unicode_buffer(aString, anInteger) -> character array
+
+     A str/unicode init becomes the array's value; the array length is
+     size, or len(init) + 1 when size is None.  An int/long init is the
+     array length itself and size is not consulted.  Any other init
+     raises TypeError(init).
+     """
+     if isinstance(init, (int, long)):
+         return (c_wchar * init)()
+     if not isinstance(init, (str, unicode)):
+         raise TypeError(init)
+     length = len(init) + 1 if size is None else size
+     array = (c_wchar * length)()
+     array.value = init
+     return array
+
+# XXX Deprecated
+def SetPointerType(pointer, cls):
+    """Point `pointer` at cls and re-key its cache entry from id(pointer) to cls.
+
+    Raises RuntimeError if cls already has a cached pointer type, or if
+    `pointer` is not currently cached under id(pointer).
+    """
+    if _pointer_type_cache.get(cls, None) is not None:
+        raise RuntimeError("This type already exists in the cache")
+    if id(pointer) not in _pointer_type_cache:
+        raise RuntimeError("What's this???")
+    pointer.set_type(cls)
+    _pointer_type_cache[cls] = pointer
+    del _pointer_type_cache[id(pointer)]
+
+# XXX Deprecated
+def ARRAY(typ, len):
+    """Return typ * len (function-call spelling of the multiplication)."""
+    return typ * len
+
+################################################################
+
+
+class CDLL(object):
+    """An instance of this class represents a loaded dll/shared
+    library, exporting functions using the standard C calling
+    convention (named 'cdecl' on Windows).
+
+    The exported functions can be accessed as attributes, or by
+    indexing with the function name.  Examples:
+
+    <obj>.qsort -> callable object
+    <obj>['qsort'] -> callable object
+
+    Calling the functions releases the Python GIL during the call and
+    reacquires it afterwards.
+    """
+    # Subclasses override these two to change calling convention / result type.
+    _func_flags_ = _FUNCFLAG_CDECL
+    _func_restype_ = c_int
+    # default values for repr
+    _name = '<uninitialized>'
+    _handle = 0
+    _FuncPtr = None
+
+    def __init__(self, name, mode=DEFAULT_MODE, handle=None,
+                 use_errno=False,
+                 use_last_error=False):
+        """Load library `name` with _dlopen(name, mode), or adopt `handle` if given.
+
+        use_errno / use_last_error add the matching flag bits to the
+        function pointer type used for this library's functions.
+        """
+        self._name = name
+        flags = self._func_flags_
+        if use_errno:
+            flags |= _FUNCFLAG_USE_ERRNO
+        if use_last_error:
+            flags |= _FUNCFLAG_USE_LASTERROR
+
+        # Each instance gets its own function pointer class carrying its flags.
+        class _FuncPtr(_CFuncPtr):
+            _flags_ = flags
+            _restype_ = self._func_restype_
+        self._FuncPtr = _FuncPtr
+
+        if handle is None:
+            self._handle = _dlopen(self._name, mode)
+        else:
+            self._handle = handle
+
+    def __repr__(self):
+        # Masking with maxint*2 + 1 keeps handle and id non-negative for %x.
+        return "<%s '%s', handle %x at %x>" % \
+               (self.__class__.__name__, self._name,
+                (self._handle & (_sys.maxint*2 + 1)),
+                id(self) & (_sys.maxint*2 + 1))
+
+    def __getattr__(self, name):
+        # Dunder names are never looked up in the library.
+        if name.startswith('__') and name.endswith('__'):
+            raise AttributeError(name)
+        func = self.__getitem__(name)
+        # Store the function on the instance so later attribute access
+        # finds it directly instead of coming back through __getattr__.
+        setattr(self, name, func)
+        return func
+
+    def __getitem__(self, name_or_ordinal):
+        func = self._FuncPtr((name_or_ordinal, self))
+        # Only name lookups get a __name__; integer ordinals do not.
+        if not isinstance(name_or_ordinal, (int, long)):
+            func.__name__ = name_or_ordinal
+        return func
+
+class PyDLL(CDLL):
+ """This class represents the Python library itself. It allows
+ accessing Python API functions. The GIL is not released, and
+ Python exceptions are handled correctly.
+ """
+ _func_flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI
+
+if _os.name in ("nt", "ce"):
+
+ class WinDLL(CDLL):
+ """This class represents a dll exporting functions using the
+ Windows stdcall calling convention.
+ """
+ _func_flags_ = _FUNCFLAG_STDCALL
+
+ # XXX Hm, what about HRESULT as normal parameter?
+ # Mustn't it derive from c_long then?
+ from _ctypes import _check_HRESULT, _SimpleCData
+ class HRESULT(_SimpleCData):
+ _type_ = "l"
+ # _check_retval_ is called with the function's result when it
+ # is used as restype. It checks for the FAILED bit, and
+ # raises a WindowsError if it is set.
+ #
+ # The _check_retval_ method is implemented in C, so that the
+ # method definition itself is not included in the traceback
+ # when it raises an error - that is what we want (and Python
+ # doesn't have a way to raise an exception in the caller's
+ # frame).
+ _check_retval_ = _check_HRESULT
+
+ class OleDLL(CDLL):
+ """This class represents a dll exporting functions using the
+ Windows stdcall calling convention, and returning HRESULT.
+ HRESULT error values are automatically raised as WindowsError
+ exceptions.
+ """
+ _func_flags_ = _FUNCFLAG_STDCALL
+ _func_restype_ = HRESULT
+
+class LibraryLoader(object):
+    """Create library objects of one dll type on demand.
+
+    Attribute or item access builds dlltype(name) and stores the result
+    on the loader; LoadLibrary(name) builds a new object every time and
+    stores nothing.  Names starting with '_' raise AttributeError.
+    """
+
+    def __init__(self, dlltype):
+        self._dlltype = dlltype
+
+    def __getattr__(self, name):
+        # Only reached for attributes not already stored on the instance.
+        if name[0] != '_':
+            library = self._dlltype(name)
+            setattr(self, name, library)
+            return library
+        raise AttributeError(name)
+
+    def __getitem__(self, name):
+        return getattr(self, name)
+
+    def LoadLibrary(self, name):
+        return self._dlltype(name)
+
+cdll = LibraryLoader(CDLL)
+pydll = LibraryLoader(PyDLL)
+
+if _os.name in ("nt", "ce"):
+ pythonapi = PyDLL("python dll", None, _sys.dllhandle)
+elif _sys.platform == "cygwin":
+ pythonapi = PyDLL("libpython%d.%d.dll" % _sys.version_info[:2])
+elif _sys.platform == "cli": # Need to determine how to do this
+ pythonapi = None
+else:
+ pythonapi = PyDLL(None)
+
+
+if _os.name in ("nt", "ce"):
+ windll = LibraryLoader(WinDLL)
+ oledll = LibraryLoader(OleDLL)
+
+ if _os.name == "nt":
+ GetLastError = windll.kernel32.GetLastError
+ else:
+ GetLastError = windll.coredll.GetLastError
+ from _ctypes import get_last_error, set_last_error
+
+ def WinError(code=None, descr=None):
+     """Return (not raise) a WindowsError for `code`.
+
+     code defaults to GetLastError(); descr defaults to the stripped
+     FormatError() text for that code.
+     """
+     error_code = GetLastError() if code is None else code
+     message = FormatError(error_code).strip() if descr is None else descr
+     return WindowsError(error_code, message)
+
+if sizeof(c_uint) == sizeof(c_void_p):
+ c_size_t = c_uint
+ c_ssize_t = c_int
+elif sizeof(c_ulong) == sizeof(c_void_p):
+ c_size_t = c_ulong
+ c_ssize_t = c_long
+elif sizeof(c_ulonglong) == sizeof(c_void_p):
+ c_size_t = c_ulonglong
+ c_ssize_t = c_longlong
+
+# functions
+
+from _ctypes import _memmove_addr, _memset_addr, _string_at_addr, _cast_addr
+
+## void *memmove(void *, const void *, size_t);
+memmove = CFUNCTYPE(c_void_p, c_void_p, c_void_p, c_size_t)(_memmove_addr)
+
+## void *memset(void *, int, size_t)
+memset = CFUNCTYPE(c_void_p, c_void_p, c_int, c_size_t)(_memset_addr)
+
+def PYFUNCTYPE(restype, *argtypes):
+    """Return a new function prototype class with the CDECL and PYTHONAPI flags.
+
+    A fresh class is created on every call; nothing is cached here.
+    """
+    class CFunctionType(_CFuncPtr):
+        _argtypes_ = argtypes
+        _restype_ = restype
+        _flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI
+    return CFunctionType
+
+_cast = PYFUNCTYPE(py_object, c_void_p, py_object, py_object)(_cast_addr)
+def cast(obj, typ):
+ return _cast(obj, obj, typ)
+
+_string_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_string_at_addr)
+def string_at(ptr, size=-1):
+ """string_at(addr[, size]) -> string
+
+ Return the string at addr."""
+ return _string_at(ptr, size)
+
+try:
+ from _ctypes import _wstring_at_addr
+except ImportError:
+ pass
+else:
+ _wstring_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_wstring_at_addr)
+ def wstring_at(ptr, size=-1):
+ """wstring_at(addr[, size]) -> string
+
+ Return the string at addr."""
+ return _wstring_at(ptr, size)
+
+
+if _os.name in ("nt", "ce"): # COM stuff
+ def DllGetClassObject(rclsid, riid, ppv):
+     """Delegate to comtypes.server.inprocserver.DllGetClassObject.
+
+     Returns CLASS_E_CLASSNOTAVAILABLE when that module cannot be imported.
+     """
+     try:
+         server = __import__("comtypes.server.inprocserver", globals(), locals(), ['*'])
+     except ImportError:
+         return -2147221231 # CLASS_E_CLASSNOTAVAILABLE
+     return server.DllGetClassObject(rclsid, riid, ppv)
+
+ def DllCanUnloadNow():
+     """Delegate to comtypes.server.inprocserver.DllCanUnloadNow.
+
+     Returns S_OK (0) when that module cannot be imported.
+     """
+     try:
+         server = __import__("comtypes.server.inprocserver", globals(), locals(), ['*'])
+     except ImportError:
+         return 0 # S_OK
+     else:
+         return server.DllCanUnloadNow()
+
+from ctypes._endian import BigEndianStructure, LittleEndianStructure
+
+# Fill in specifically-sized types
+c_int8 = c_byte
+c_uint8 = c_ubyte
+for kind in [c_short, c_int, c_long, c_longlong]:
+ if sizeof(kind) == 2: c_int16 = kind
+ elif sizeof(kind) == 4: c_int32 = kind
+ elif sizeof(kind) == 8: c_int64 = kind
+for kind in [c_ushort, c_uint, c_ulong, c_ulonglong]:
+ if sizeof(kind) == 2: c_uint16 = kind
+ elif sizeof(kind) == 4: c_uint32 = kind
+ elif sizeof(kind) == 8: c_uint64 = kind
+del(kind)
+
+_reset_cache()
diff --git a/cashew/Lib/ctypes/_endian.py b/cashew/Lib/ctypes/_endian.py
new file mode 100644
index 0000000..c0ba646
--- /dev/null
+++ b/cashew/Lib/ctypes/_endian.py
@@ -0,0 +1,61 @@
+import sys
+from ctypes import *
+
+_array_type = type(Array)
+
+def _other_endian(typ):
+    """Return the type with the 'other' byte order.
+
+    Types exposing the _OTHER_ENDIAN attribute (simple types such as
+    c_int) return that attribute.  Array types are rebuilt from the
+    swapped item type with the same length.  Structure subclasses are
+    returned unchanged.  Anything else raises TypeError.
+    """
+    if hasattr(typ, _OTHER_ENDIAN):
+        # present if typ is a primitive type
+        swapped = getattr(typ, _OTHER_ENDIAN)
+    elif isinstance(typ, _array_type):
+        swapped = _other_endian(typ._type_) * typ._length_
+    elif issubclass(typ, Structure):
+        swapped = typ
+    else:
+        raise TypeError("This type does not support other endian: %s" % typ)
+    return swapped
+
+class _swapped_meta(type(Structure)):
+    """Metaclass that swaps the byte order of every type assigned via _fields_."""
+
+    def __setattr__(self, attrname, value):
+        if attrname == "_fields_":
+            # Each descriptor is (name, type, ...); only the type is replaced,
+            # any trailing items are carried over unchanged.
+            value = [(desc[0], _other_endian(desc[1])) + desc[2:]
+                     for desc in value]
+        super(_swapped_meta, self).__setattr__(attrname, value)
+
+################################################################
+
+# Note: The Structure metaclass checks for the *presence* (not the
+# value!) of a _swapped_bytes_ attribute to determine the bit order in
+# structures containing bit fields.
+
+if sys.byteorder == "little":
+ _OTHER_ENDIAN = "__ctype_be__"
+
+ LittleEndianStructure = Structure
+
+ class BigEndianStructure(Structure):
+ """Structure with big endian byte order"""
+ __metaclass__ = _swapped_meta
+ _swappedbytes_ = None
+
+elif sys.byteorder == "big":
+ _OTHER_ENDIAN = "__ctype_le__"
+
+ BigEndianStructure = Structure
+ class LittleEndianStructure(Structure):
+ """Structure with little endian byte order"""
+ __metaclass__ = _swapped_meta
+ _swappedbytes_ = None
+
+else:
+ raise RuntimeError("Invalid byteorder")
diff --git a/cashew/Lib/ctypes/macholib/__init__.py b/cashew/Lib/ctypes/macholib/__init__.py
new file mode 100644
index 0000000..5621def
--- /dev/null
+++ b/cashew/Lib/ctypes/macholib/__init__.py
@@ -0,0 +1,9 @@
+"""
+Enough Mach-O to make your head spin.
+
+See the relevant header files in /usr/include/mach-o
+
+And also Apple's documentation.
+"""
+
+__version__ = '1.0'
diff --git a/cashew/Lib/ctypes/macholib/dyld.py b/cashew/Lib/ctypes/macholib/dyld.py
new file mode 100644
index 0000000..1fdf8d6
--- /dev/null
+++ b/cashew/Lib/ctypes/macholib/dyld.py
@@ -0,0 +1,166 @@
+"""
+dyld emulation
+"""
+
+import os
+from framework import framework_info
+from dylib import dylib_info
+from itertools import *
+
+__all__ = [
+ 'dyld_find', 'framework_find',
+ 'framework_info', 'dylib_info',
+]
+
+# These are the defaults as per man dyld(1)
+#
+DEFAULT_FRAMEWORK_FALLBACK = [
+ os.path.expanduser("~/Library/Frameworks"),
+ "/Library/Frameworks",
+ "/Network/Library/Frameworks",
+ "/System/Library/Frameworks",
+]
+
+DEFAULT_LIBRARY_FALLBACK = [
+ os.path.expanduser("~/lib"),
+ "/usr/local/lib",
+ "/lib",
+ "/usr/lib",
+]
+
+def ensure_utf8(s):
+    """Encode unicode input as UTF-8; return any other value unchanged.
+
+    Not all of PyObjC and Python understand unicode paths very well yet.
+    """
+    return s.encode('utf8') if isinstance(s, unicode) else s
+
+def dyld_env(env, var):
+    """Return the ':'-separated entries of env[var] as a list.
+
+    env=None means os.environ.  A missing variable gives [].
+    """
+    mapping = os.environ if env is None else env
+    value = mapping.get(var)
+    return [] if value is None else value.split(':')
+
+def dyld_image_suffix(env=None):
+    """Return env's DYLD_IMAGE_SUFFIX value, or None when it is not set.
+
+    env=None means os.environ.
+    """
+    mapping = os.environ if env is None else env
+    return mapping.get('DYLD_IMAGE_SUFFIX')
+
+def dyld_framework_path(env=None):
+    """Return the DYLD_FRAMEWORK_PATH entries of env as a list (via dyld_env)."""
+    return dyld_env(env, 'DYLD_FRAMEWORK_PATH')
+
+def dyld_library_path(env=None):
+    """Return the DYLD_LIBRARY_PATH entries of env as a list (via dyld_env)."""
+    return dyld_env(env, 'DYLD_LIBRARY_PATH')
+
+def dyld_fallback_framework_path(env=None):
+    """Return the DYLD_FALLBACK_FRAMEWORK_PATH entries of env as a list (via dyld_env)."""
+    return dyld_env(env, 'DYLD_FALLBACK_FRAMEWORK_PATH')
+
+def dyld_fallback_library_path(env=None):
+    """Return the DYLD_FALLBACK_LIBRARY_PATH entries of env as a list (via dyld_env)."""
+    return dyld_env(env, 'DYLD_FALLBACK_LIBRARY_PATH')
+
+def dyld_image_suffix_search(iterator, env=None):
+    """For a potential path iterator, add DYLD_IMAGE_SUFFIX semantics.
+
+    Without a suffix in env the iterator is returned untouched.  With
+    one, every path is preceded by its suffixed variant; for '.dylib'
+    paths the suffix goes before the extension.
+    """
+    suffix = dyld_image_suffix(env)
+    if suffix is None:
+        return iterator
+    extension = '.dylib'
+    def _with_suffix():
+        for candidate in iterator:
+            if candidate.endswith(extension):
+                yield candidate[:-len(extension)] + suffix + extension
+            else:
+                yield candidate + suffix
+            yield candidate
+    return _with_suffix()
+
+def dyld_override_search(name, env=None):
+    """Yield candidate paths built from the DYLD_FRAMEWORK_PATH and
+    DYLD_LIBRARY_PATH entries of env; existence is not checked here.
+    """
+    # If DYLD_FRAMEWORK_PATH is set and this dylib_name is a
+    # framework name, use the first file that exists in the framework
+    # path if any. If there is none go on to search the DYLD_LIBRARY_PATH
+    # if any.
+
+    framework = framework_info(name)
+
+    if framework is not None:
+        for path in dyld_framework_path(env):
+            yield os.path.join(path, framework['name'])
+
+    # If DYLD_LIBRARY_PATH is set then use the first file that exists
+    # in the path. If none use the original name.
+    for path in dyld_library_path(env):
+        yield os.path.join(path, os.path.basename(name))
+
+def dyld_executable_path_search(name, executable_path=None):
+    """Yield name with '@executable_path/' replaced by executable_path.
+
+    Yields nothing when name lacks that prefix or executable_path is None.
+    """
+    # If we haven't done any searching and found a library and the
+    # dylib_name starts with "@executable_path/" then construct the
+    # library name.
+    prefix = '@executable_path/'
+    if not name.startswith(prefix) or executable_path is None:
+        return
+    yield os.path.join(executable_path, name[len(prefix):])
+
+def dyld_default_search(name, env=None):
+    """Yield candidate paths in default order; existence is not checked here.
+
+    Order: name itself, the DYLD_FALLBACK_FRAMEWORK_PATH entries (only
+    for framework names), the DYLD_FALLBACK_LIBRARY_PATH entries, then
+    the built-in DEFAULT_* lists for whichever fallback list was empty.
+    """
+    yield name
+
+    framework = framework_info(name)
+
+    if framework is not None:
+        fallback_framework_path = dyld_fallback_framework_path(env)
+        for path in fallback_framework_path:
+            yield os.path.join(path, framework['name'])
+
+    fallback_library_path = dyld_fallback_library_path(env)
+    for path in fallback_library_path:
+        yield os.path.join(path, os.path.basename(name))
+
+    # fallback_framework_path is only bound when framework is not None;
+    # the short-circuiting `and` keeps this test safe.
+    if framework is not None and not fallback_framework_path:
+        for path in DEFAULT_FRAMEWORK_FALLBACK:
+            yield os.path.join(path, framework['name'])
+
+    if not fallback_library_path:
+        for path in DEFAULT_LIBRARY_FALLBACK:
+            yield os.path.join(path, os.path.basename(name))
+
+def dyld_find(name, executable_path=None, env=None):
+    """
+    Find a library or framework using dyld semantics
+
+    Returns the first candidate path that is an existing file, trying
+    the override, executable-path and default searches in that order.
+    Raises ValueError when no candidate exists.
+    """
+    name = ensure_utf8(name)
+    executable_path = ensure_utf8(executable_path)
+    candidates = chain(
+        dyld_override_search(name, env),
+        dyld_executable_path_search(name, executable_path),
+        dyld_default_search(name, env),
+    )
+    for candidate in dyld_image_suffix_search(candidates, env):
+        if os.path.isfile(candidate):
+            return candidate
+    raise ValueError("dylib %s could not be found" % (name,))
+
+def framework_find(fn, executable_path=None, env=None):
+    """
+    Find a framework using dyld semantics in a very loose manner.
+
+    Will take input such as:
+        Python
+        Python.framework
+        Python.framework/Versions/Current
+
+    fn is tried as given first, then as <fn>.framework/<basename>.  If
+    both lookups fail, the ValueError from the first one is raised.
+    """
+    try:
+        return dyld_find(fn, executable_path=executable_path, env=env)
+    except ValueError as first_error:
+        error = first_error
+    index = fn.rfind('.framework')
+    if index == -1:
+        index = len(fn)
+        fn += '.framework'
+    fn = os.path.join(fn, os.path.basename(fn[:index]))
+    try:
+        return dyld_find(fn, executable_path=executable_path, env=env)
+    except ValueError:
+        raise error
+
+def test_dyld_find():
+    """Check dyld_find() against two well-known system paths.
+
+    Both lookups use the real process environment (env is not passed).
+    """
+    assert dyld_find('libSystem.dylib') == '/usr/lib/libSystem.dylib'
+    assert dyld_find('System.framework/System') == '/System/Library/Frameworks/System.framework/System'
+
+if __name__ == '__main__':
+ test_dyld_find()
diff --git a/cashew/Lib/ctypes/macholib/dylib.py b/cashew/Lib/ctypes/macholib/dylib.py
new file mode 100644
index 0000000..aa10750
--- /dev/null
+++ b/cashew/Lib/ctypes/macholib/dylib.py
@@ -0,0 +1,63 @@
+"""
+Generic dylib path manipulation
+"""
+
+import re
+
+__all__ = ['dylib_info']
+
+# Named groups match the keys documented in dylib_info():
+# location, name, shortname, version, suffix.
+DYLIB_RE = re.compile(r"""(?x)
+(?P<location>^.*)(?:^|/)
+(?P<name>
+    (?P<shortname>\w+?)
+    (?:\.(?P<version>[^._]+))?
+    (?:_(?P<suffix>[^._]+))?
+    \.dylib$
+)
+""")
+
+def dylib_info(filename):
+    """
+    Split a dylib path into its parts using DYLIB_RE.
+
+    A dylib name can take one of the following four forms:
+        Location/Name.SomeVersion_Suffix.dylib
+        Location/Name.SomeVersion.dylib
+        Location/Name_Suffix.dylib
+        Location/Name.dylib
+
+    returns None if not found or a mapping equivalent to:
+        dict(
+            location='Location',
+            name='Name.SomeVersion_Suffix.dylib',
+            shortname='Name',
+            version='SomeVersion',
+            suffix='Suffix',
+        )
+
+    Note that SomeVersion and Suffix are optional and may be None
+    if not present.
+    """
+    match = DYLIB_RE.match(filename)
+    return match.groupdict() if match else None
+
+
+def test_dylib_info():
+ def d(location=None, name=None, shortname=None, version=None, suffix=None):
+ return dict(
+ location=location,
+ name=name,
+ shortname=shortname,
+ version=version,
+ suffix=suffix
+ )
+ assert dylib_info('completely/invalid') is None
+ assert dylib_info('completely/invalide_debug') is None
+ assert dylib_info('P/Foo.dylib') == d('P', 'Foo.dylib', 'Foo')
+ assert dylib_info('P/Foo_debug.dylib') == d('P', 'Foo_debug.dylib', 'Foo', suffix='debug')
+ assert dylib_info('P/Foo.A.dylib') == d('P', 'Foo.A.dylib', 'Foo', 'A')
+ assert dylib_info('P/Foo_debug.A.dylib') == d('P', 'Foo_debug.A.dylib', 'Foo_debug', 'A')
+ assert dylib_info('P/Foo.A_debug.dylib') == d('P', 'Foo.A_debug.dylib', 'Foo', 'A', 'debug')
+
+if __name__ == '__main__':
+ test_dylib_info()
diff --git a/cashew/Lib/ctypes/macholib/framework.py b/cashew/Lib/ctypes/macholib/framework.py
new file mode 100644
index 0000000..ad6ed55
--- /dev/null
+++ b/cashew/Lib/ctypes/macholib/framework.py
@@ -0,0 +1,65 @@
+"""
+Generic framework path manipulation
+"""
+
+import re
+
+__all__ = ['framework_info']
+
+# Named groups match the keys documented in framework_info():
+# location, name, shortname, version, suffix.  The dot before
+# "framework" is escaped so that only a literal '.' is accepted there.
+STRICT_FRAMEWORK_RE = re.compile(r"""(?x)
+(?P<location>^.*)(?:^|/)
+(?P<name>
+    (?P<shortname>\w+)\.framework/
+    (?:Versions/(?P<version>[^/]+)/)?
+    (?P=shortname)
+    (?:_(?P<suffix>[^_]+))?
+)$
+""")
+
+def framework_info(filename):
+    """
+    Split a framework path into its parts using STRICT_FRAMEWORK_RE.
+
+    A framework name can take one of the following four forms:
+        Location/Name.framework/Versions/SomeVersion/Name_Suffix
+        Location/Name.framework/Versions/SomeVersion/Name
+        Location/Name.framework/Name_Suffix
+        Location/Name.framework/Name
+
+    returns None if not found, or a mapping equivalent to:
+        dict(
+            location='Location',
+            name='Name.framework/Versions/SomeVersion/Name_Suffix',
+            shortname='Name',
+            version='SomeVersion',
+            suffix='Suffix',
+        )
+
+    Note that SomeVersion and Suffix are optional and may be None
+    if not present
+    """
+    match = STRICT_FRAMEWORK_RE.match(filename)
+    return match.groupdict() if match else None
+
+def test_framework_info():
+ def d(location=None, name=None, shortname=None, version=None, suffix=None):
+ return dict(
+ location=location,
+ name=name,
+ shortname=shortname,
+ version=version,
+ suffix=suffix
+ )
+ assert framework_info('completely/invalid') is None
+ assert framework_info('completely/invalid/_debug') is None
+ assert framework_info('P/F.framework') is None
+ assert framework_info('P/F.framework/_debug') is None
+ assert framework_info('P/F.framework/F') == d('P', 'F.framework/F', 'F')
+ assert framework_info('P/F.framework/F_debug') == d('P', 'F.framework/F_debug', 'F', suffix='debug')
+ assert framework_info('P/F.framework/Versions') is None
+ assert framework_info('P/F.framework/Versions/A') is None
+ assert framework_info('P/F.framework/Versions/A/F') == d('P', 'F.framework/Versions/A/F', 'F', 'A')
+ assert framework_info('P/F.framework/Versions/A/F_debug') == d('P', 'F.framework/Versions/A/F_debug', 'F', 'A', 'debug')
+
+if __name__ == '__main__':
+ test_framework_info()
diff --git a/cashew/Lib/ctypes/util.py b/cashew/Lib/ctypes/util.py
new file mode 100644
index 0000000..ab10ec5
--- /dev/null
+++ b/cashew/Lib/ctypes/util.py
@@ -0,0 +1,308 @@
+import os
+import subprocess
+import sys
+
+# find_library(name) returns the pathname of a library, or None.
+if os.name == "nt":
+
+ def _get_build_version():
+     """Return the version of MSVC that was used to build Python.
+
+     For Python 2.3 and up, the version number is included in
+     sys.version.  For earlier versions, assume the compiler is MSVC 6.
+     Returns None when the derived major version is below 6.
+     """
+     # This function was copied from Lib/distutils/msvccompiler.py
+     marker = "MSC v."
+     start = sys.version.find(marker)
+     if start == -1:
+         return 6
+     start += len(marker)
+     s, rest = sys.version[start:].split(" ", 1)
+     major = int(s[:-2]) - 6
+     minor = int(s[2:3]) / 10.0
+     if major < 6:
+         # we don't know what version of the compiler this is
+         return None
+     if major == 6:
+         # I don't think paths are affected by minor version in version 6
+         minor = 0
+     return major + minor
+
+ def find_msvcrt():
+     """Return the name of the VC runtime dll, or None if the compiler
+     version is unknown."""
+     version = _get_build_version()
+     if version is None:
+         # better be safe than sorry
+         return None
+     clibname = 'msvcrt' if version <= 6 else 'msvcr%d' % (version * 10)
+
+     # Debug builds of Python use the 'd' flavour of the runtime
+     import imp
+     debug_flag = 'd' if imp.get_suffixes()[0][0] == '_d.pyd' else ''
+     return clibname + debug_flag + '.dll'
+
+ def find_library(name):
+     """Return the path of the first file matching `name` on PATH, or None.
+
+     'c' and 'm' resolve to the VC runtime via find_msvcrt().  In each
+     directory the bare name is tried first, then name + ".dll" unless
+     the name already ends in ".dll".
+     """
+     if name in ('c', 'm'):
+         return find_msvcrt()
+     # See MSDN for the REAL search order.
+     for directory in os.environ['PATH'].split(os.pathsep):
+         candidate = os.path.join(directory, name)
+         if os.path.isfile(candidate):
+             return candidate
+         if not candidate.lower().endswith(".dll"):
+             candidate += ".dll"
+             if os.path.isfile(candidate):
+                 return candidate
+     return None
+
+if os.name == "ce":
+ # search path according to MSDN:
+ # - absolute path specified by filename
+ # - The .exe launch directory
+ # - the Windows directory
+ # - ROM dll files (where are they?)
+ # - OEM specified search path: HKLM\Loader\SystemPath
+ def find_library(name):
+     """Return name unchanged; no search is performed on this platform."""
+     return name
+
+if os.name == "posix" and sys.platform == "darwin":
+ from ctypes.macholib.dyld import dyld_find as _dyld_find
+ def find_library(name):
+     """Return the first dyld match for name, or None.
+
+     Tries lib<name>.dylib, <name>.dylib and <name>.framework/<name>,
+     in that order.
+     """
+     candidates = ('lib%s.dylib' % name,
+                   '%s.dylib' % name,
+                   '%s.framework/%s' % (name, name))
+     for candidate in candidates:
+         try:
+             return _dyld_find(candidate)
+         except ValueError:
+             pass
+     return None
+
+elif os.name == "posix":
+ # Andreas Degert's find functions, using gcc, /sbin/ldconfig, objdump
+ import re, tempfile, errno
+
+ def _findLib_gcc(name):
+     """Return the first lib<name>.* path found in the linker trace, or None."""
+     # Run GCC's linker with the -t (aka --trace) option and examine the
+     # library name it prints out. The GCC command will fail because we
+     # haven't supplied a proper program with main(), but that does not
+     # matter.
+     expr = r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name)
+     cmd = 'if type gcc >/dev/null 2>&1; then CC=gcc; elif type cc >/dev/null 2>&1; then CC=cc;else exit; fi;' \
+           'LANG=C LC_ALL=C $CC -Wl,-t -o "$2" 2>&1 -l"$1"'
+
+     # The temporary file only supplies an output path for the link step.
+     temp = tempfile.NamedTemporaryFile()
+     try:
+         # NOTE(review): presumably the extra tuple items reach the shell as
+         # positional parameters (name -> $1, temp.name -> $2) - confirm
+         # against the subprocess documentation for shell=True.
+         proc = subprocess.Popen((cmd, '_findLib_gcc', name, temp.name),
+                                 shell=True,
+                                 stdout=subprocess.PIPE)
+         [trace, _] = proc.communicate()
+     finally:
+         try:
+             temp.close()
+         except OSError, e:
+             # ENOENT is raised if the file was already removed, which is
+             # the normal behaviour of GCC if linking fails
+             if e.errno != errno.ENOENT:
+                 raise
+     res = re.search(expr, trace)
+     if not res:
+         return None
+     return res.group(0)
+
+
+ if sys.platform == "sunos5":
+ # use /usr/ccs/bin/dump on solaris
+ def _get_soname(f):
+     """Return the SONAME reported by /usr/ccs/bin/dump for file f.
+
+     Returns None when f is false, the command cannot be started, or
+     its output has no SONAME entry.
+     """
+     if not f:
+         return None
+
+     null = open(os.devnull, "wb")
+     try:
+         # `with null` closes the devnull handle once Popen has returned.
+         with null:
+             proc = subprocess.Popen(("/usr/ccs/bin/dump", "-Lpv", f),
+                                     stdout=subprocess.PIPE,
+                                     stderr=null)
+     except OSError: # E.g. command not found
+         return None
+     [data, _] = proc.communicate()
+     res = re.search(br'\[.*\]\sSONAME\s+([^\s]+)', data)
+     if not res:
+         return None
+     return res.group(1)
+ else:
+ def _get_soname(f):
+     """Return the SONAME that objdump reports for file f.
+
+     Returns None when f is false or the output has no SONAME entry.
+     """
+     # assuming GNU binutils / ELF
+     if not f:
+         return None
+     # The shell snippet exits early when objdump is not available, which
+     # leaves the output without a SONAME line.
+     cmd = 'if ! type objdump >/dev/null 2>&1; then exit; fi;' \
+           'objdump -p -j .dynamic 2>/dev/null "$1"'
+     proc = subprocess.Popen((cmd, '_get_soname', f), shell=True,
+                             stdout=subprocess.PIPE)
+     [dump, _] = proc.communicate()
+     res = re.search(br'\sSONAME\s+([^\s]+)', dump)
+     if not res:
+         return None
+     return res.group(1)
+
+ if (sys.platform.startswith("freebsd")
+ or sys.platform.startswith("openbsd")
+ or sys.platform.startswith("dragonfly")):
+
+ def _num_version(libname):
+     """Return the trailing numeric components of libname as a list of ints.
+
+     "libxyz.so.MAJOR.MINOR" => [ MAJOR, MINOR ]
+     A name without a numeric tail gives [sys.maxint].
+     """
+     nums = []
+     # Walk the dotted parts from the right until one is not a number.
+     for part in reversed(libname.split(b".")):
+         try:
+             nums.insert(0, int(part))
+         except ValueError:
+             break
+     return nums or [sys.maxint]
+
+ def find_library(name):
+     """Return the lib<name>.* entry from `ldconfig -r` that sorts last by
+     _num_version, falling back to the gcc/soname lookup when none match.
+     """
+     ename = re.escape(name)
+     expr = r':-l%s\.\S+ => \S*/(lib%s\.\S+)' % (ename, ename)
+
+     null = open(os.devnull, 'wb')
+     try:
+         # `with null` closes the devnull handle once Popen has returned.
+         with null:
+             proc = subprocess.Popen(('/sbin/ldconfig', '-r'),
+                                     stdout=subprocess.PIPE,
+                                     stderr=null)
+     except OSError: # E.g. command not found
+         data = b''
+     else:
+         [data, _] = proc.communicate()
+
+     res = re.findall(expr, data)
+     if not res:
+         return _get_soname(_findLib_gcc(name))
+     # Sort by numeric version; the last entry is the one returned.
+     res.sort(key=_num_version)
+     return res[-1]
+
+ elif sys.platform == "sunos5":
+
+ def _findLib_crle(name, is64):
+     """Return the first existing lib<name>.so in crle's default library path.
+
+     is64 adds the -64 option to the crle call.  Returns None when crle
+     is missing or cannot be run, when it reports no default path, or
+     when no directory on that path contains the file.
+     """
+     if not os.path.exists('/usr/bin/crle'):
+         return None
+
+     # Run crle with LC_ALL=C; the line prefix matched below is in English.
+     env = dict(os.environ)
+     env['LC_ALL'] = 'C'
+
+     if is64:
+         args = ('/usr/bin/crle', '-64')
+     else:
+         args = ('/usr/bin/crle',)
+
+     paths = None
+     null = open(os.devnull, 'wb')
+     try:
+         with null:
+             proc = subprocess.Popen(args,
+                                     stdout=subprocess.PIPE,
+                                     stderr=null,
+                                     env=env)
+     except OSError: # E.g. bad executable
+         return None
+     try:
+         for line in proc.stdout:
+             line = line.strip()
+             if line.startswith(b'Default Library Path (ELF):'):
+                 # The fifth whitespace-separated field is the path list.
+                 paths = line.split()[4]
+     finally:
+         proc.stdout.close()
+         proc.wait()
+
+     if not paths:
+         return None
+
+     for dir in paths.split(":"):
+         libfile = os.path.join(dir, "lib%s.so" % name)
+         if os.path.exists(libfile):
+             return libfile
+
+     return None
+
+ def find_library(name, is64 = False):
+     """Return the soname of the library found by crle, or failing that by gcc."""
+     return _get_soname(_findLib_crle(name, is64) or _findLib_gcc(name))
+
+ else:
+
+ def _findSoname_ldconfig(name):
+     """Return the first lib<name>.* entry in `ldconfig -p` output whose
+     ABI tag matches this machine, or None.
+     """
+     import struct
+     # Key is the uname machine name plus the size of a C long in bits.
+     if struct.calcsize('l') == 4:
+         machine = os.uname()[4] + '-32'
+     else:
+         machine = os.uname()[4] + '-64'
+     mach_map = {
+         'x86_64-64': 'libc6,x86-64',
+         'ppc64-64': 'libc6,64bit',
+         'sparc64-64': 'libc6,64bit',
+         's390x-64': 'libc6,64bit',
+         'ia64-64': 'libc6,IA-64',
+         }
+     # Machines not listed above fall back to plain 'libc6'.
+     abi_type = mach_map.get(machine, 'libc6')
+
+     # XXX assuming GLIBC's ldconfig (with option -p)
+     expr = r'\s+(lib%s\.[^\s]+)\s+\(%s' % (re.escape(name), abi_type)
+
+     env = dict(os.environ)
+     env['LC_ALL'] = 'C'
+     env['LANG'] = 'C'
+     null = open(os.devnull, 'wb')
+     try:
+         # `with null` closes the devnull handle once Popen has returned.
+         with null:
+             p = subprocess.Popen(['/sbin/ldconfig', '-p'],
+                                   stderr=null,
+                                   stdout=subprocess.PIPE,
+                                   env=env)
+     except OSError: # E.g. command not found
+         return None
+     [data, _] = p.communicate()
+     res = re.search(expr, data)
+     if not res:
+         return None
+     return res.group(1)
+
+ def find_library(name):
+     """Return the soname found via ldconfig, or failing that via gcc."""
+     return _findSoname_ldconfig(name) or _get_soname(_findLib_gcc(name))
+
+################################################################
+# test code
+
+def test():
+    """Print the results of find_library() and library loading for this platform."""
+    from ctypes import cdll
+    if os.name == "nt":
+        print cdll.msvcrt
+        # LibraryLoader has no load() method: cdll.load would be treated as
+        # a request for a library named "load".  LoadLibrary is the method.
+        print cdll.LoadLibrary("msvcrt")
+        print find_library("msvcrt")
+
+    if os.name == "posix":
+        # find and load_version
+        print find_library("m")
+        print find_library("c")
+        print find_library("bz2")
+
+        # getattr
+##        print cdll.m
+##        print cdll.bz2
+
+        # load
+        if sys.platform == "darwin":
+            print cdll.LoadLibrary("libm.dylib")
+            print cdll.LoadLibrary("libcrypto.dylib")
+            print cdll.LoadLibrary("libSystem.dylib")
+            print cdll.LoadLibrary("System.framework/System")
+        else:
+            print cdll.LoadLibrary("libm.so")
+            print cdll.LoadLibrary("libcrypt.so")
+            print find_library("crypt")
+
+if __name__ == "__main__":
+ test()
diff --git a/cashew/Lib/ctypes/wintypes.py b/cashew/Lib/ctypes/wintypes.py
new file mode 100644
index 0000000..e7f569c
--- /dev/null
+++ b/cashew/Lib/ctypes/wintypes.py
@@ -0,0 +1,181 @@
+# The most useful windows datatypes
+from ctypes import *
+
+BYTE = c_byte
+WORD = c_ushort
+DWORD = c_ulong
+
+WCHAR = c_wchar
+UINT = c_uint
+INT = c_int
+
+DOUBLE = c_double
+FLOAT = c_float
+
+BOOLEAN = BYTE
+BOOL = c_long
+
+from ctypes import _SimpleCData
+class VARIANT_BOOL(_SimpleCData):
+ _type_ = "v"
+ def __repr__(self):
+ return "%s(%r)" % (self.__class__.__name__, self.value)
+
+ULONG = c_ulong
+LONG = c_long
+
+USHORT = c_ushort
+SHORT = c_short
+
+# in the windows header files, these are structures.
+_LARGE_INTEGER = LARGE_INTEGER = c_longlong
+_ULARGE_INTEGER = ULARGE_INTEGER = c_ulonglong
+
+LPCOLESTR = LPOLESTR = OLESTR = c_wchar_p
+LPCWSTR = LPWSTR = c_wchar_p
+LPCSTR = LPSTR = c_char_p
+LPCVOID = LPVOID = c_void_p
+
+# WPARAM is defined as UINT_PTR (unsigned type)
+# LPARAM is defined as LONG_PTR (signed type)
+if sizeof(c_long) == sizeof(c_void_p):
+ WPARAM = c_ulong
+ LPARAM = c_long
+elif sizeof(c_longlong) == sizeof(c_void_p):
+ WPARAM = c_ulonglong
+ LPARAM = c_longlong
+
+ATOM = WORD
+LANGID = WORD
+
+COLORREF = DWORD
+LGRPID = DWORD
+LCTYPE = DWORD
+
+LCID = DWORD
+
+################################################################
+# HANDLE types
+HANDLE = c_void_p # in the header files: void *
+
+HACCEL = HANDLE
+HBITMAP = HANDLE
+HBRUSH = HANDLE
+HCOLORSPACE = HANDLE
+HDC = HANDLE
+HDESK = HANDLE
+HDWP = HANDLE
+HENHMETAFILE = HANDLE
+HFONT = HANDLE
+HGDIOBJ = HANDLE
+HGLOBAL = HANDLE
+HHOOK = HANDLE
+HICON = HANDLE
+HINSTANCE = HANDLE
+HKEY = HANDLE
+HKL = HANDLE
+HLOCAL = HANDLE
+HMENU = HANDLE
+HMETAFILE = HANDLE
+HMODULE = HANDLE
+HMONITOR = HANDLE
+HPALETTE = HANDLE
+HPEN = HANDLE
+HRGN = HANDLE
+HRSRC = HANDLE
+HSTR = HANDLE
+HTASK = HANDLE
+HWINSTA = HANDLE
+HWND = HANDLE
+SC_HANDLE = HANDLE
+SERVICE_STATUS_HANDLE = HANDLE
+
+################################################################
+# Some important structure definitions
+
+class RECT(Structure):
+ _fields_ = [("left", c_long),
+ ("top", c_long),
+ ("right", c_long),
+ ("bottom", c_long)]
+tagRECT = _RECTL = RECTL = RECT
+
+class _SMALL_RECT(Structure):
+ _fields_ = [('Left', c_short),
+ ('Top', c_short),
+ ('Right', c_short),
+ ('Bottom', c_short)]
+SMALL_RECT = _SMALL_RECT
+
+class _COORD(Structure):
+ _fields_ = [('X', c_short),
+ ('Y', c_short)]
+
+class POINT(Structure):
+ _fields_ = [("x", c_long),
+ ("y", c_long)]
+tagPOINT = _POINTL = POINTL = POINT
+
+class SIZE(Structure):
+ _fields_ = [("cx", c_long),
+ ("cy", c_long)]
+tagSIZE = SIZEL = SIZE
+
+def RGB(red, green, blue):
+ return red + (green << 8) + (blue << 16)
+
+class FILETIME(Structure):
+ _fields_ = [("dwLowDateTime", DWORD),
+ ("dwHighDateTime", DWORD)]
+_FILETIME = FILETIME
+
+class MSG(Structure):
+ _fields_ = [("hWnd", HWND),
+ ("message", c_uint),
+ ("wParam", WPARAM),
+ ("lParam", LPARAM),
+ ("time", DWORD),
+ ("pt", POINT)]
+tagMSG = MSG
+MAX_PATH = 260
+
+class WIN32_FIND_DATAA(Structure):
+ _fields_ = [("dwFileAttributes", DWORD),
+ ("ftCreationTime", FILETIME),
+ ("ftLastAccessTime", FILETIME),
+ ("ftLastWriteTime", FILETIME),
+ ("nFileSizeHigh", DWORD),
+ ("nFileSizeLow", DWORD),
+ ("dwReserved0", DWORD),
+ ("dwReserved1", DWORD),
+ ("cFileName", c_char * MAX_PATH),
+ ("cAlternateFileName", c_char * 14)]
+
+class WIN32_FIND_DATAW(Structure):
+ _fields_ = [("dwFileAttributes", DWORD),
+ ("ftCreationTime", FILETIME),
+ ("ftLastAccessTime", FILETIME),
+ ("ftLastWriteTime", FILETIME),
+ ("nFileSizeHigh", DWORD),
+ ("nFileSizeLow", DWORD),
+ ("dwReserved0", DWORD),
+ ("dwReserved1", DWORD),
+ ("cFileName", c_wchar * MAX_PATH),
+ ("cAlternateFileName", c_wchar * 14)]
+
+__all__ = ['ATOM', 'BOOL', 'BOOLEAN', 'BYTE', 'COLORREF', 'DOUBLE', 'DWORD',
+ 'FILETIME', 'FLOAT', 'HACCEL', 'HANDLE', 'HBITMAP', 'HBRUSH',
+ 'HCOLORSPACE', 'HDC', 'HDESK', 'HDWP', 'HENHMETAFILE', 'HFONT',
+ 'HGDIOBJ', 'HGLOBAL', 'HHOOK', 'HICON', 'HINSTANCE', 'HKEY',
+ 'HKL', 'HLOCAL', 'HMENU', 'HMETAFILE', 'HMODULE', 'HMONITOR',
+ 'HPALETTE', 'HPEN', 'HRGN', 'HRSRC', 'HSTR', 'HTASK', 'HWINSTA',
+ 'HWND', 'INT', 'LANGID', 'LARGE_INTEGER', 'LCID', 'LCTYPE',
+ 'LGRPID', 'LONG', 'LPARAM', 'LPCOLESTR', 'LPCSTR', 'LPCVOID',
+ 'LPCWSTR', 'LPOLESTR', 'LPSTR', 'LPVOID', 'LPWSTR', 'MAX_PATH',
+ 'MSG', 'OLESTR', 'POINT', 'POINTL', 'RECT', 'RECTL', 'RGB',
+ 'SC_HANDLE', 'SERVICE_STATUS_HANDLE', 'SHORT', 'SIZE', 'SIZEL',
+ 'SMALL_RECT', 'UINT', 'ULARGE_INTEGER', 'ULONG', 'USHORT',
+ 'VARIANT_BOOL', 'WCHAR', 'WIN32_FIND_DATAA', 'WIN32_FIND_DATAW',
+ 'WORD', 'WPARAM', '_COORD', '_FILETIME', '_LARGE_INTEGER',
+ '_POINTL', '_RECTL', '_SMALL_RECT', '_ULARGE_INTEGER', 'tagMSG',
+ 'tagPOINT', 'tagRECT', 'tagSIZE']
diff --git a/cashew/Lib/decimal.py b/cashew/Lib/decimal.py
new file mode 100644
index 0000000..95aadb1
--- /dev/null
+++ b/cashew/Lib/decimal.py
@@ -0,0 +1,6233 @@
+# Copyright (c) 2004 Python Software Foundation.
+# All rights reserved.
+
+# Written by Eric Price
+# and Facundo Batista
+# and Raymond Hettinger
+# and Aahz
+# and Tim Peters
+
+# This module is currently Py2.3 compatible and should be kept that way
+# unless a major compelling advantage arises. IOW, 2.3 compatibility is
+# strongly preferred, but not guaranteed.
+
+# Also, this module should be kept in sync with the latest updates of
+# the IBM specification as it evolves. Those updates will be treated
+# as bug fixes (deviation from the spec is a compatibility, usability
+# bug) and will be backported. At this point the spec is stabilizing
+# and the updates are becoming fewer, smaller, and less significant.
+
+"""
+This is a Py2.3 implementation of decimal floating point arithmetic based on
+the General Decimal Arithmetic Specification:
+
+ http://speleotrove.com/decimal/decarith.html
+
+and IEEE standard 854-1987:
+
+ http://en.wikipedia.org/wiki/IEEE_854-1987
+
+Decimal floating point has finite precision with arbitrarily large bounds.
+
+The purpose of this module is to support arithmetic using familiar
+"schoolhouse" rules and to avoid some of the tricky representation
+issues associated with binary floating point. The package is especially
+useful for financial applications or for contexts where users have
+expectations that are at odds with binary floating point (for instance,
+in binary floating point, 1.00 % 0.1 gives 0.09999999999999995 instead
+of the expected Decimal('0.00') returned by decimal floating point).
+
+Here are some examples of using the decimal module:
+
+>>> from decimal import *
+>>> setcontext(ExtendedContext)
+>>> Decimal(0)
+Decimal('0')
+>>> Decimal('1')
+Decimal('1')
+>>> Decimal('-.0123')
+Decimal('-0.0123')
+>>> Decimal(123456)
+Decimal('123456')
+>>> Decimal('123.45e12345678901234567890')
+Decimal('1.2345E+12345678901234567892')
+>>> Decimal('1.33') + Decimal('1.27')
+Decimal('2.60')
+>>> Decimal('12.34') + Decimal('3.87') - Decimal('18.41')
+Decimal('-2.20')
+>>> dig = Decimal(1)
+>>> print dig / Decimal(3)
+0.333333333
+>>> getcontext().prec = 18
+>>> print dig / Decimal(3)
+0.333333333333333333
+>>> print dig.sqrt()
+1
+>>> print Decimal(3).sqrt()
+1.73205080756887729
+>>> print Decimal(3) ** 123
+4.85192780976896427E+58
+>>> inf = Decimal(1) / Decimal(0)
+>>> print inf
+Infinity
+>>> neginf = Decimal(-1) / Decimal(0)
+>>> print neginf
+-Infinity
+>>> print neginf + inf
+NaN
+>>> print neginf * inf
+-Infinity
+>>> print dig / 0
+Infinity
+>>> getcontext().traps[DivisionByZero] = 1
+>>> print dig / 0
+Traceback (most recent call last):
+ ...
+ ...
+ ...
+DivisionByZero: x / 0
+>>> c = Context()
+>>> c.traps[InvalidOperation] = 0
+>>> print c.flags[InvalidOperation]
+0
+>>> c.divide(Decimal(0), Decimal(0))
+Decimal('NaN')
+>>> c.traps[InvalidOperation] = 1
+>>> print c.flags[InvalidOperation]
+1
+>>> c.flags[InvalidOperation] = 0
+>>> print c.flags[InvalidOperation]
+0
+>>> print c.divide(Decimal(0), Decimal(0))
+Traceback (most recent call last):
+ ...
+ ...
+ ...
+InvalidOperation: 0 / 0
+>>> print c.flags[InvalidOperation]
+1
+>>> c.flags[InvalidOperation] = 0
+>>> c.traps[InvalidOperation] = 0
+>>> print c.divide(Decimal(0), Decimal(0))
+NaN
+>>> print c.flags[InvalidOperation]
+1
+>>>
+"""
+
+__all__ = [
+ # Two major classes
+ 'Decimal', 'Context',
+
+ # Contexts
+ 'DefaultContext', 'BasicContext', 'ExtendedContext',
+
+ # Exceptions
+ 'DecimalException', 'Clamped', 'InvalidOperation', 'DivisionByZero',
+ 'Inexact', 'Rounded', 'Subnormal', 'Overflow', 'Underflow',
+
+ # Constants for use in setting up contexts
+ 'ROUND_DOWN', 'ROUND_HALF_UP', 'ROUND_HALF_EVEN', 'ROUND_CEILING',
+ 'ROUND_FLOOR', 'ROUND_UP', 'ROUND_HALF_DOWN', 'ROUND_05UP',
+
+ # Functions for manipulating contexts
+ 'setcontext', 'getcontext', 'localcontext'
+]
+
+__version__ = '1.70' # Highest version of the spec this complies with
+
+import math as _math
+import numbers as _numbers
+
+try:
+ from collections import namedtuple as _namedtuple
+ DecimalTuple = _namedtuple('DecimalTuple', 'sign digits exponent')
+except ImportError:
+ DecimalTuple = lambda *args: args
+
+# Rounding
+ROUND_DOWN = 'ROUND_DOWN'
+ROUND_HALF_UP = 'ROUND_HALF_UP'
+ROUND_HALF_EVEN = 'ROUND_HALF_EVEN'
+ROUND_CEILING = 'ROUND_CEILING'
+ROUND_FLOOR = 'ROUND_FLOOR'
+ROUND_UP = 'ROUND_UP'
+ROUND_HALF_DOWN = 'ROUND_HALF_DOWN'
+ROUND_05UP = 'ROUND_05UP'
+
+# Errors
+
+class DecimalException(ArithmeticError):
+ """Base exception class.
+
+ Used exceptions derive from this.
+ If an exception derives from another exception besides this (such as
+ Underflow (Inexact, Rounded, Subnormal)), that indicates that it is only
+ called if the others are present. This isn't actually used for
+ anything, though.
+
+ handle -- Called when context._raise_error is called and the
+ trap_enabler is not set. First argument is self, second is the
+ context. More arguments can be given, those being after
+ the explanation in _raise_error (For example,
+ context._raise_error(NewError, '(-x)!', self._sign) would
+ call NewError().handle(context, self._sign).)
+
+ To define a new exception, it should be sufficient to have it derive
+ from DecimalException.
+ """
+ def handle(self, context, *args):
+ pass
+
+
+class Clamped(DecimalException):
+ """Exponent of a 0 changed to fit bounds.
+
+ This occurs and signals clamped if the exponent of a result has been
+ altered in order to fit the constraints of a specific concrete
+ representation. This may occur when the exponent of a zero result would
+ be outside the bounds of a representation, or when a large normal
+ number would have an encoded exponent that cannot be represented. In
+ this latter case, the exponent is reduced to fit and the corresponding
+ number of zero digits are appended to the coefficient ("fold-down").
+ """
+
+class InvalidOperation(DecimalException):
+ """An invalid operation was performed.
+
+ Various bad things cause this:
+
+ Something creates a signaling NaN
+ -INF + INF
+ 0 * (+-)INF
+ (+-)INF / (+-)INF
+ x % 0
+ (+-)INF % x
+ x._rescale( non-integer )
+ sqrt(-x) , x > 0
+ 0 ** 0
+ x ** (non-integer)
+ x ** (+-)INF
+ An operand is invalid
+
+ The result of the operation after these is a quiet positive NaN,
+ except when the cause is a signaling NaN, in which case the result is
+ also a quiet NaN, but with the original sign, and an optional
+ diagnostic information.
+ """
+ def handle(self, context, *args):
+ if args:
+ ans = _dec_from_triple(args[0]._sign, args[0]._int, 'n', True)
+ return ans._fix_nan(context)
+ return _NaN
+
+class ConversionSyntax(InvalidOperation):
+ """Trying to convert badly formed string.
+
+ This occurs and signals invalid-operation if a string is being
+ converted to a number and it does not conform to the numeric string
+ syntax. The result is [0,qNaN].
+ """
+ def handle(self, context, *args):
+ return _NaN
+
+class DivisionByZero(DecimalException, ZeroDivisionError):
+ """Division by 0.
+
+ This occurs and signals division-by-zero if division of a finite number
+ by zero was attempted (during a divide-integer or divide operation, or a
+ power operation with negative right-hand operand), and the dividend was
+ not zero.
+
+ The result of the operation is [sign,inf], where sign is the exclusive
+ or of the signs of the operands for divide, or is 1 for an odd power of
+ -0, for power.
+ """
+
+ def handle(self, context, sign, *args):
+ return _SignedInfinity[sign]
+
+class DivisionImpossible(InvalidOperation):
+ """Cannot perform the division adequately.
+
+ This occurs and signals invalid-operation if the integer result of a
+ divide-integer or remainder operation had too many digits (would be
+ longer than precision). The result is [0,qNaN].
+ """
+
+ def handle(self, context, *args):
+ return _NaN
+
+class DivisionUndefined(InvalidOperation, ZeroDivisionError):
+ """Undefined result of division.
+
+ This occurs and signals invalid-operation if division by zero was
+ attempted (during a divide-integer, divide, or remainder operation), and
+ the dividend is also zero. The result is [0,qNaN].
+ """
+
+ def handle(self, context, *args):
+ return _NaN
+
+class Inexact(DecimalException):
+ """Had to round, losing information.
+
+ This occurs and signals inexact whenever the result of an operation is
+ not exact (that is, it needed to be rounded and any discarded digits
+ were non-zero), or if an overflow or underflow condition occurs. The
+ result in all cases is unchanged.
+
+ The inexact signal may be tested (or trapped) to determine if a given
+ operation (or sequence of operations) was inexact.
+ """
+
+class InvalidContext(InvalidOperation):
+ """Invalid context. Unknown rounding, for example.
+
+ This occurs and signals invalid-operation if an invalid context was
+ detected during an operation. This can occur if contexts are not checked
+ on creation and either the precision exceeds the capability of the
+ underlying concrete representation or an unknown or unsupported rounding
+ was specified. These aspects of the context need only be checked when
+ the values are required to be used. The result is [0,qNaN].
+ """
+
+ def handle(self, context, *args):
+ return _NaN
+
+class Rounded(DecimalException):
+ """Number got rounded (not necessarily changed during rounding).
+
+ This occurs and signals rounded whenever the result of an operation is
+ rounded (that is, some zero or non-zero digits were discarded from the
+ coefficient), or if an overflow or underflow condition occurs. The
+ result in all cases is unchanged.
+
+ The rounded signal may be tested (or trapped) to determine if a given
+ operation (or sequence of operations) caused a loss of precision.
+ """
+
+class Subnormal(DecimalException):
+ """Exponent < Emin before rounding.
+
+ This occurs and signals subnormal whenever the result of a conversion or
+ operation is subnormal (that is, its adjusted exponent is less than
+ Emin, before any rounding). The result in all cases is unchanged.
+
+ The subnormal signal may be tested (or trapped) to determine if a given
+ operation (or sequence of operations) yielded a subnormal result.
+ """
+
+class Overflow(Inexact, Rounded):
+ """Numerical overflow.
+
+ This occurs and signals overflow if the adjusted exponent of a result
+ (from a conversion or from an operation that is not an attempt to divide
+ by zero), after rounding, would be greater than the largest value that
+ can be handled by the implementation (the value Emax).
+
+ The result depends on the rounding mode:
+
+ For round-half-up and round-half-even (and for round-half-down and
+ round-up, if implemented), the result of the operation is [sign,inf],
+ where sign is the sign of the intermediate result. For round-down, the
+ result is the largest finite number that can be represented in the
+ current precision, with the sign of the intermediate result. For
+ round-ceiling, the result is the same as for round-down if the sign of
+ the intermediate result is 1, or is [0,inf] otherwise. For round-floor,
+ the result is the same as for round-down if the sign of the intermediate
+ result is 0, or is [1,inf] otherwise. In all cases, Inexact and Rounded
+ will also be raised.
+ """
+
+ def handle(self, context, sign, *args):
+ if context.rounding in (ROUND_HALF_UP, ROUND_HALF_EVEN,
+ ROUND_HALF_DOWN, ROUND_UP):
+ return _SignedInfinity[sign]
+ if sign == 0:
+ if context.rounding == ROUND_CEILING:
+ return _SignedInfinity[sign]
+ return _dec_from_triple(sign, '9'*context.prec,
+ context.Emax-context.prec+1)
+ if sign == 1:
+ if context.rounding == ROUND_FLOOR:
+ return _SignedInfinity[sign]
+ return _dec_from_triple(sign, '9'*context.prec,
+ context.Emax-context.prec+1)
+
+
+class Underflow(Inexact, Rounded, Subnormal):
+ """Numerical underflow with result rounded to 0.
+
+ This occurs and signals underflow if a result is inexact and the
+ adjusted exponent of the result would be smaller (more negative) than
+ the smallest value that can be handled by the implementation (the value
+ Emin). That is, the result is both inexact and subnormal.
+
+ The result after an underflow will be a subnormal number rounded, if
+ necessary, so that its exponent is not less than Etiny. This may result
+ in 0 with the sign of the intermediate result and an exponent of Etiny.
+
+ In all cases, Inexact, Rounded, and Subnormal will also be raised.
+ """
+
+# List of public traps and flags
+_signals = [Clamped, DivisionByZero, Inexact, Overflow, Rounded,
+ Underflow, InvalidOperation, Subnormal]
+
+# Map conditions (per the spec) to signals
+_condition_map = {ConversionSyntax:InvalidOperation,
+ DivisionImpossible:InvalidOperation,
+ DivisionUndefined:InvalidOperation,
+ InvalidContext:InvalidOperation}
+
+##### Context Functions ##################################################
+
+# The getcontext() and setcontext() functions manage access to a thread-local
+# current context. Py2.4 offers direct support for thread locals. If that
+# is not available, use threading.currentThread() which is slower but will
+# work for older Pythons. If threads are not part of the build, create a
+# mock threading object with threading.local() returning the module namespace.
+
+try:
+ import threading
+except ImportError:
+ # Python was compiled without threads; create a mock object instead
+ import sys
+ class MockThreading(object):
+ def local(self, sys=sys):
+ return sys.modules[__name__]
+ threading = MockThreading()
+ del sys, MockThreading
+
+try:
+ threading.local
+
+except AttributeError:
+
+ # To fix reloading, force it to create a new context
+ # Old contexts have different exceptions in their dicts, making problems.
+ if hasattr(threading.currentThread(), '__decimal_context__'):
+ del threading.currentThread().__decimal_context__
+
+ def setcontext(context):
+ """Set this thread's context to context."""
+ if context in (DefaultContext, BasicContext, ExtendedContext):
+ context = context.copy()
+ context.clear_flags()
+ threading.currentThread().__decimal_context__ = context
+
+ def getcontext():
+ """Returns this thread's context.
+
+ If this thread does not yet have a context, returns
+ a new context and sets this thread's context.
+ New contexts are copies of DefaultContext.
+ """
+ try:
+ return threading.currentThread().__decimal_context__
+ except AttributeError:
+ context = Context()
+ threading.currentThread().__decimal_context__ = context
+ return context
+
+else:
+
+ local = threading.local()
+ if hasattr(local, '__decimal_context__'):
+ del local.__decimal_context__
+
+ def getcontext(_local=local):
+ """Returns this thread's context.
+
+ If this thread does not yet have a context, returns
+ a new context and sets this thread's context.
+ New contexts are copies of DefaultContext.
+ """
+ try:
+ return _local.__decimal_context__
+ except AttributeError:
+ context = Context()
+ _local.__decimal_context__ = context
+ return context
+
+ def setcontext(context, _local=local):
+ """Set this thread's context to context."""
+ if context in (DefaultContext, BasicContext, ExtendedContext):
+ context = context.copy()
+ context.clear_flags()
+ _local.__decimal_context__ = context
+
+ del threading, local # Don't contaminate the namespace
+
+def localcontext(ctx=None):
+ """Return a context manager for a copy of the supplied context
+
+ Uses a copy of the current context if no context is specified
+ The returned context manager creates a local decimal context
+ in a with statement:
+ def sin(x):
+ with localcontext() as ctx:
+ ctx.prec += 2
+ # Rest of sin calculation algorithm
+ # uses a precision 2 greater than normal
+ return +s # Convert result to normal precision
+
+ def sin(x):
+ with localcontext(ExtendedContext):
+ # Rest of sin calculation algorithm
+ # uses the Extended Context from the
+ # General Decimal Arithmetic Specification
+ return +s # Convert result to normal context
+
+ >>> setcontext(DefaultContext)
+ >>> print getcontext().prec
+ 28
+ >>> with localcontext():
+ ... ctx = getcontext()
+ ... ctx.prec += 2
+ ... print ctx.prec
+ ...
+ 30
+ >>> with localcontext(ExtendedContext):
+ ... print getcontext().prec
+ ...
+ 9
+ >>> print getcontext().prec
+ 28
+ """
+ if ctx is None: ctx = getcontext()
+ return _ContextManager(ctx)
+
+
+##### Decimal class #######################################################
+
+class Decimal(object):
+ """Floating point class for decimal arithmetic."""
+
+ __slots__ = ('_exp','_int','_sign', '_is_special')
+ # Generally, the value of the Decimal instance is given by
+ # (-1)**_sign * _int * 10**_exp
+ # Special values are signified by _is_special == True
+
+ # We're immutable, so use __new__ not __init__
+ def __new__(cls, value="0", context=None):
+ """Create a decimal point instance.
+
+ >>> Decimal('3.14') # string input
+ Decimal('3.14')
+ >>> Decimal((0, (3, 1, 4), -2)) # tuple (sign, digit_tuple, exponent)
+ Decimal('3.14')
+ >>> Decimal(314) # int or long
+ Decimal('314')
+ >>> Decimal(Decimal(314)) # another decimal instance
+ Decimal('314')
+ >>> Decimal(' 3.14 \\n') # leading and trailing whitespace okay
+ Decimal('3.14')
+ """
+
+ # Note that the coefficient, self._int, is actually stored as
+ # a string rather than as a tuple of digits. This speeds up
+ # the "digits to integer" and "integer to digits" conversions
+ # that are used in almost every arithmetic operation on
+ # Decimals. This is an internal detail: the as_tuple function
+ # and the Decimal constructor still deal with tuples of
+ # digits.
+
+ self = object.__new__(cls)
+
+ import sys
+ if sys.platform == 'cli':
+ import System
+ if isinstance(value, System.Decimal):
+ value = str(value)
+
+ # From a string
+ # REs insist on real strings, so we can too.
+ if isinstance(value, basestring):
+ m = _parser(value.strip())
+ if m is None:
+ if context is None:
+ context = getcontext()
+ return context._raise_error(ConversionSyntax,
+ "Invalid literal for Decimal: %r" % value)
+
+ if m.group('sign') == "-":
+ self._sign = 1
+ else:
+ self._sign = 0
+ intpart = m.group('int')
+ if intpart is not None:
+ # finite number
+ fracpart = m.group('frac') or ''
+ exp = int(m.group('exp') or '0')
+ self._int = str(int(intpart+fracpart))
+ self._exp = exp - len(fracpart)
+ self._is_special = False
+ else:
+ diag = m.group('diag')
+ if diag is not None:
+ # NaN
+ self._int = str(int(diag or '0')).lstrip('0')
+ if m.group('signal'):
+ self._exp = 'N'
+ else:
+ self._exp = 'n'
+ else:
+ # infinity
+ self._int = '0'
+ self._exp = 'F'
+ self._is_special = True
+ return self
+
+ # From an integer
+ if isinstance(value, (int,long)):
+ if value >= 0:
+ self._sign = 0
+ else:
+ self._sign = 1
+ self._exp = 0
+ self._int = str(abs(value))
+ self._is_special = False
+ return self
+
+ # From another decimal
+ if isinstance(value, Decimal):
+ self._exp = value._exp
+ self._sign = value._sign
+ self._int = value._int
+ self._is_special = value._is_special
+ return self
+
+ # From an internal working value
+ if isinstance(value, _WorkRep):
+ self._sign = value.sign
+ self._int = str(value.int)
+ self._exp = int(value.exp)
+ self._is_special = False
+ return self
+
+ # tuple/list conversion (possibly from as_tuple())
+ if isinstance(value, (list,tuple)):
+ if len(value) != 3:
+ raise ValueError('Invalid tuple size in creation of Decimal '
+ 'from list or tuple. The list or tuple '
+ 'should have exactly three elements.')
+ # process sign. The isinstance test rejects floats
+ if not (isinstance(value[0], (int, long)) and value[0] in (0,1)):
+ raise ValueError("Invalid sign. The first value in the tuple "
+ "should be an integer; either 0 for a "
+ "positive number or 1 for a negative number.")
+ self._sign = value[0]
+ if value[2] == 'F':
+ # infinity: value[1] is ignored
+ self._int = '0'
+ self._exp = value[2]
+ self._is_special = True
+ else:
+ # process and validate the digits in value[1]
+ digits = []
+ for digit in value[1]:
+ if isinstance(digit, (int, long)) and 0 <= digit <= 9:
+ # skip leading zeros
+ if digits or digit != 0:
+ digits.append(digit)
+ else:
+ raise ValueError("The second value in the tuple must "
+ "be composed of integers in the range "
+ "0 through 9.")
+ if value[2] in ('n', 'N'):
+ # NaN: digits form the diagnostic
+ self._int = ''.join(map(str, digits))
+ self._exp = value[2]
+ self._is_special = True
+ elif isinstance(value[2], (int, long)):
+ # finite number: digits give the coefficient
+ self._int = ''.join(map(str, digits or [0]))
+ self._exp = value[2]
+ self._is_special = False
+ else:
+ raise ValueError("The third value in the tuple must "
+ "be an integer, or one of the "
+ "strings 'F', 'n', 'N'.")
+ return self
+
+ if isinstance(value, float):
+ value = Decimal.from_float(value)
+ self._exp = value._exp
+ self._sign = value._sign
+ self._int = value._int
+ self._is_special = value._is_special
+ return self
+
+ raise TypeError("Cannot convert %r to Decimal" % value)
+
+ # @classmethod, but @decorator is not valid Python 2.3 syntax, so
+ # don't use it (see notes on Py2.3 compatibility at top of file)
+ def from_float(cls, f):
+ """Converts a float to a decimal number, exactly.
+
+ Note that Decimal.from_float(0.1) is not the same as Decimal('0.1').
+ Since 0.1 is not exactly representable in binary floating point, the
+ value is stored as the nearest representable value which is
+ 0x1.999999999999ap-4. The exact equivalent of the value in decimal
+ is 0.1000000000000000055511151231257827021181583404541015625.
+
+ >>> Decimal.from_float(0.1)
+ Decimal('0.1000000000000000055511151231257827021181583404541015625')
+ >>> Decimal.from_float(float('nan'))
+ Decimal('NaN')
+ >>> Decimal.from_float(float('inf'))
+ Decimal('Infinity')
+ >>> Decimal.from_float(-float('inf'))
+ Decimal('-Infinity')
+ >>> Decimal.from_float(-0.0)
+ Decimal('-0')
+
+ """
+ if isinstance(f, (int, long)): # handle integer inputs
+ return cls(f)
+ if _math.isinf(f) or _math.isnan(f): # raises TypeError if not a float
+ return cls(repr(f))
+ if _math.copysign(1.0, f) == 1.0:
+ sign = 0
+ else:
+ sign = 1
+ n, d = abs(f).as_integer_ratio()
+ k = d.bit_length() - 1
+ result = _dec_from_triple(sign, str(n*5**k), -k)
+ if cls is Decimal:
+ return result
+ else:
+ return cls(result)
+ from_float = classmethod(from_float)
+
+ def _isnan(self):
+ """Returns whether the number is not actually one.
+
+ 0 if a number
+ 1 if NaN
+ 2 if sNaN
+ """
+ if self._is_special:
+ exp = self._exp
+ if exp == 'n':
+ return 1
+ elif exp == 'N':
+ return 2
+ return 0
+
+ def _isinfinity(self):
+ """Returns whether the number is infinite
+
+ 0 if finite or not a number
+ 1 if +INF
+ -1 if -INF
+ """
+ if self._exp == 'F':
+ if self._sign:
+ return -1
+ return 1
+ return 0
+
+ def _check_nans(self, other=None, context=None):
+ """Handle NaN operands; meant to be called before an operation.
+
+ If self or other is an sNaN, return the result of
+ context._raise_error(InvalidOperation, 'sNaN', operand), self first.
+ Else if self or other is a quiet NaN, return it via _fix_nan(context).
+
+ Return 0 if neither operand is a NaN (other may be None).
+ """
+
+ self_is_nan = self._isnan()
+ if other is None:
+ other_is_nan = False
+ else:
+ other_is_nan = other._isnan()
+
+ if self_is_nan or other_is_nan:
+ if context is None:
+ context = getcontext()
+
+ if self_is_nan == 2:
+ return context._raise_error(InvalidOperation, 'sNaN',
+ self)
+ if other_is_nan == 2:
+ return context._raise_error(InvalidOperation, 'sNaN',
+ other)
+ if self_is_nan:
+ return self._fix_nan(context)
+
+ return other._fix_nan(context)
+ return 0
+
+ def _compare_check_nans(self, other, context):
+ """Version of _check_nans used for the signaling comparisons
+ compare_signal, __le__, __lt__, __ge__, __gt__.
+
+ Signal InvalidOperation if either self or other is a (quiet
+ or signaling) NaN. Signaling NaNs take precedence over quiet
+ NaNs.
+
+ Return 0 if neither operand is a NaN.
+
+ """
+ if context is None:
+ context = getcontext()
+
+ if self._is_special or other._is_special:
+ if self.is_snan():
+ return context._raise_error(InvalidOperation,
+ 'comparison involving sNaN',
+ self)
+ elif other.is_snan():
+ return context._raise_error(InvalidOperation,
+ 'comparison involving sNaN',
+ other)
+ elif self.is_qnan():
+ return context._raise_error(InvalidOperation,
+ 'comparison involving NaN',
+ self)
+ elif other.is_qnan():
+ return context._raise_error(InvalidOperation,
+ 'comparison involving NaN',
+ other)
+ return 0
+
+ def __nonzero__(self):
+ """Return True if self is nonzero; otherwise return False.
+
+ NaNs and infinities are considered nonzero.
+ """
+ return self._is_special or self._int != '0'
+
+ def _cmp(self, other):
+ """Compare the two non-NaN decimal instances self and other.
+
+ Returns -1 if self < other, 0 if self == other and 1
+ if self > other. This routine is for internal use only."""
+
+ if self._is_special or other._is_special:
+ self_inf = self._isinfinity()
+ other_inf = other._isinfinity()
+ if self_inf == other_inf:
+ return 0
+ elif self_inf < other_inf:
+ return -1
+ else:
+ return 1
+
+ # check for zeros; Decimal('0') == Decimal('-0')
+ if not self:
+ if not other:
+ return 0
+ else:
+ return -((-1)**other._sign)
+ if not other:
+ return (-1)**self._sign
+
+ # If different signs, neg one is less
+ if other._sign < self._sign:
+ return -1
+ if self._sign < other._sign:
+ return 1
+
+ self_adjusted = self.adjusted()
+ other_adjusted = other.adjusted()
+ if self_adjusted == other_adjusted:
+ self_padded = self._int + '0'*(self._exp - other._exp)
+ other_padded = other._int + '0'*(other._exp - self._exp)
+ if self_padded == other_padded:
+ return 0
+ elif self_padded < other_padded:
+ return -(-1)**self._sign
+ else:
+ return (-1)**self._sign
+ elif self_adjusted > other_adjusted:
+ return (-1)**self._sign
+ else: # self_adjusted < other_adjusted
+ return -((-1)**self._sign)
+
+ # Note: The Decimal standard doesn't cover rich comparisons for
+ # Decimals. In particular, the specification is silent on the
+ # subject of what should happen for a comparison involving a NaN.
+ # We take the following approach:
+ #
+ # == comparisons involving a quiet NaN always return False
+ # != comparisons involving a quiet NaN always return True
+ # == or != comparisons involving a signaling NaN signal
+ # InvalidOperation, and return False or True as above if the
+ # InvalidOperation is not trapped.
+ # <, >, <= and >= comparisons involving a (quiet or signaling)
+ # NaN signal InvalidOperation, and return False if the
+ # InvalidOperation is not trapped.
+ #
+ # This behavior is designed to conform as closely as possible to
+ # that specified by IEEE 754.
+
+ def __eq__(self, other, context=None):
+ other = _convert_other(other, allow_float=True)
+ if other is NotImplemented:
+ return other
+ if self._check_nans(other, context):
+ return False
+ return self._cmp(other) == 0
+
+ def __ne__(self, other, context=None):
+ other = _convert_other(other, allow_float=True)
+ if other is NotImplemented:
+ return other
+ if self._check_nans(other, context):
+ return True
+ return self._cmp(other) != 0
+
+ def __lt__(self, other, context=None):
+ other = _convert_other(other, allow_float=True)
+ if other is NotImplemented:
+ return other
+ ans = self._compare_check_nans(other, context)
+ if ans:
+ return False
+ return self._cmp(other) < 0
+
def __le__(self, other, context=None):
    """Return self <= other.

    Returns NotImplemented for unconvertible operands and False when
    _compare_check_nans reports a NaN operand.
    """
    rhs = _convert_other(other, allow_float=True)
    if rhs is NotImplemented:
        return NotImplemented
    nan_found = self._compare_check_nans(rhs, context)
    if nan_found:
        return False
    return self._cmp(rhs) <= 0
+
def __gt__(self, other, context=None):
    """Return self > other.

    Returns NotImplemented for unconvertible operands and False when
    _compare_check_nans reports a NaN operand.
    """
    rhs = _convert_other(other, allow_float=True)
    if rhs is NotImplemented:
        return NotImplemented
    nan_found = self._compare_check_nans(rhs, context)
    if nan_found:
        return False
    return self._cmp(rhs) > 0
+
def __ge__(self, other, context=None):
    """Return self >= other.

    Returns NotImplemented for unconvertible operands and False when
    _compare_check_nans reports a NaN operand.
    """
    rhs = _convert_other(other, allow_float=True)
    if rhs is NotImplemented:
        return NotImplemented
    nan_found = self._compare_check_nans(rhs, context)
    if nan_found:
        return False
    return self._cmp(rhs) >= 0
+
def compare(self, other, context=None):
    """Compare self with other and return the outcome as a Decimal.

    -1 => self < other
     0 => self = other
     1 => self > other
    NaN => at least one operand is a NaN

    Behaves like __cmp__, except that the result is a Decimal instance.
    """
    rhs = _convert_other(other, raiseit=True)

    # Compare(NaN, NaN) = NaN
    involves_special = self._is_special or (rhs and rhs._is_special)
    if involves_special:
        nan_result = self._check_nans(rhs, context)
        if nan_result:
            return nan_result

    return Decimal(self._cmp(rhs))
+
def __hash__(self):
    """x.__hash__() <==> hash(x)"""
    # The hash must depend only on the numeric value, never on the
    # representation: hash(Decimal('100E-1')) == hash(Decimal('10')),
    # and integral Decimals must hash like the corresponding ints.

    if self._is_special:
        # Equality tests on a signaling NaN can raise, and set/dict
        # membership performs such tests implicitly; refusing to hash
        # keeps signaling NaNs out of sets and dict keys altogether.
        if self.is_snan():
            raise TypeError('Cannot hash a signaling NaN value.')
        if self.is_nan():
            # 0 to match hash(float('nan'))
            return 0
        # infinities: values chosen to match hash(float('inf')) and
        # hash(float('-inf')).
        return -271828 if self._sign else 314159

    # Floats and Decimals may be compared (though not combined
    # arithmetically) in Python 2.7, so a Decimal that is exactly
    # representable as a float has to share that float's hash.
    float_value = float(self)
    if Decimal.from_float(float_value) == self:
        return hash(float_value)

    if self._isinteger():
        rep = _WorkRep(self.to_integral_value())
        # hash(n) == hash(m) for nonzero integers n and m of the same
        # sign that are congruent modulo 2**64-1, so the power of ten
        # can be reduced modulo 2**64-1 to keep huge exponents feasible.
        return hash((-1)**rep.sign*rep.int*pow(10, rep.exp, 2**64-1))

    # A nonzero nonspecial Decimal is faithfully described by its sign,
    # its adjusted exponent and its coefficient without trailing zeros.
    return hash((self._sign,
                 self._exp+len(self._int),
                 self._int.rstrip('0')))
+
def as_tuple(self):
    """Return the number as a DecimalTuple(sign, digits, exponent).

    The internal representation is exposed exactly as stored; digits is
    a tuple with one int per coefficient character.
    """
    digit_tuple = tuple(int(ch) for ch in self._int)
    return DecimalTuple(self._sign, digit_tuple, self._exp)
+
+ def __repr__(self):
+ """Represents the number as an instance of Decimal."""
+ # Invariant: eval(repr(d)) == d
+ return "Decimal('%s')" % str(self)
+
+ def __str__(self, eng=False, context=None):
+ """Return string representation of the number in scientific notation.
+
+ Captures all of the information in the underlying representation.
+ """
+
+ sign = ['', '-'][self._sign]
+ if self._is_special:
+ if self._exp == 'F':
+ return sign + 'Infinity'
+ elif self._exp == 'n':
+ return sign + 'NaN' + self._int
+ else: # self._exp == 'N'
+ return sign + 'sNaN' + self._int
+
+ # number of digits of self._int to left of decimal point
+ leftdigits = self._exp + len(self._int)
+
+ # dotplace is number of digits of self._int to the left of the
+ # decimal point in the mantissa of the output string (that is,
+ # after adjusting the exponent)
+ if self._exp <= 0 and leftdigits > -6:
+ # no exponent required
+ dotplace = leftdigits
+ elif not eng:
+ # usual scientific notation: 1 digit on left of the point
+ dotplace = 1
+ elif self._int == '0':
+ # engineering notation, zero
+ dotplace = (leftdigits + 1) % 3 - 1
+ else:
+ # engineering notation, nonzero
+ dotplace = (leftdigits - 1) % 3 + 1
+
+ if dotplace <= 0:
+ intpart = '0'
+ fracpart = '.' + '0'*(-dotplace) + self._int
+ elif dotplace >= len(self._int):
+ intpart = self._int+'0'*(dotplace-len(self._int))
+ fracpart = ''
+ else:
+ intpart = self._int[:dotplace]
+ fracpart = '.' + self._int[dotplace:]
+ if leftdigits == dotplace:
+ exp = ''
+ else:
+ if context is None:
+ context = getcontext()
+ exp = ['e', 'E'][context.capitals] + "%+d" % (leftdigits-dotplace)
+
+ return sign + intpart + fracpart + exp
+
def to_eng_string(self, context=None):
    """Return the string form, with engineering notation for exponents.

    In engineering notation the exponent is always a multiple of 3, so
    up to 3 digits may appear left of the decimal point and one or two
    trailing zeros may have to be added.
    """
    as_text = self.__str__(eng=True, context=context)
    return as_text
+
def __neg__(self, context=None):
    """Return a sign-flipped copy of self, rounded to the context.

    A NaN operand is handled by _check_nans and its result returned.
    """
    if self._is_special:
        nan_result = self._check_nans(context=context)
        if nan_result:
            return nan_result

    if context is None:
        context = getcontext()

    # -Decimal('0') is Decimal('0'), not Decimal('-0'), except
    # in ROUND_FLOOR rounding mode.
    zero_stays_positive = not self and context.rounding != ROUND_FLOOR
    if zero_stays_positive:
        negated = self.copy_abs()
    else:
        negated = self.copy_negate()

    return negated._fix(context)
+
def __pos__(self, context=None):
    """Return a copy of self rounded to the context precision.

    A NaN operand is handled by _check_nans and its result returned.
    """
    if self._is_special:
        nan_result = self._check_nans(context=context)
        if nan_result:
            return nan_result

    if context is None:
        context = getcontext()

    # + (-0) = 0, except in ROUND_FLOOR rounding mode.
    zero_loses_sign = not self and context.rounding != ROUND_FLOOR
    if zero_loses_sign:
        copied = self.copy_abs()
    else:
        copied = Decimal(self)

    return copied._fix(context)
+
def __abs__(self, round=True, context=None):
    """Return the absolute value of self.

    When the keyword argument 'round' is false no rounding is done, and
    self.__abs__(round=False) is the same as self.copy_abs().
    """
    if not round:
        return self.copy_abs()

    if self._is_special:
        nan_result = self._check_nans(context=context)
        if nan_result:
            return nan_result

    # negative values are negated, everything else goes through unary plus
    if self._sign:
        return self.__neg__(context=context)
    return self.__pos__(context=context)
+
def __add__(self, other, context=None):
    """Returns self + other.

    -INF + INF (or the reverse) cause InvalidOperation errors.

    other is converted with _convert_other; NotImplemented is returned
    when that conversion yields NotImplemented.  When context is None
    the current context from getcontext() is used.  Every finite result
    is passed through _fix(context) before being returned.
    """
    other = _convert_other(other)
    if other is NotImplemented:
        return other

    if context is None:
        context = getcontext()

    if self._is_special or other._is_special:
        # NaN handling takes priority over the infinity rules below
        ans = self._check_nans(other, context)
        if ans:
            return ans

        if self._isinfinity():
            # If both INF, same sign => same as both, opposite => error.
            if self._sign != other._sign and other._isinfinity():
                return context._raise_error(InvalidOperation, '-INF + INF')
            return Decimal(self)
        if other._isinfinity():
            return Decimal(other)  # Can't both be infinity here

    # exponent used for zero results: the smaller of the two exponents
    exp = min(self._exp, other._exp)
    negativezero = 0
    if context.rounding == ROUND_FLOOR and self._sign != other._sign:
        # If the answer is 0, the sign should be negative, in this case.
        negativezero = 1

    if not self and not other:
        # zero + zero: sign is the smaller of the two sign flags, unless
        # negativezero forces a negative zero
        sign = min(self._sign, other._sign)
        if negativezero:
            sign = 1
        ans = _dec_from_triple(sign, '0', exp)
        ans = ans._fix(context)
        return ans
    if not self:
        # only self is zero: the result is other, rescaled and fixed
        exp = max(exp, other._exp - context.prec-1)
        ans = other._rescale(exp, context.rounding)
        ans = ans._fix(context)
        return ans
    if not other:
        # only other is zero: the result is self, rescaled and fixed
        exp = max(exp, self._exp - context.prec-1)
        ans = self._rescale(exp, context.rounding)
        ans = ans._fix(context)
        return ans

    op1 = _WorkRep(self)
    op2 = _WorkRep(other)
    op1, op2 = _normalize(op1, op2, context.prec)

    result = _WorkRep()
    if op1.sign != op2.sign:
        # Equal and opposite
        if op1.int == op2.int:
            ans = _dec_from_triple(negativezero, '0', exp)
            ans = ans._fix(context)
            return ans
        if op1.int < op2.int:
            op1, op2 = op2, op1
            # OK, now abs(op1) > abs(op2)
        if op1.sign == 1:
            result.sign = 1
            op1.sign, op2.sign = op2.sign, op1.sign
        else:
            result.sign = 0
            # So we know the sign, and op1 > 0.
    elif op1.sign == 1:
        # both operands negative: add magnitudes, result is negative
        result.sign = 1
        op1.sign, op2.sign = (0, 0)
    else:
        result.sign = 0
    # Now, op1 > abs(op2) > 0

    # op2.sign now only selects between adding and subtracting magnitudes
    if op2.sign == 0:
        result.int = op1.int + op2.int
    else:
        result.int = op1.int - op2.int

    result.exp = op1.exp
    ans = Decimal(result)
    ans = ans._fix(context)
    return ans

# reflected addition uses the same implementation
__radd__ = __add__
+
def __sub__(self, other, context=None):
    """Return self - other.

    NotImplemented is returned when other cannot be converted.
    """
    subtrahend = _convert_other(other)
    if subtrahend is NotImplemented:
        return NotImplemented

    if self._is_special or subtrahend._is_special:
        nan_result = self._check_nans(subtrahend, context=context)
        if nan_result:
            return nan_result

    # the difference is obtained by adding the negated right operand
    negated = subtrahend.copy_negate()
    return self.__add__(negated, context=context)
+
def __rsub__(self, other, context=None):
    """Return other - self (reflected subtraction)."""
    minuend = _convert_other(other)
    if minuend is NotImplemented:
        return NotImplemented

    return minuend.__sub__(self, context=context)
+
def __mul__(self, other, context=None):
    """Return self * other.

    (+-) INF * 0 (or its reverse) raise InvalidOperation.
    NotImplemented is returned when other cannot be converted.
    """
    factor = _convert_other(other)
    if factor is NotImplemented:
        return NotImplemented

    if context is None:
        context = getcontext()

    sign = self._sign ^ factor._sign

    if self._is_special or factor._is_special:
        nan_result = self._check_nans(factor, context)
        if nan_result:
            return nan_result

        if self._isinfinity():
            if not factor:
                return context._raise_error(InvalidOperation, '(+-)INF * 0')
            return _SignedInfinity[sign]

        if factor._isinfinity():
            if not self:
                return context._raise_error(InvalidOperation, '0 * (+-)INF')
            return _SignedInfinity[sign]

    exponent = self._exp + factor._exp

    if not self or not factor:
        # a zero operand gives a zero coefficient; _fix below still
        # repairs an out-of-bounds exponent
        coefficient = '0'
    elif self._int == '1':
        # self is a power of ten: reuse the other coefficient as is
        coefficient = factor._int
    elif factor._int == '1':
        coefficient = self._int
    else:
        coefficient = str(_WorkRep(self).int * _WorkRep(factor).int)

    product = _dec_from_triple(sign, coefficient, exponent)
    return product._fix(context)

# reflected multiplication uses the same implementation
__rmul__ = __mul__
+
def __truediv__(self, other, context=None):
    """Return self / other, rounded to the context.

    NotImplemented is returned when other cannot be converted.
    """
    divisor = _convert_other(other)
    if divisor is NotImplemented:
        return NotImplemented

    if context is None:
        context = getcontext()

    sign = self._sign ^ divisor._sign

    if self._is_special or divisor._is_special:
        nan_result = self._check_nans(divisor, context)
        if nan_result:
            return nan_result

        if self._isinfinity() and divisor._isinfinity():
            return context._raise_error(InvalidOperation, '(+-)INF/(+-)INF')

        if self._isinfinity():
            return _SignedInfinity[sign]

        if divisor._isinfinity():
            context._raise_error(Clamped, 'Division by infinity')
            return _dec_from_triple(sign, '0', context.Etiny())

    # Division by zero
    if not divisor:
        if not self:
            return context._raise_error(DivisionUndefined, '0 / 0')
        return context._raise_error(DivisionByZero, 'x / 0', sign)

    ideal_exp = self._exp - divisor._exp
    if not self:
        exp = ideal_exp
        coeff = 0
    else:
        # both operands are finite and nonzero from here on
        shift = len(divisor._int) - len(self._int) + context.prec + 1
        exp = ideal_exp - shift
        num = _WorkRep(self)
        den = _WorkRep(divisor)
        if shift >= 0:
            coeff, leftover = divmod(num.int * 10**shift, den.int)
        else:
            coeff, leftover = divmod(num.int, den.int * 10**-shift)
        if leftover:
            # inexact quotient: nudge the last digit so that the
            # rounding done by _fix comes out right
            if coeff % 5 == 0:
                coeff += 1
        else:
            # exact quotient: strip trailing zeros to move the exponent
            # as near to the ideal exponent as possible
            while exp < ideal_exp and coeff % 10 == 0:
                coeff //= 10
                exp += 1

    quotient = _dec_from_triple(sign, str(coeff), exp)
    return quotient._fix(context)
+
def _divide(self, other, context):
    """Return (self // other, self % other), to context.prec precision.

    Callers must guarantee that neither operand is a NaN, that self is
    finite and that other is nonzero.
    """
    sign = self._sign ^ other._sign
    ideal_exp = self._exp if other._isinfinity() else min(self._exp, other._exp)

    expdiff = self.adjusted() - other.adjusted()
    if not self or other._isinfinity() or expdiff <= -2:
        # quotient is zero; the remainder is self at the ideal exponent
        return (_dec_from_triple(sign, '0', 0),
                self._rescale(ideal_exp, context.rounding))

    if expdiff <= context.prec:
        lhs = _WorkRep(self)
        rhs = _WorkRep(other)
        dividend, divisor = lhs.int, rhs.int
        # scale whichever operand has the larger exponent so that both
        # integers are expressed at a common exponent
        if lhs.exp >= rhs.exp:
            dividend *= 10**(lhs.exp - rhs.exp)
        else:
            divisor *= 10**(rhs.exp - lhs.exp)
        q, r = divmod(dividend, divisor)
        if q < 10**context.prec:
            return (_dec_from_triple(sign, str(q), 0),
                    _dec_from_triple(self._sign, str(r), ideal_exp))

    # Reaching this point means the quotient cannot be represented
    failure = context._raise_error(DivisionImpossible,
                                   'quotient too large in //, % or divmod')
    return failure, failure
+
def __rtruediv__(self, other, context=None):
    """Return other / self by delegating to other.__truediv__."""
    dividend = _convert_other(other)
    if dividend is NotImplemented:
        return NotImplemented
    return dividend.__truediv__(self, context=context)

# classic-division names share the true-division implementations
__div__ = __truediv__
__rdiv__ = __rtruediv__
+
def __divmod__(self, other, context=None):
    """Return the pair (self // other, self % other).

    NotImplemented is returned when other cannot be converted.
    """
    divisor = _convert_other(other)
    if divisor is NotImplemented:
        return NotImplemented

    if context is None:
        context = getcontext()

    nan_result = self._check_nans(divisor, context)
    if nan_result:
        return (nan_result, nan_result)

    sign = self._sign ^ divisor._sign
    if self._isinfinity():
        if divisor._isinfinity():
            failure = context._raise_error(InvalidOperation, 'divmod(INF, INF)')
            return failure, failure
        return (_SignedInfinity[sign],
                context._raise_error(InvalidOperation, 'INF % x'))

    if not divisor:
        if not self:
            failure = context._raise_error(DivisionUndefined, 'divmod(0, 0)')
            return failure, failure
        return (context._raise_error(DivisionByZero, 'x // 0', sign),
                context._raise_error(InvalidOperation, 'x % 0'))

    # only the remainder is fixed to the context afterwards
    quotient, remainder = self._divide(divisor, context)
    return quotient, remainder._fix(context)
+
def __rdivmod__(self, other, context=None):
    """Return divmod(other, self) by delegating to other.__divmod__."""
    dividend = _convert_other(other)
    if dividend is NotImplemented:
        return NotImplemented
    return dividend.__divmod__(self, context=context)
+
def __mod__(self, other, context=None):
    """Return self % other.

    NotImplemented is returned when other cannot be converted.
    """
    divisor = _convert_other(other)
    if divisor is NotImplemented:
        return NotImplemented

    if context is None:
        context = getcontext()

    nan_result = self._check_nans(divisor, context)
    if nan_result:
        return nan_result

    if self._isinfinity():
        return context._raise_error(InvalidOperation, 'INF % x')
    if not divisor:
        if self:
            return context._raise_error(InvalidOperation, 'x % 0')
        return context._raise_error(DivisionUndefined, '0 % 0')

    # take the remainder half of _divide and fix it to the context
    return self._divide(divisor, context)[1]._fix(context)
+
def __rmod__(self, other, context=None):
    """Return other % self by delegating to other.__mod__."""
    dividend = _convert_other(other)
    if dividend is NotImplemented:
        return NotImplemented
    return dividend.__mod__(self, context=context)
+
def remainder_near(self, other, context=None):
    """Return the remainder of self / other that lies nearest to zero.

    The result satisfies abs(remainder) <= abs(other)/2.
    """
    if context is None:
        context = getcontext()

    other = _convert_other(other, raiseit=True)

    nan_result = self._check_nans(other, context)
    if nan_result:
        return nan_result

    # an infinite dividend is an InvalidOperation
    if self._isinfinity():
        return context._raise_error(InvalidOperation,
                                    'remainder_near(infinity, x)')

    # a zero divisor is InvalidOperation, or DivisionUndefined for 0/0
    if not other:
        if self:
            return context._raise_error(InvalidOperation,
                                        'remainder_near(x, 0)')
        return context._raise_error(DivisionUndefined,
                                    'remainder_near(0, 0)')

    # an infinite divisor leaves self as the remainder
    if other._isinfinity():
        return Decimal(self)._fix(context)

    # a zero dividend stays zero, carried at the ideal exponent
    ideal_exponent = min(self._exp, other._exp)
    if not self:
        zero = _dec_from_triple(self._sign, '0', ideal_exponent)
        return zero._fix(context)

    # cheap screening of very large and very small quotients
    expdiff = self.adjusted() - other.adjusted()
    if expdiff >= context.prec + 1:
        # expdiff >= prec+1 => abs(self/other) > 10**prec
        return context._raise_error(DivisionImpossible)
    if expdiff <= -2:
        # expdiff <= -2 => abs(self/other) < 0.1
        rescaled = self._rescale(ideal_exponent, context.rounding)
        return rescaled._fix(context)

    # bring both operands to a common exponent before dividing
    lhs = _WorkRep(self)
    rhs = _WorkRep(other)
    dividend, divisor = lhs.int, rhs.int
    if lhs.exp >= rhs.exp:
        dividend *= 10**(lhs.exp - rhs.exp)
    else:
        divisor *= 10**(rhs.exp - lhs.exp)
    q, r = divmod(dividend, divisor)

    # The remainder is r*10**ideal_exponent and other is
    # +/-divisor*10**ideal_exponent.  Correct r so that
    # abs(remainder) <= abs(other)/2; the (q&1) term settles an exact
    # tie in favour of an even quotient.
    if 2*r + (q&1) > divisor:
        r -= divisor
        q += 1

    if q >= 10**context.prec:
        return context._raise_error(DivisionImpossible)

    # the result takes the sign of self, flipped when r went negative
    sign = self._sign
    if r < 0:
        sign = 1-sign
        r = -r

    return _dec_from_triple(sign, str(r), ideal_exponent)._fix(context)
+
def __floordiv__(self, other, context=None):
    """Return self // other.

    NotImplemented is returned when other cannot be converted.
    """
    divisor = _convert_other(other)
    if divisor is NotImplemented:
        return NotImplemented

    if context is None:
        context = getcontext()

    nan_result = self._check_nans(divisor, context)
    if nan_result:
        return nan_result

    if self._isinfinity():
        if divisor._isinfinity():
            return context._raise_error(InvalidOperation, 'INF // INF')
        return _SignedInfinity[self._sign ^ divisor._sign]

    if not divisor:
        if self:
            return context._raise_error(DivisionByZero, 'x // 0',
                                        self._sign ^ divisor._sign)
        return context._raise_error(DivisionUndefined, '0 // 0')

    # the quotient is the first element of the _divide pair
    return self._divide(divisor, context)[0]
+
def __rfloordiv__(self, other, context=None):
    """Return other // self by delegating to other.__floordiv__."""
    dividend = _convert_other(other)
    if dividend is NotImplemented:
        return NotImplemented
    return dividend.__floordiv__(self, context=context)
+
+ def __float__(self):
+ """Float representation."""
+ if self._isnan():
+ if self.is_snan():
+ raise ValueError("Cannot convert signaling NaN to float")
+ s = "-nan" if self._sign else "nan"
+ else:
+ s = str(self)
+ return float(s)
+
+ def __int__(self):
+ """Converts self to an int, truncating if necessary."""
+ if self._is_special:
+ if self._isnan():
+ raise ValueError("Cannot convert NaN to integer")
+ elif self._isinfinity():
+ raise OverflowError("Cannot convert infinity to integer")
+ s = (-1)**self._sign
+ if self._exp >= 0:
+ return s*int(self._int)*10**self._exp
+ else:
+ return s*int(self._int[:self._exp] or '0')
+
+ __trunc__ = __int__
+
@property
def real(self):
    """The real part of the number: the number itself."""
    return self
+
@property
def imag(self):
    """The imaginary part of the number: always Decimal(0)."""
    return Decimal(0)
+
def conjugate(self):
    """Return the complex conjugate, which is self itself."""
    return self
+
+ def __complex__(self):
+ return complex(float(self))
+
def __long__(self):
    """Return self converted to a long.

    Same as long(int(self)).
    """
    as_int = self.__int__()
    return long(as_int)
+
def _fix_nan(self, context):
    """Trim the leading digits of a NaN payload so it fits the context."""
    payload = self._int

    # the payload may hold at most precision digits when _clamp=0,
    # and precision-1 digits when _clamp=1.
    limit = context.prec - context._clamp
    excess = len(payload) - limit
    if excess > 0:
        # keep only the trailing digits, without leading zeros
        trimmed = payload[excess:].lstrip('0')
        return _dec_from_triple(self._sign, trimmed, self._exp, True)
    return Decimal(self)
+
def _fix(self, context):
    """Round if it is necessary to keep self within prec precision.

    Rounds and fixes the exponent.  Does not raise on a sNaN.

    Arguments:
    self - Decimal instance
    context - context used.

    Returns a Decimal (or whatever context._raise_error returns on
    overflow).  Conditions are reported through context._raise_error.
    """

    if self._is_special:
        if self._isnan():
            # decapitate payload if necessary
            return self._fix_nan(context)
        else:
            # self is +/-Infinity; return unaltered
            return Decimal(self)

    # if self is zero then exponent should be between Etiny and
    # Emax if _clamp==0, and between Etiny and Etop if _clamp==1.
    Etiny = context.Etiny()
    Etop = context.Etop()
    if not self:
        exp_max = [context.Emax, Etop][context._clamp]
        new_exp = min(max(self._exp, Etiny), exp_max)
        if new_exp != self._exp:
            context._raise_error(Clamped)
            return _dec_from_triple(self._sign, '0', new_exp)
        else:
            return Decimal(self)

    # exp_min is the smallest allowable exponent of the result,
    # equal to max(self.adjusted()-context.prec+1, Etiny)
    exp_min = len(self._int) + self._exp - context.prec
    if exp_min > Etop:
        # overflow: exp_min > Etop iff self.adjusted() > Emax
        ans = context._raise_error(Overflow, 'above Emax', self._sign)
        context._raise_error(Inexact)
        context._raise_error(Rounded)
        return ans

    self_is_subnormal = exp_min < Etiny
    if self_is_subnormal:
        exp_min = Etiny

    # round if self has too many digits
    if self._exp < exp_min:
        # digits: how many leading coefficient digits are kept
        digits = len(self._int) + self._exp - exp_min
        if digits < 0:
            # no digit of self survives; substitute a stand-in with
            # coefficient '1' one place below exp_min before rounding
            self = _dec_from_triple(self._sign, '1', exp_min-1)
            digits = 0
        rounding_method = self._pick_rounding_function[context.rounding]
        changed = rounding_method(self, digits)
        coeff = self._int[:digits] or '0'
        if changed > 0:
            coeff = str(int(coeff)+1)
            if len(coeff) > context.prec:
                # the increment carried into an extra digit; drop the
                # last digit and bump the exponent instead
                coeff = coeff[:-1]
                exp_min += 1

        # check whether the rounding pushed the exponent out of range
        if exp_min > Etop:
            ans = context._raise_error(Overflow, 'above Emax', self._sign)
        else:
            ans = _dec_from_triple(self._sign, coeff, exp_min)

        # raise the appropriate signals, taking care to respect
        # the precedence described in the specification
        if changed and self_is_subnormal:
            context._raise_error(Underflow)
        if self_is_subnormal:
            context._raise_error(Subnormal)
        if changed:
            context._raise_error(Inexact)
        context._raise_error(Rounded)
        if not ans:
            # raise Clamped on underflow to 0
            context._raise_error(Clamped)
        return ans

    if self_is_subnormal:
        context._raise_error(Subnormal)

    # fold down if _clamp == 1 and self has too few digits
    if context._clamp == 1 and self._exp > Etop:
        context._raise_error(Clamped)
        self_padded = self._int + '0'*(self._exp - Etop)
        return _dec_from_triple(self._sign, self_padded, Etop)

    # here self was representable to begin with; return unchanged
    return Decimal(self)
+
+ # for each of the rounding functions below:
+ # self is a finite, nonzero Decimal
+ # prec is an integer satisfying 0 <= prec < len(self._int)
+ #
+ # each function returns either -1, 0, or 1, as follows:
+ # 1 indicates that self should be rounded up (away from zero)
+ # 0 indicates that self should be truncated, and that all the
+ # digits to be truncated are zeros (so the value is unchanged)
+ # -1 indicates that there are nonzero digits to be truncated
+
def _round_down(self, prec):
    """Truncate (round towards 0).

    Returns 0 when every discarded digit is zero, otherwise -1.
    """
    return 0 if _all_zeros(self._int, prec) else -1
+
+ def _round_up(self, prec):
+ """Rounds away from 0."""
+ return -self._round_down(prec)
+
def _round_half_up(self, prec):
    """Round to nearest, with a 5 going up (away from 0)."""
    first_dropped = self._int[prec]
    if first_dropped in '56789':
        return 1
    if _all_zeros(self._int, prec):
        return 0
    return -1
+
def _round_half_down(self, prec):
    """Round to nearest, with an exact half going down."""
    if not _exact_half(self._int, prec):
        return self._round_half_up(prec)
    return -1
+
def _round_half_even(self, prec):
    """Round to nearest, with an exact half going to the even digit."""
    if _exact_half(self._int, prec):
        # a tie is truncated when the last kept digit is already even
        # (or when no digit is kept at all)
        if prec == 0 or self._int[prec-1] in '02468':
            return -1
    return self._round_half_up(prec)
+
+ def _round_ceiling(self, prec):
+ """Rounds up (not away from 0 if negative.)"""
+ if self._sign:
+ return self._round_down(prec)
+ else:
+ return -self._round_down(prec)
+
+ def _round_floor(self, prec):
+ """Rounds down (not towards 0 if negative)"""
+ if not self._sign:
+ return self._round_down(prec)
+ else:
+ return -self._round_down(prec)
+
+ def _round_05up(self, prec):
+ """Round down unless digit prec-1 is 0 or 5."""
+ if prec and self._int[prec-1] not in '05':
+ return self._round_down(prec)
+ else:
+ return -self._round_down(prec)
+
# dispatch table: rounding mode name -> rounding function defined above
_pick_rounding_function = {
    'ROUND_DOWN': _round_down,
    'ROUND_UP': _round_up,
    'ROUND_HALF_UP': _round_half_up,
    'ROUND_HALF_DOWN': _round_half_down,
    'ROUND_HALF_EVEN': _round_half_even,
    'ROUND_CEILING': _round_ceiling,
    'ROUND_FLOOR': _round_floor,
    'ROUND_05UP': _round_05up,
}
+
def fma(self, other, third, context=None):
    """Fused multiply-add.

    Computes self*other+third.  The product self*other is formed
    without any rounding, third is added to it, and only that final sum
    is rounded.
    """
    multiplier = _convert_other(other, raiseit=True)
    sign = self._sign ^ multiplier._sign

    if not (self._is_special or multiplier._is_special):
        # finite operands: form the exact, unrounded product
        exact_coeff = str(int(self._int) * int(multiplier._int))
        product = _dec_from_triple(sign, exact_coeff,
                                   self._exp + multiplier._exp)
    else:
        # a signaling NaN operand or zero times infinity is an
        # InvalidOperation; a quiet NaN operand becomes the product
        if context is None:
            context = getcontext()
        if self._exp == 'N':
            return context._raise_error(InvalidOperation, 'sNaN', self)
        if multiplier._exp == 'N':
            return context._raise_error(InvalidOperation, 'sNaN', multiplier)
        if self._exp == 'n':
            product = self
        elif multiplier._exp == 'n':
            product = multiplier
        elif self._exp == 'F':
            if not multiplier:
                return context._raise_error(InvalidOperation,
                                            'INF * 0 in fma')
            product = _SignedInfinity[sign]
        elif multiplier._exp == 'F':
            if not self:
                return context._raise_error(InvalidOperation,
                                            '0 * INF in fma')
            product = _SignedInfinity[sign]

    addend = _convert_other(third, raiseit=True)
    return product.__add__(addend, context)
+
def _power_modulo(self, other, modulo, context=None):
    """Three argument version of __pow__"""

    # Three-argument pow has no reflected counterpart, so operands that
    # cannot be converted raise TypeError instead of yielding
    # NotImplemented.
    other = _convert_other(other, raiseit=True)
    modulo = _convert_other(modulo, raiseit=True)

    if context is None:
        context = getcontext()

    # NaN handling matches fma: the first signaling NaN wins,
    # otherwise the first quiet NaN is returned.
    operands = (self, other, modulo)
    nan_kinds = (self._isnan(), other._isnan(), modulo._isnan())
    if nan_kinds[0] or nan_kinds[1] or nan_kinds[2]:
        for operand, kind in zip(operands, nan_kinds):
            if kind == 2:
                return context._raise_error(InvalidOperation, 'sNaN',
                                            operand)
        if nan_kinds[0]:
            return self._fix_nan(context)
        if nan_kinds[1]:
            return other._fix_nan(context)
        return modulo._fix_nan(context)

    # input checks: the same restrictions as Python's pow()
    all_integral = (self._isinteger() and
                    other._isinteger() and
                    modulo._isinteger())
    if not all_integral:
        return context._raise_error(InvalidOperation,
                                    'pow() 3rd argument not allowed '
                                    'unless all arguments are integers')
    if other < 0:
        return context._raise_error(InvalidOperation,
                                    'pow() 2nd argument cannot be '
                                    'negative when 3rd argument specified')
    if not modulo:
        return context._raise_error(InvalidOperation,
                                    'pow() 3rd argument cannot be 0')

    # decimal-specific restriction: abs(modulo) must be below 10**prec
    if modulo.adjusted() >= context.prec:
        return context._raise_error(InvalidOperation,
                                    'insufficient precision: pow() 3rd '
                                    'argument must not have more than '
                                    'precision digits')

    # 0**0 is treated as NaN, consistent with two-argument pow
    if not other and not self:
        return context._raise_error(InvalidOperation,
                                    'at least one of pow() 1st argument '
                                    'and 2nd argument must be nonzero ;'
                                    '0**0 is not defined')

    # an even exponent gives a positive result, else the sign of self
    sign = 0 if other._iseven() else self._sign

    # turn modulo into a Python integer, and self and other into
    # Decimal integers (exponents forced to be >= 0)
    modulus = abs(int(modulo))
    base_rep = _WorkRep(self.to_integral_value())
    exp_rep = _WorkRep(other.to_integral_value())

    # do the arithmetic with integer pow(); each power of ten in the
    # exponent is applied by raising the running value to the 10th power
    residue = (base_rep.int % modulus * pow(10, base_rep.exp, modulus)) % modulus
    for _ in xrange(exp_rep.exp):
        residue = pow(residue, 10, modulus)
    residue = pow(residue, exp_rep.int, modulus)

    return _dec_from_triple(sign, str(residue), 0)
+
+ def _power_exact(self, other, p):
+ """Attempt to compute self**other exactly.
+
+ Given Decimals self and other and an integer p, attempt to
+ compute an exact result for the power self**other, with p
+ digits of precision. Return None if self**other is not
+ exactly representable in p digits.
+
+ Assumes that elimination of special cases has already been
+ performed: self and other must both be nonspecial; self must
+ be positive and not numerically equal to 1; other must be
+ nonzero. For efficiency, other._exp should not be too large,
+ so that 10**abs(other._exp) is a feasible calculation."""
+
+ # In the comments below, we write x for the value of self and y for the
+ # value of other. Write x = xc*10**xe and abs(y) = yc*10**ye, with xc
+ # and yc positive integers not divisible by 10.
+
+ # The main purpose of this method is to identify the *failure*
+ # of x**y to be exactly representable with as little effort as
+ # possible. So we look for cheap and easy tests that
+ # eliminate the possibility of x**y being exact. Only if all
+ # these tests are passed do we go on to actually compute x**y.
+
+ # Here's the main idea. Express y as a rational number m/n, with m and
+ # n relatively prime and n>0. Then for x**y to be exactly
+ # representable (at *any* precision), xc must be the nth power of a
+ # positive integer and xe must be divisible by n. If y is negative
+ # then additionally xc must be a power of either 2 or 5, hence a power
+ # of 2**n or 5**n.
+ #
+ # There's a limit to how small |y| can be: if y=m/n as above
+ # then:
+ #
+ # (1) if xc != 1 then for the result to be representable we
+ # need xc**(1/n) >= 2, and hence also xc**|y| >= 2. So
+ # if |y| <= 1/nbits(xc) then xc < 2**nbits(xc) <=
+ # 2**(1/|y|), hence xc**|y| < 2 and the result is not
+ # representable.
+ #
+ # (2) if xe != 0, |xe|*(1/n) >= 1, so |xe|*|y| >= 1. Hence if
+ # |y| < 1/|xe| then the result is not representable.
+ #
+ # Note that since x is not equal to 1, at least one of (1) and
+ # (2) must apply. Now |y| < 1/nbits(xc) iff |yc|*nbits(xc) <
+ # 10**-ye iff len(str(|yc|*nbits(xc)) <= -ye.
+ #
+ # There's also a limit to how large y can be, at least if it's
+ # positive: the normalized result will have coefficient xc**y,
+ # so if it's representable then xc**y < 10**p, and y <
+ # p/log10(xc). Hence if y*log10(xc) >= p then the result is
+ # not exactly representable.
+
+ # if len(str(abs(yc*xe)) <= -ye then abs(yc*xe) < 10**-ye,
+ # so |y| < 1/xe and the result is not representable.
+ # Similarly, len(str(abs(yc)*xc_bits)) <= -ye implies |y|
+ # < 1/nbits(xc).
+
+ x = _WorkRep(self)
+ xc, xe = x.int, x.exp
+ while xc % 10 == 0:
+ xc //= 10
+ xe += 1
+
+ y = _WorkRep(other)
+ yc, ye = y.int, y.exp
+ while yc % 10 == 0:
+ yc //= 10
+ ye += 1
+
+ # case where xc == 1: result is 10**(xe*y), with xe*y
+ # required to be an integer
+ if xc == 1:
+ xe *= yc
+ # result is now 10**(xe * 10**ye); xe * 10**ye must be integral
+ while xe % 10 == 0:
+ xe //= 10
+ ye += 1
+ if ye < 0:
+ return None
+ exponent = xe * 10**ye
+ if y.sign == 1:
+ exponent = -exponent
+ # if other is a nonnegative integer, use ideal exponent
+ if other._isinteger() and other._sign == 0:
+ ideal_exponent = self._exp*int(other)
+ zeros = min(exponent-ideal_exponent, p-1)
+ else:
+ zeros = 0
+ return _dec_from_triple(0, '1' + '0'*zeros, exponent-zeros)
+
+ # case where y is negative: xc must be either a power
+ # of 2 or a power of 5.
+ if y.sign == 1:
+ last_digit = xc % 10
+ if last_digit in (2,4,6,8):
+ # quick test for power of 2
+ if xc & -xc != xc:
+ return None
+ # now xc is a power of 2; e is its exponent
+ e = _nbits(xc)-1
+
+ # We now have:
+ #
+ # x = 2**e * 10**xe, e > 0, and y < 0.
+ #
+ # The exact result is:
+ #
+ # x**y = 5**(-e*y) * 10**(e*y + xe*y)
+ #
+ # provided that both e*y and xe*y are integers. Note that if
+ # 5**(-e*y) >= 10**p, then the result can't be expressed
+ # exactly with p digits of precision.
+ #
+ # Using the above, we can guard against large values of ye.
+ # 93/65 is an upper bound for log(10)/log(5), so if
+ #
+ # ye >= len(str(93*p//65))
+ #
+ # then
+ #
+ # -e*y >= -y >= 10**ye > 93*p/65 > p*log(10)/log(5),
+ #
+ # so 5**(-e*y) >= 10**p, and the coefficient of the result
+ # can't be expressed in p digits.
+
+ # emax >= largest e such that 5**e < 10**p.
+ emax = p*93//65
+ if ye >= len(str(emax)):
+ return None
+
+ # Find -e*y and -xe*y; both must be integers
+ e = _decimal_lshift_exact(e * yc, ye)
+ xe = _decimal_lshift_exact(xe * yc, ye)
+ if e is None or xe is None:
+ return None
+
+ if e > emax:
+ return None
+ xc = 5**e
+
+ elif last_digit == 5:
+ # e >= log_5(xc) if xc is a power of 5; we have
+ # equality all the way up to xc=5**2658
+ e = _nbits(xc)*28//65
+ xc, remainder = divmod(5**e, xc)
+ if remainder:
+ return None
+ while xc % 5 == 0:
+ xc //= 5
+ e -= 1
+
+ # Guard against large values of ye, using the same logic as in
+ # the 'xc is a power of 2' branch. 10/3 is an upper bound for
+ # log(10)/log(2).
+ emax = p*10//3
+ if ye >= len(str(emax)):
+ return None
+
+ e = _decimal_lshift_exact(e * yc, ye)
+ xe = _decimal_lshift_exact(xe * yc, ye)
+ if e is None or xe is None:
+ return None
+
+ if e > emax:
+ return None
+ xc = 2**e
+ else:
+ return None
+
+ if xc >= 10**p:
+ return None
+ xe = -e-xe
+ return _dec_from_triple(0, str(xc), xe)
+
+ # now y is positive; find m and n such that y = m/n
+ if ye >= 0:
+ m, n = yc*10**ye, 1
+ else:
+ if xe != 0 and len(str(abs(yc*xe))) <= -ye:
+ return None
+ xc_bits = _nbits(xc)
+ if xc != 1 and len(str(abs(yc)*xc_bits)) <= -ye:
+ return None
+ m, n = yc, 10**(-ye)
+ while m % 2 == n % 2 == 0:
+ m //= 2
+ n //= 2
+ while m % 5 == n % 5 == 0:
+ m //= 5
+ n //= 5
+
+ # compute nth root of xc*10**xe
+ if n > 1:
+ # if 1 < xc < 2**n then xc isn't an nth power
+ if xc != 1 and xc_bits <= n:
+ return None
+
+ xe, rem = divmod(xe, n)
+ if rem != 0:
+ return None
+
+ # compute nth root of xc using Newton's method
+ a = 1L << -(-_nbits(xc)//n) # initial estimate
+ while True:
+ q, r = divmod(xc, a**(n-1))
+ if a <= q:
+ break
+ else:
+ a = (a*(n-1) + q)//n
+ if not (a == q and r == 0):
+ return None
+ xc = a
+
+ # now xc*10**xe is the nth root of the original xc*10**xe
+ # compute mth power of xc*10**xe
+
+ # if m > p*100//_log10_lb(xc) then m > p/log10(xc), hence xc**m >
+ # 10**p and the result is not representable.
+ if xc > 1 and m > p*100//_log10_lb(xc):
+ return None
+ xc = xc**m
+ xe *= m
+ if xc > 10**p:
+ return None
+
+ # by this point the result *is* exactly representable
+ # adjust the exponent to get as close as possible to the ideal
+ # exponent, if necessary
+ str_xc = str(xc)
+ if other._isinteger() and other._sign == 0:
+ ideal_exponent = self._exp*int(other)
+ zeros = min(xe-ideal_exponent, p-len(str_xc))
+ else:
+ zeros = 0
+ return _dec_from_triple(0, str_xc+'0'*zeros, xe-zeros)
+
def __pow__(self, other, modulo=None, context=None):
    """Return self ** other [ % modulo].

    With two arguments, compute self**other.

    With three arguments, compute (self**other) % modulo.  For the
    three argument form, the following restrictions on the
    arguments hold:

     - all three arguments must be integral
     - other must be nonnegative
     - either self or other (or both) must be nonzero
     - modulo must be nonzero and must have at most p digits,
       where p is the context precision.

    If any of these restrictions is violated the InvalidOperation
    flag is raised.

    The result of pow(self, other, modulo) is identical to the
    result that would be obtained by computing (self**other) %
    modulo with unbounded precision, but is computed more
    efficiently.  It is always exact.

    The three-argument form is delegated entirely to
    self._power_modulo.  In the two-argument form, NotImplemented is
    returned when _convert_other(other) yields NotImplemented, and
    the current context (getcontext()) is used when context is None.
    """

    # three-argument pow is handled by a separate method
    if modulo is not None:
        return self._power_modulo(other, modulo, context)

    other = _convert_other(other)
    if other is NotImplemented:
        return other

    if context is None:
        context = getcontext()

    # either argument is a NaN => result is NaN
    ans = self._check_nans(other, context)
    if ans:
        return ans

    # 0**0 = NaN (!), x**0 = 1 for nonzero x (including +/-Infinity)
    if not other:
        if not self:
            return context._raise_error(InvalidOperation, '0 ** 0')
        else:
            return _One

    # result has sign 1 iff self._sign is 1 and other is an odd integer
    result_sign = 0
    if self._sign == 1:
        if other._isinteger():
            if not other._iseven():
                result_sign = 1
        else:
            # -ve**noninteger = NaN
            # (-0)**noninteger = 0**noninteger
            if self:
                return context._raise_error(InvalidOperation,
                    'x ** y with x negative and y not an integer')
        # negate self, without doing any unwanted rounding
        self = self.copy_negate()

    # from here on self has sign 0; the sign of the result is carried
    # separately in result_sign

    # 0**(+ve or Inf)= 0; 0**(-ve or -Inf) = Infinity
    if not self:
        if other._sign == 0:
            return _dec_from_triple(result_sign, '0', 0)
        else:
            return _SignedInfinity[result_sign]

    # Inf**(+ve or Inf) = Inf; Inf**(-ve or -Inf) = 0
    if self._isinfinity():
        if other._sign == 0:
            return _SignedInfinity[result_sign]
        else:
            return _dec_from_triple(result_sign, '0', 0)

    # 1**other = 1, but the choice of exponent and the flags
    # depend on the exponent of self, and on whether other is a
    # positive integer, a negative integer, or neither
    if self == _One:
        if other._isinteger():
            # exp = max(self._exp*max(int(other), 0),
            # 1-context.prec) but evaluating int(other) directly
            # is dangerous until we know other is small (other
            # could be 1e999999999)
            if other._sign == 1:
                multiplier = 0
            elif other > context.prec:
                multiplier = context.prec
            else:
                multiplier = int(other)

            exp = self._exp * multiplier
            if exp < 1-context.prec:
                exp = 1-context.prec
                context._raise_error(Rounded)
        else:
            context._raise_error(Inexact)
            context._raise_error(Rounded)
            exp = 1-context.prec

        # exp <= 0 here, so '0'*-exp pads the coefficient to 10**-exp
        return _dec_from_triple(result_sign, '1'+'0'*-exp, exp)

    # compute adjusted exponent of self
    self_adj = self.adjusted()

    # self ** infinity is infinity if self > 1, 0 if self < 1
    # self ** -infinity is infinity if self < 1, 0 if self > 1
    if other._isinfinity():
        if (other._sign == 0) == (self_adj < 0):
            return _dec_from_triple(result_sign, '0', 0)
        else:
            return _SignedInfinity[result_sign]

    # from here on, the result always goes through the call
    # to _fix at the end of this function.
    ans = None
    exact = False

    # crude test to catch cases of extreme overflow/underflow.  If
    # log10(self)*other >= 10**bound and bound >= len(str(Emax))
    # then 10**bound >= 10**len(str(Emax)) >= Emax+1 and hence
    # self**other >= 10**(Emax+1), so overflow occurs.  The test
    # for underflow is similar.
    bound = self._log10_exp_bound() + other.adjusted()
    if (self_adj >= 0) == (other._sign == 0):
        # self > 1 and other +ve, or self < 1 and other -ve
        # possibility of overflow
        if bound >= len(str(context.Emax)):
            # placeholder beyond Emax; the final _fix call sees it
            ans = _dec_from_triple(result_sign, '1', context.Emax+1)
    else:
        # self > 1 and other -ve, or self < 1 and other +ve
        # possibility of underflow to 0
        Etiny = context.Etiny()
        if bound >= len(str(-Etiny)):
            # placeholder below Etiny; the final _fix call sees it
            ans = _dec_from_triple(result_sign, '1', Etiny-1)

    # try for an exact result with precision +1
    if ans is None:
        ans = self._power_exact(other, context.prec + 1)
        if ans is not None:
            # _power_exact was given the sign-stripped self, so the
            # sign is reapplied here
            if result_sign == 1:
                ans = _dec_from_triple(1, ans._int, ans._exp)
            exact = True

    # usual case: inexact result, x**y computed directly as exp(y*log(x))
    if ans is None:
        p = context.prec
        x = _WorkRep(self)
        xc, xe = x.int, x.exp
        y = _WorkRep(other)
        yc, ye = y.int, y.exp
        if y.sign == 1:
            yc = -yc

        # compute correctly rounded result:  start with precision +3,
        # then increase precision until result is unambiguously roundable
        extra = 3
        while True:
            coeff, exp = _dpower(xc, xe, yc, ye, p+extra)
            # a remainder of zero means the digits beyond the first p
            # are all zero or exactly a half: retry with more digits
            if coeff % (5*10**(len(str(coeff))-p-1)):
                break
            extra += 3

        ans = _dec_from_triple(result_sign, str(coeff), exp)

    # unlike exp, ln and log10, the power function respects the
    # rounding mode; no need to switch to ROUND_HALF_EVEN here

    # There's a difficulty here when 'other' is not an integer and
    # the result is exact.  In this case, the specification
    # requires that the Inexact flag be raised (in spite of
    # exactness), but since the result is exact _fix won't do this
    # for us.  (Correspondingly, the Underflow signal should also
    # be raised for subnormal results.)  We can't directly raise
    # these signals either before or after calling _fix, since
    # that would violate the precedence for signals.  So we wrap
    # the ._fix call in a temporary context, and reraise
    # afterwards.
    if exact and not other._isinteger():
        # pad with zeros up to length context.prec+1 if necessary; this
        # ensures that the Rounded signal will be raised.
        if len(ans._int) <= context.prec:
            expdiff = context.prec + 1 - len(ans._int)
            ans = _dec_from_triple(ans._sign, ans._int+'0'*expdiff,
                                   ans._exp-expdiff)

        # create a copy of the current context, with cleared flags/traps
        newcontext = context.copy()
        newcontext.clear_flags()
        for exception in _signals:
            newcontext.traps[exception] = 0

        # round in the new context
        ans = ans._fix(newcontext)

        # raise Inexact, and if necessary, Underflow
        newcontext._raise_error(Inexact)
        if newcontext.flags[Subnormal]:
            newcontext._raise_error(Underflow)

        # propagate signals to the original context; _fix could
        # have raised any of Overflow, Underflow, Subnormal,
        # Inexact, Rounded, Clamped.  Overflow needs the correct
        # arguments.  Note that the order of the exceptions is
        # important here.
        if newcontext.flags[Overflow]:
            context._raise_error(Overflow, 'above Emax', ans._sign)
        for exception in Underflow, Subnormal, Inexact, Rounded, Clamped:
            if newcontext.flags[exception]:
                context._raise_error(exception)

    else:
        ans = ans._fix(context)

    return ans
+
def __rpow__(self, other, context=None):
    """Reflected power: compute other ** self.

    The left operand is converted with _convert_other; if that yields
    NotImplemented it is handed straight back, otherwise the work is
    delegated to the converted operand's __pow__.
    """
    base = _convert_other(other)
    if base is NotImplemented:
        return base
    return base.__pow__(self, context=context)
+
def normalize(self, context=None):
    """Strip trailing zeros from the coefficient; any zero becomes 0e0.

    self is first rounded with _fix in the given context (the current
    context when none is supplied).  NaNs go through _check_nans and
    infinities are returned as rounded.
    """
    if context is None:
        context = getcontext()

    if self._is_special:
        nan_result = self._check_nans(context=context)
        if nan_result:
            return nan_result

    fixed = self._fix(context)
    if fixed._isinfinity():
        return fixed
    if not fixed:
        return _dec_from_triple(fixed._sign, '0', 0)

    # the exponent may not be raised past Emax, or past Etop() when
    # the context's _clamp is 1
    limits = [context.Emax, context.Etop()]
    ceiling = limits[context._clamp]

    digits = fixed._int
    keep = len(digits)
    exponent = fixed._exp
    # drop one trailing zero per step, raising the exponent to match
    while digits[keep-1] == '0' and exponent < ceiling:
        keep -= 1
        exponent += 1
    return _dec_from_triple(fixed._sign, digits[:keep], exponent)
+
def quantize(self, exp, rounding=None, context=None, watchexp=True):
    """Quantize self so its exponent is the same as that of exp.

    Similar to self._rescale(exp._exp) but with error checking.

    exp      -- operand whose exponent is the target; converted with
                _convert_other(exp, raiseit=True)
    rounding -- rounding mode; defaults to context.rounding
    context  -- context to use; defaults to getcontext()
    watchexp -- when false, skip the range and precision checks and
                return the plain rescale (still signalling Rounded
                and Inexact where appropriate)

    With watchexp true, InvalidOperation is signalled through the
    context when exactly one operand is infinite, when the target
    exponent lies outside [Etiny, Emax], or when the result would
    exceed Emax or the context precision.
    """
    exp = _convert_other(exp, raiseit=True)

    if context is None:
        context = getcontext()
    if rounding is None:
        rounding = context.rounding

    if self._is_special or exp._is_special:
        ans = self._check_nans(exp, context)
        if ans:
            return ans

        if exp._isinfinity() or self._isinfinity():
            if exp._isinfinity() and self._isinfinity():
                return Decimal(self)  # if both are inf, it is OK
            return context._raise_error(InvalidOperation,
                                    'quantize with one INF')

    # if we're not watching exponents, do a simple rescale
    if not watchexp:
        ans = self._rescale(exp._exp, rounding)
        # raise Inexact and Rounded where appropriate
        if ans._exp > self._exp:
            context._raise_error(Rounded)
            if ans != self:
                context._raise_error(Inexact)
        return ans

    # exp._exp should be between Etiny and Emax
    if not (context.Etiny() <= exp._exp <= context.Emax):
        return context._raise_error(InvalidOperation,
               'target exponent out of bounds in quantize')

    # a zero keeps its sign and simply takes the target exponent
    if not self:
        ans = _dec_from_triple(self._sign, '0', exp._exp)
        return ans._fix(context)

    # reject results that cannot fit before doing the rescale
    self_adjusted = self.adjusted()
    if self_adjusted > context.Emax:
        return context._raise_error(InvalidOperation,
               'exponent of quantize result too large for current context')
    if self_adjusted - exp._exp + 1 > context.prec:
        return context._raise_error(InvalidOperation,
               'quantize result has too many digits for current context')

    # check again afterwards: rounding can carry into an extra digit
    ans = self._rescale(exp._exp, rounding)
    if ans.adjusted() > context.Emax:
        return context._raise_error(InvalidOperation,
               'exponent of quantize result too large for current context')
    if len(ans._int) > context.prec:
        return context._raise_error(InvalidOperation,
               'quantize result has too many digits for current context')

    # raise appropriate flags
    if ans and ans.adjusted() < context.Emin:
        context._raise_error(Subnormal)
    if ans._exp > self._exp:
        if ans != self:
            context._raise_error(Inexact)
        context._raise_error(Rounded)

    # call to fix takes care of any necessary folddown, and
    # signals Clamped if necessary
    ans = ans._fix(context)
    return ans
+
def same_quantum(self, other):
    """Tell whether self and other share the same exponent.

    When at least one operand is special the answer follows these
    rules instead:
       * two NaNs        -> True
       * two infinities  -> True
       * anything else   -> False
    """
    other = _convert_other(other, raiseit=True)

    # ordinary finite operands: compare exponents directly
    if not (self._is_special or other._is_special):
        return self._exp == other._exp

    if self.is_nan() and other.is_nan():
        return True
    return self.is_infinite() and other.is_infinite()
+
def _rescale(self, exp, rounding):
    """Return self re-expressed with exponent exp.

    The coefficient is either padded with zeros or cut back and
    rounded with the given rounding mode.  Specials come back
    unchanged.  The operation is quiet: no flags are raised and the
    context is not consulted.

    exp = target exponent (an integer)
    rounding = rounding mode
    """
    if self._is_special:
        return Decimal(self)

    sign = self._sign
    if not self:
        return _dec_from_triple(sign, '0', exp)

    surplus = self._exp - exp
    if surplus >= 0:
        # target exponent is not larger: just append zeros
        return _dec_from_triple(sign, self._int + '0'*surplus, exp)

    # Digits have to be discarded.  When every digit of self lies
    # more than one place below the target, round 10**(exp-1) instead.
    kept = len(self._int) + surplus
    if kept < 0:
        self = _dec_from_triple(sign, '1', exp-1)
        kept = 0
    rounder = self._pick_rounding_function[rounding]
    bump = rounder(self, kept)
    coeff = self._int[:kept] or '0'
    if bump == 1:
        coeff = str(int(coeff)+1)
    return _dec_from_triple(sign, coeff, exp)
+
+ def _round(self, places, rounding):
+ """Round a nonzero, nonspecial Decimal to a fixed number of
+ significant figures, using the given rounding mode.
+
+ Infinities, NaNs and zeros are returned unaltered.
+
+ This operation is quiet: it raises no flags, and uses no
+ information from the context.
+
+ """
+ if places <= 0:
+ raise ValueError("argument should be at least 1 in _round")
+ if self._is_special or not self:
+ return Decimal(self)
+ ans = self._rescale(self.adjusted()+1-places, rounding)
+ # it can happen that the rescale alters the adjusted exponent;
+ # for example when rounding 99.97 to 3 significant figures.
+ # When this happens we end up with an extra 0 at the end of
+ # the number; a second rescale fixes this.
+ if ans.adjusted() != self.adjusted():
+ ans = ans._rescale(ans.adjusted()+1-places, rounding)
+ return ans
+
def to_integral_exact(self, rounding=None, context=None):
    """Round self to an integer, signalling Rounded and Inexact.

    The rounding mode comes from the context unless one is given
    explicitly.  Rounded is signalled whenever a rescale takes place
    and Inexact when the rescaled value differs from self.

    See also: to_integral_value, which performs the same rounding
    without signalling Inexact or Rounded.
    """
    if self._is_special:
        nan_result = self._check_nans(context=context)
        if nan_result:
            return nan_result
        return Decimal(self)

    # nonnegative exponent: already an integer
    if self._exp >= 0:
        return Decimal(self)
    if not self:
        return _dec_from_triple(self._sign, '0', 0)

    if context is None:
        context = getcontext()
    mode = context.rounding if rounding is None else rounding

    rounded = self._rescale(0, mode)
    if rounded != self:
        context._raise_error(Inexact)
    context._raise_error(Rounded)
    return rounded
+
def to_integral_value(self, rounding=None, context=None):
    """Round self to an integer without signalling Inexact or Rounded.

    The rounding mode defaults to that of the context, and the
    context defaults to the current one.
    """
    if context is None:
        context = getcontext()
    mode = context.rounding if rounding is None else rounding

    if self._is_special:
        nan_result = self._check_nans(context=context)
        if nan_result:
            return nan_result
        return Decimal(self)

    # a negative exponent means there are fractional digits to drop
    if self._exp < 0:
        return self._rescale(0, mode)
    return Decimal(self)

# the method was renamed; the old name is kept as an alias for compatibility
to_integral = to_integral_value
+
def sqrt(self, context=None):
    """Return the square root of self.

    The current context is used when context is None.  NaNs go
    through _check_nans, +Infinity is returned as a copy, a zero
    keeps its sign with its exponent halved, and any other negative
    operand signals InvalidOperation.  The final rounding is done
    with ROUND_HALF_EVEN on a shallow copy of the context.
    """
    if context is None:
        context = getcontext()

    if self._is_special:
        ans = self._check_nans(context=context)
        if ans:
            return ans

        if self._isinfinity() and self._sign == 0:
            return Decimal(self)

    if not self:
        # exponent = self._exp // 2.  sqrt(-0) = -0
        ans = _dec_from_triple(self._sign, '0', self._exp // 2)
        return ans._fix(context)

    if self._sign == 1:
        return context._raise_error(InvalidOperation, 'sqrt(-x), x > 0')

    # At this point self represents a positive number.  Let p be
    # the desired precision and express self in the form c*100**e
    # with c a positive real number and e an integer, c and e
    # being chosen so that 100**(p-1) <= c < 100**p.  Then the
    # (exact) square root of self is sqrt(c)*10**e, and 10**(p-1)
    # <= sqrt(c) < 10**p, so the closest representable Decimal at
    # precision p is n*10**e where n = round_half_even(sqrt(c)),
    # the closest integer to sqrt(c) with the even integer chosen
    # in the case of a tie.
    #
    # To ensure correct rounding in all cases, we use the
    # following trick: we compute the square root to an extra
    # place (precision p+1 instead of precision p), rounding down.
    # Then, if the result is inexact and its last digit is 0 or 5,
    # we increase the last digit to 1 or 6 respectively; if it's
    # exact we leave the last digit alone.  Now the final round to
    # p places (or fewer in the case of underflow) will round
    # correctly and raise the appropriate flags.

    # use an extra digit of precision
    prec = context.prec+1

    # write argument in the form c*100**e where e = self._exp//2
    # is the 'ideal' exponent, to be used if the square root is
    # exactly representable.  l is the number of 'digits' of c in
    # base 100, so that 100**(l-1) <= c < 100**l.
    op = _WorkRep(self)
    e = op.exp >> 1
    if op.exp & 1:
        # odd exponent: move one factor of 10 into the coefficient
        c = op.int * 10
        l = (len(self._int) >> 1) + 1
    else:
        c = op.int
        l = len(self._int)+1 >> 1

    # rescale so that c has exactly prec base 100 'digits'
    shift = prec-l
    if shift >= 0:
        c *= 100**shift
        exact = True
    else:
        # dropping base-100 digits: exact only if nothing is lost
        c, remainder = divmod(c, 100**-shift)
        exact = not remainder
    e -= shift

    # find n = floor(sqrt(c)) using Newton's method
    n = 10**prec
    while True:
        q = c//n
        if n <= q:
            break
        else:
            n = n + q >> 1
    exact = exact and n*n == c

    if exact:
        # result is exact; rescale to use ideal exponent e
        if shift >= 0:
            # assert n % 10**shift == 0
            n //= 10**shift
        else:
            n *= 10**-shift
        e += shift
    else:
        # result is not exact; fix last digit as described above
        if n % 5 == 0:
            n += 1

    ans = _dec_from_triple(0, str(n), e)

    # round, and fit to current context
    context = context._shallow_copy()
    rounding = context._set_rounding(ROUND_HALF_EVEN)
    ans = ans._fix(context)
    context.rounding = rounding

    return ans
+
def max(self, other, context=None):
    """Return the larger of self and other, rounded to the context.

    Behaves like max(self, other), except that when exactly one
    operand is a quiet NaN the other operand is returned, and the
    remaining NaN combinations are resolved by _check_nans.
    """
    other = _convert_other(other, raiseit=True)

    if context is None:
        context = getcontext()

    if self._is_special or other._is_special:
        self_nan = self._isnan()
        other_nan = other._isnan()
        if self_nan or other_nan:
            # a lone quiet NaN loses to the number beside it
            if (self_nan, other_nan) == (0, 1):
                return self._fix(context)
            if (self_nan, other_nan) == (1, 0):
                return other._fix(context)
            return self._check_nans(other, context)

    order = self._cmp(other)
    if order == 0:
        # Numerically equal operands are ranked as compare_total
        # ranks them: with different signs the positive operand is
        # the maximum, and with equal signs the exponent decides.
        order = self.compare_total(other)

    winner = other if order == -1 else self
    return winner._fix(context)
+
def min(self, other, context=None):
    """Return the smaller of self and other, rounded to the context.

    Behaves like min(self, other), except that when exactly one
    operand is a quiet NaN the other operand is returned, and the
    remaining NaN combinations are resolved by _check_nans.
    """
    other = _convert_other(other, raiseit=True)

    if context is None:
        context = getcontext()

    if self._is_special or other._is_special:
        self_nan = self._isnan()
        other_nan = other._isnan()
        if self_nan or other_nan:
            # a lone quiet NaN loses to the number beside it
            if (self_nan, other_nan) == (0, 1):
                return self._fix(context)
            if (self_nan, other_nan) == (1, 0):
                return other._fix(context)
            return self._check_nans(other, context)

    order = self._cmp(other)
    if order == 0:
        # numerically equal: fall back on the compare_total ordering
        order = self.compare_total(other)

    winner = self if order == -1 else other
    return winner._fix(context)
+
+ def _isinteger(self):
+ """Returns whether self is an integer"""
+ if self._is_special:
+ return False
+ if self._exp >= 0:
+ return True
+ rest = self._int[self._exp:]
+ return rest == '0'*len(rest)
+
+ def _iseven(self):
+ """Returns True if self is even. Assumes self is an integer."""
+ if not self or self._exp > 0:
+ return True
+ return self._int[-1+self._exp] in '02468'
+
def adjusted(self):
    """Return the adjusted exponent of self (0 for NaN and Infinity)."""
    try:
        result = self._exp + len(self._int) - 1
    except TypeError:
        # for NaN or Infinity self._exp is a string, so the sum fails
        return 0
    return result
+
def canonical(self, context=None):
    """Return self unchanged.

    There is only one encoding per number here, so every instance
    is already in canonical form.
    """
    return self
+
def compare_signal(self, other, context=None):
    """Compare self with other numerically, signalling on any NaN.

    Works like compare(), except that every NaN signals, and a
    signaling NaN takes precedence over a quiet one.
    """
    other = _convert_other(other, raiseit = True)
    nan_result = self._compare_check_nans(other, context)
    if nan_result:
        return nan_result
    return self.compare(other, context=context)
+
def compare_total(self, other):
    """Compare self with other by their abstract representations.

    Unlike the ordinary compare, which looks only at numerical
    value, this imposes a total ordering on every possible abstract
    representation.
    """
    other = _convert_other(other, raiseit=True)

    # operands of opposite sign: the negative one sorts first
    if self._sign and not other._sign:
        return _NegativeOne
    if not self._sign and other._sign:
        return _One

    # Both operands now share a sign.  'ahead' is the answer when
    # self ranks higher in magnitude order and 'behind' when it ranks
    # lower; a negative sign swaps the two.
    if self._sign:
        ahead, behind = _NegativeOne, _One
    else:
        ahead, behind = _One, _NegativeOne

    # NaNs of either kind
    self_nan = self._isnan()
    other_nan = other._isnan()
    if self_nan or other_nan:
        if self_nan == other_nan:
            # same kind of NaN: order payloads as if they were integers
            self_key = len(self._int), self._int
            other_key = len(other._int), other._int
            if self_key < other_key:
                return behind
            if self_key > other_key:
                return ahead
            return _Zero

        # different kinds: test _isnan() code 1 before code 2
        for kind in (1, 2):
            if self_nan == kind:
                return ahead
            if other_nan == kind:
                return behind

    if self < other:
        return _NegativeOne
    if self > other:
        return _One

    # numerically equal: the exponent breaks the tie
    if self._exp < other._exp:
        return behind
    if self._exp > other._exp:
        return ahead
    return _Zero
+
+
def compare_total_mag(self, other):
    """Compare abstract representations of self and other, ignoring sign.

    Equivalent to compare_total applied to sign-cleared copies of
    both operands.
    """
    other = _convert_other(other, raiseit=True)

    self_abs = self.copy_abs()
    other_abs = other.copy_abs()
    return self_abs.compare_total(other_abs)
+
def copy_abs(self):
    """Return a copy of self whose sign is 0."""
    coefficient, exponent = self._int, self._exp
    return _dec_from_triple(0, coefficient, exponent, self._is_special)
+
def copy_negate(self):
    """Return a copy of self with the opposite sign."""
    flipped = 0 if self._sign else 1
    return _dec_from_triple(flipped, self._int, self._exp, self._is_special)
+
def copy_sign(self, other):
    """Return a copy of self carrying the sign of other."""
    other = _convert_other(other, raiseit=True)
    new_sign = other._sign
    return _dec_from_triple(new_sign, self._int, self._exp,
                            self._is_special)
+
def exp(self, context=None):
    """Returns e ** self.

    The current context is used when context is None.  NaNs go
    through _check_nans; exp(-Infinity) is _Zero, exp(0) is _One and
    exp(Infinity) is a copy of self.  Every other result is rounded
    with ROUND_HALF_EVEN on a shallow copy of the context.
    """

    if context is None:
        context = getcontext()

    # exp(NaN) = NaN
    ans = self._check_nans(context=context)
    if ans:
        return ans

    # exp(-Infinity) = 0
    if self._isinfinity() == -1:
        return _Zero

    # exp(0) = 1
    if not self:
        return _One

    # exp(Infinity) = Infinity
    if self._isinfinity() == 1:
        return Decimal(self)

    # the result is now guaranteed to be inexact (the true
    # mathematical result is transcendental). There's no need to
    # raise Rounded and Inexact here---they'll always be raised as
    # a result of the call to _fix.
    p = context.prec
    adj = self.adjusted()

    # we only need to do any computation for quite a small range
    # of adjusted exponents---for example, -29 <= adj <= 10 for
    # the default context.  For smaller exponent the result is
    # indistinguishable from 1 at the given precision, while for
    # larger exponent the result either overflows or underflows.
    if self._sign == 0 and adj > len(str((context.Emax+1)*3)):
        # overflow
        ans = _dec_from_triple(0, '1', context.Emax+1)
    elif self._sign == 1 and adj > len(str((-context.Etiny()+1)*3)):
        # underflow to 0
        ans = _dec_from_triple(0, '1', context.Etiny()-1)
    elif self._sign == 0 and adj < -p:
        # p+1 digits; final round will raise correct flags
        ans = _dec_from_triple(0, '1' + '0'*(p-1) + '1', -p)
    elif self._sign == 1 and adj < -p-1:
        # p+1 digits; final round will raise correct flags
        ans = _dec_from_triple(0, '9'*(p+1), -p-1)
    # general case
    else:
        op = _WorkRep(self)
        c, e = op.int, op.exp
        if op.sign == 1:
            c = -c

        # compute correctly rounded result: increase precision by
        # 3 digits at a time until we get an unambiguously
        # roundable result
        extra = 3
        while True:
            coeff, exp = _dexp(c, e, p+extra)
            # a remainder of zero means the digits beyond the first p
            # are all zero or exactly a half: retry with more digits
            if coeff % (5*10**(len(str(coeff))-p-1)):
                break
            extra += 3

        ans = _dec_from_triple(0, str(coeff), exp)

    # at this stage, ans should round correctly with *any*
    # rounding mode, not just with ROUND_HALF_EVEN
    context = context._shallow_copy()
    rounding = context._set_rounding(ROUND_HALF_EVEN)
    ans = ans._fix(context)
    context.rounding = rounding

    return ans
+
def is_canonical(self):
    """Return True; every instance is canonical.

    The encoding of an instance is currently always canonical, so
    the answer never depends on self.
    """
    return True
+
def is_finite(self):
    """Return True if self is finite, False otherwise.

    Finite means neither an infinity nor a NaN.
    """
    special = self._is_special
    return not special
+
def is_infinite(self):
    """Return True if self is an infinity, False otherwise."""
    # infinities carry the marker 'F' in place of a numeric exponent
    marker = self._exp
    return marker == 'F'
+
def is_nan(self):
    """Return True if self is a quiet or signaling NaN, False otherwise."""
    marker = self._exp
    return marker == 'n' or marker == 'N'
+
def is_normal(self, context=None):
    """Return True if self is a normal number, False otherwise.

    Specials and zeros are never normal; any other value is normal
    when its adjusted exponent is at least the context's Emin.
    """
    if self._is_special or not self:
        return False
    if context is None:
        context = getcontext()
    lowest = context.Emin
    return lowest <= self.adjusted()
+
def is_qnan(self):
    """Return True if self is a quiet NaN, False otherwise."""
    # quiet NaNs carry the marker 'n' in place of a numeric exponent
    marker = self._exp
    return marker == 'n'
+
def is_signed(self):
    """Return True if the sign of self is 1 (negative), False otherwise."""
    sign = self._sign
    return sign == 1
+
def is_snan(self):
    """Return True if self is a signaling NaN, False otherwise."""
    # signaling NaNs carry the marker 'N' in place of a numeric exponent
    marker = self._exp
    return marker == 'N'
+
def is_subnormal(self, context=None):
    """Return True if self is subnormal, False otherwise.

    Specials and zeros are never subnormal; any other value is
    subnormal when its adjusted exponent is below the context's Emin.
    """
    if self._is_special or not self:
        return False
    if context is None:
        context = getcontext()
    own_adjusted = self.adjusted()
    return own_adjusted < context.Emin
+
def is_zero(self):
    """Return True if self is a (finite) zero, False otherwise."""
    if self._is_special:
        return False
    return self._int == '0'
+
def _ln_exp_bound(self):
    """Compute a lower bound for the adjusted exponent of self.ln().
    In other words, compute r such that self.ln() >= 10**r.  Assumes
    that self is finite and positive and that self != 1.

    NOTE(review): for self < 1 the logarithm is negative, so the bound
    is presumably on abs(self.ln()) -- confirm against callers.
    """

    # for 0.1 <= x <= 10 we use the inequalities 1-1/x <= ln(x) <= x-1
    adj = self._exp + len(self._int) - 1
    if adj >= 1:
        # argument >= 10; we use 23/10 = 2.3 as a lower bound for ln(10)
        return len(str(adj*23//10)) - 1
    if adj <= -2:
        # argument <= 0.1
        return len(str((-1-adj)*23//10)) - 1
    op = _WorkRep(self)
    c, e = op.int, op.exp
    if adj == 0:
        # 1 < self < 10
        # digit-count estimate of the exponent of num/den = 1 - 1/self;
        # the string comparison knocks off one when num's digits sort
        # below den's
        num = str(c-10**-e)
        den = str(c)
        return len(num) - len(den) - (num < den)
    # adj == -1, 0.1 <= self < 1
    # adjusted exponent of 1 - self, written as (10**-e - c) * 10**e
    return e + len(str(10**-e - c)) - 1
+
+
def ln(self, context=None):
    """Returns the natural (base e) logarithm of self.

    The current context is used when context is None.  NaNs go
    through _check_nans; ln(0) is _NegativeInfinity, ln(Infinity) is
    _Infinity, ln(1) is _Zero, and a negative operand signals
    InvalidOperation.  Every other result is rounded with
    ROUND_HALF_EVEN on a shallow copy of the context.
    """

    if context is None:
        context = getcontext()

    # ln(NaN) = NaN
    ans = self._check_nans(context=context)
    if ans:
        return ans

    # ln(0.0) == -Infinity
    if not self:
        return _NegativeInfinity

    # ln(Infinity) = Infinity
    if self._isinfinity() == 1:
        return _Infinity

    # ln(1.0) == 0.0
    if self == _One:
        return _Zero

    # ln(negative) raises InvalidOperation
    if self._sign == 1:
        return context._raise_error(InvalidOperation,
                                    'ln of a negative value')

    # result is irrational, so necessarily inexact
    op = _WorkRep(self)
    c, e = op.int, op.exp
    p = context.prec

    # correctly rounded result: repeatedly increase precision by 3
    # until we get an unambiguously roundable result
    places = p - self._ln_exp_bound() + 2 # at least p+3 places
    while True:
        coeff = _dlog(c, e, places)
        # assert len(str(abs(coeff)))-p >= 1
        # a remainder of zero means the digits beyond the first p are
        # all zero or exactly a half: retry with more places
        if coeff % (5*10**(len(str(abs(coeff)))-p-1)):
            break
        places += 3
    # coeff may be negative; sign and magnitude are passed separately
    ans = _dec_from_triple(int(coeff<0), str(abs(coeff)), -places)

    # final rounding is always done with ROUND_HALF_EVEN
    context = context._shallow_copy()
    rounding = context._set_rounding(ROUND_HALF_EVEN)
    ans = ans._fix(context)
    context.rounding = rounding
    return ans
+
+    def _log10_exp_bound(self):
+        """Compute a lower bound for the adjusted exponent of self.log10().
+
+        In other words, find r such that abs(self.log10()) >= 10**r (the
+        logarithm itself is negative when self < 1).
+        Assumes that self is finite and positive and that self != 1.
+
+        Returns an integer.  log10() subtracts this value from the
+        context precision to choose its initial number of working places.
+        """
+
+        # For x >= 10 or x < 0.1 we only need a bound on the integer
+        # part of log10(self), and this comes directly from the
+        # exponent of x.  For 0.1 <= x <= 10 we use the inequalities
+        # 1-1/x <= log(x) <= x-1. If x > 1 we have |log10(x)| >
+        # (1-1/x)/2.31 > 0.  If x < 1 then |log10(x)| > (1-x)/2.31 > 0
+
+        # adj is the adjusted exponent of self, i.e. self lies in
+        # [10**adj, 10**(adj+1)).
+        adj = self._exp + len(self._int) - 1
+        if adj >= 1:
+            # self >= 10
+            # log10(self) >= adj; len(str(n))-1 is the adjusted exponent
+            # of the positive integer n.
+            return len(str(adj))-1
+        if adj <= -2:
+            # self < 0.1
+            return len(str(-1-adj))-1
+        op = _WorkRep(self)
+        # NOTE(review): assumes _WorkRep exposes self as c * 10**e with an
+        # integer c -- confirm against _WorkRep.
+        c, e = op.int, op.exp
+        if adj == 0:
+            # 1 < self < 10
+            # num/den is (1-1/self)/231; the trailing +2 turns the divisor
+            # 231 back into the 2.31 used in the inequality above.
+            num = str(c-10**-e)
+            den = str(231*c)
+            return len(num) - len(den) - (num < den) + 2
+        # adj == -1, 0.1 <= self < 1
+        # 1-self = (10**-e - c)*10**e; dividing by 2.31 costs one more
+        # exponent step when num sorts before "231" as a string.
+        num = str(10**-e-c)
+        return len(num) + e - (num < "231") - 1
+
+    def log10(self, context=None):
+        """Returns the base 10 logarithm of self.
+
+        The current context is used when context is None.  NaN operands
+        are handled by _check_nans; a zero gives -Infinity and +Infinity
+        gives +Infinity.  Any remaining operand with the sign bit set
+        signals InvalidOperation.  An exact power of ten yields its
+        integer exponent; every other operand is computed with _dlog10.
+        In both cases the result is rounded to the context precision
+        using ROUND_HALF_EVEN, whatever rounding mode the context carries.
+        """
+
+        if context is None:
+            context = getcontext()
+
+        # log10(NaN) = NaN
+        ans = self._check_nans(context=context)
+        if ans:
+            return ans
+
+        # log10(0.0) == -Infinity
+        # (tested before the sign check, so a zero never reaches the
+        # InvalidOperation branch below)
+        if not self:
+            return _NegativeInfinity
+
+        # log10(Infinity) = Infinity
+        if self._isinfinity() == 1:
+            return _Infinity
+
+        # log10(negative or -Infinity) raises InvalidOperation
+        if self._sign == 1:
+            return context._raise_error(InvalidOperation,
+                                        'log10 of a negative value')
+
+        # log10(10**n) = n
+        # (coefficient is a '1' followed only by zeros)
+        if self._int[0] == '1' and self._int[1:] == '0'*(len(self._int) - 1):
+            # answer may need rounding
+            ans = Decimal(self._exp + len(self._int) - 1)
+        else:
+            # result is irrational, so necessarily inexact
+            op = _WorkRep(self)
+            c, e = op.int, op.exp
+            p = context.prec
+
+            # correctly rounded result: repeatedly increase precision
+            # until result is unambiguously roundable
+            places = p-self._log10_exp_bound()+2
+            while True:
+                coeff = _dlog10(c, e, places)
+                # assert len(str(abs(coeff)))-p >= 1
+                # A zero remainder means the digits beyond the first p
+                # are exactly 0...0 or 50...0, i.e. coeff sits on a
+                # rounding boundary; retry with three more places.
+                if coeff % (5*10**(len(str(abs(coeff)))-p-1)):
+                    break
+                places += 3
+            ans = _dec_from_triple(int(coeff<0), str(abs(coeff)), -places)
+
+        # Round on a shallow copy so the caller's rounding mode is left
+        # alone; the copy shares the caller's flags and traps dicts, so
+        # signals raised by _fix are still recorded on the caller.
+        context = context._shallow_copy()
+        rounding = context._set_rounding(ROUND_HALF_EVEN)
+        ans = ans._fix(context)
+        context.rounding = rounding
+        return ans
+
+ def logb(self, context=None):
+ """ Returns the exponent of the magnitude of self's MSD.
+
+ The result is the integer which is the exponent of the magnitude
+ of the most significant digit of self (as though it were truncated
+ to a single digit while maintaining the value of that digit and
+ without limiting the resulting exponent).
+ """
+ # logb(NaN) = NaN
+ ans = self._check_nans(context=context)
+ if ans:
+ return ans
+
+ if context is None:
+ context = getcontext()
+
+ # logb(+/-Inf) = +Inf
+ if self._isinfinity():
+ return _Infinity
+
+ # logb(0) = -Inf, DivisionByZero
+ if not self:
+ return context._raise_error(DivisionByZero, 'logb(0)', 1)
+
+ # otherwise, simply return the adjusted exponent of self, as a
+ # Decimal. Note that no attempt is made to fit the result
+ # into the current context.
+ ans = Decimal(self.adjusted())
+ return ans._fix(context)
+
+ def _islogical(self):
+ """Return True if self is a logical operand.
+
+ For being logical, it must be a finite number with a sign of 0,
+ an exponent of 0, and a coefficient whose digits must all be
+ either 0 or 1.
+ """
+ if self._sign != 0 or self._exp != 0:
+ return False
+ for dig in self._int:
+ if dig not in '01':
+ return False
+ return True
+
+ def _fill_logical(self, context, opa, opb):
+ dif = context.prec - len(opa)
+ if dif > 0:
+ opa = '0'*dif + opa
+ elif dif < 0:
+ opa = opa[-context.prec:]
+ dif = context.prec - len(opb)
+ if dif > 0:
+ opb = '0'*dif + opb
+ elif dif < 0:
+ opb = opb[-context.prec:]
+ return opa, opb
+
+ def logical_and(self, other, context=None):
+ """Applies an 'and' operation between self and other's digits."""
+ if context is None:
+ context = getcontext()
+
+ other = _convert_other(other, raiseit=True)
+
+ if not self._islogical() or not other._islogical():
+ return context._raise_error(InvalidOperation)
+
+ # fill to context.prec
+ (opa, opb) = self._fill_logical(context, self._int, other._int)
+
+ # make the operation, and clean starting zeroes
+ result = "".join([str(int(a)&int(b)) for a,b in zip(opa,opb)])
+ return _dec_from_triple(0, result.lstrip('0') or '0', 0)
+
+ def logical_invert(self, context=None):
+ """Invert all its digits."""
+ if context is None:
+ context = getcontext()
+ return self.logical_xor(_dec_from_triple(0,'1'*context.prec,0),
+ context)
+
+ def logical_or(self, other, context=None):
+ """Applies an 'or' operation between self and other's digits."""
+ if context is None:
+ context = getcontext()
+
+ other = _convert_other(other, raiseit=True)
+
+ if not self._islogical() or not other._islogical():
+ return context._raise_error(InvalidOperation)
+
+ # fill to context.prec
+ (opa, opb) = self._fill_logical(context, self._int, other._int)
+
+ # make the operation, and clean starting zeroes
+ result = "".join([str(int(a)|int(b)) for a,b in zip(opa,opb)])
+ return _dec_from_triple(0, result.lstrip('0') or '0', 0)
+
+ def logical_xor(self, other, context=None):
+ """Applies an 'xor' operation between self and other's digits."""
+ if context is None:
+ context = getcontext()
+
+ other = _convert_other(other, raiseit=True)
+
+ if not self._islogical() or not other._islogical():
+ return context._raise_error(InvalidOperation)
+
+ # fill to context.prec
+ (opa, opb) = self._fill_logical(context, self._int, other._int)
+
+ # make the operation, and clean starting zeroes
+ result = "".join([str(int(a)^int(b)) for a,b in zip(opa,opb)])
+ return _dec_from_triple(0, result.lstrip('0') or '0', 0)
+
+ def max_mag(self, other, context=None):
+ """Compares the values numerically with their sign ignored."""
+ other = _convert_other(other, raiseit=True)
+
+ if context is None:
+ context = getcontext()
+
+ if self._is_special or other._is_special:
+ # If one operand is a quiet NaN and the other is number, then the
+ # number is always returned
+ sn = self._isnan()
+ on = other._isnan()
+ if sn or on:
+ if on == 1 and sn == 0:
+ return self._fix(context)
+ if sn == 1 and on == 0:
+ return other._fix(context)
+ return self._check_nans(other, context)
+
+ c = self.copy_abs()._cmp(other.copy_abs())
+ if c == 0:
+ c = self.compare_total(other)
+
+ if c == -1:
+ ans = other
+ else:
+ ans = self
+
+ return ans._fix(context)
+
+ def min_mag(self, other, context=None):
+ """Compares the values numerically with their sign ignored."""
+ other = _convert_other(other, raiseit=True)
+
+ if context is None:
+ context = getcontext()
+
+ if self._is_special or other._is_special:
+ # If one operand is a quiet NaN and the other is number, then the
+ # number is always returned
+ sn = self._isnan()
+ on = other._isnan()
+ if sn or on:
+ if on == 1 and sn == 0:
+ return self._fix(context)
+ if sn == 1 and on == 0:
+ return other._fix(context)
+ return self._check_nans(other, context)
+
+ c = self.copy_abs()._cmp(other.copy_abs())
+ if c == 0:
+ c = self.compare_total(other)
+
+ if c == -1:
+ ans = self
+ else:
+ ans = other
+
+ return ans._fix(context)
+
+ def next_minus(self, context=None):
+ """Returns the largest representable number smaller than itself."""
+ if context is None:
+ context = getcontext()
+
+ ans = self._check_nans(context=context)
+ if ans:
+ return ans
+
+ if self._isinfinity() == -1:
+ return _NegativeInfinity
+ if self._isinfinity() == 1:
+ return _dec_from_triple(0, '9'*context.prec, context.Etop())
+
+ context = context.copy()
+ context._set_rounding(ROUND_FLOOR)
+ context._ignore_all_flags()
+ new_self = self._fix(context)
+ if new_self != self:
+ return new_self
+ return self.__sub__(_dec_from_triple(0, '1', context.Etiny()-1),
+ context)
+
+ def next_plus(self, context=None):
+ """Returns the smallest representable number larger than itself."""
+ if context is None:
+ context = getcontext()
+
+ ans = self._check_nans(context=context)
+ if ans:
+ return ans
+
+ if self._isinfinity() == 1:
+ return _Infinity
+ if self._isinfinity() == -1:
+ return _dec_from_triple(1, '9'*context.prec, context.Etop())
+
+ context = context.copy()
+ context._set_rounding(ROUND_CEILING)
+ context._ignore_all_flags()
+ new_self = self._fix(context)
+ if new_self != self:
+ return new_self
+ return self.__add__(_dec_from_triple(0, '1', context.Etiny()-1),
+ context)
+
+ def next_toward(self, other, context=None):
+ """Returns the number closest to self, in the direction towards other.
+
+ The result is the closest representable number to self
+ (excluding self) that is in the direction towards other,
+ unless both have the same value. If the two operands are
+ numerically equal, then the result is a copy of self with the
+ sign set to be the same as the sign of other.
+ """
+ other = _convert_other(other, raiseit=True)
+
+ if context is None:
+ context = getcontext()
+
+ ans = self._check_nans(other, context)
+ if ans:
+ return ans
+
+ comparison = self._cmp(other)
+ if comparison == 0:
+ return self.copy_sign(other)
+
+ if comparison == -1:
+ ans = self.next_plus(context)
+ else: # comparison == 1
+ ans = self.next_minus(context)
+
+ # decide which flags to raise using value of ans
+ if ans._isinfinity():
+ context._raise_error(Overflow,
+ 'Infinite result from next_toward',
+ ans._sign)
+ context._raise_error(Inexact)
+ context._raise_error(Rounded)
+ elif ans.adjusted() < context.Emin:
+ context._raise_error(Underflow)
+ context._raise_error(Subnormal)
+ context._raise_error(Inexact)
+ context._raise_error(Rounded)
+ # if precision == 1 then we don't raise Clamped for a
+ # result 0E-Etiny.
+ if not ans:
+ context._raise_error(Clamped)
+
+ return ans
+
+ def number_class(self, context=None):
+ """Returns an indication of the class of self.
+
+ The class is one of the following strings:
+ sNaN
+ NaN
+ -Infinity
+ -Normal
+ -Subnormal
+ -Zero
+ +Zero
+ +Subnormal
+ +Normal
+ +Infinity
+ """
+ if self.is_snan():
+ return "sNaN"
+ if self.is_qnan():
+ return "NaN"
+ inf = self._isinfinity()
+ if inf == 1:
+ return "+Infinity"
+ if inf == -1:
+ return "-Infinity"
+ if self.is_zero():
+ if self._sign:
+ return "-Zero"
+ else:
+ return "+Zero"
+ if context is None:
+ context = getcontext()
+ if self.is_subnormal(context=context):
+ if self._sign:
+ return "-Subnormal"
+ else:
+ return "+Subnormal"
+ # just a normal, regular, boring number, :)
+ if self._sign:
+ return "-Normal"
+ else:
+ return "+Normal"
+
+ def radix(self):
+ """Just returns 10, as this is Decimal, :)"""
+ return Decimal(10)
+
+ def rotate(self, other, context=None):
+ """Returns a rotated copy of self, value-of-other times."""
+ if context is None:
+ context = getcontext()
+
+ other = _convert_other(other, raiseit=True)
+
+ ans = self._check_nans(other, context)
+ if ans:
+ return ans
+
+ if other._exp != 0:
+ return context._raise_error(InvalidOperation)
+ if not (-context.prec <= int(other) <= context.prec):
+ return context._raise_error(InvalidOperation)
+
+ if self._isinfinity():
+ return Decimal(self)
+
+ # get values, pad if necessary
+ torot = int(other)
+ rotdig = self._int
+ topad = context.prec - len(rotdig)
+ if topad > 0:
+ rotdig = '0'*topad + rotdig
+ elif topad < 0:
+ rotdig = rotdig[-topad:]
+
+ # let's rotate!
+ rotated = rotdig[torot:] + rotdig[:torot]
+ return _dec_from_triple(self._sign,
+ rotated.lstrip('0') or '0', self._exp)
+
+ def scaleb(self, other, context=None):
+ """Returns self operand after adding the second value to its exp."""
+ if context is None:
+ context = getcontext()
+
+ other = _convert_other(other, raiseit=True)
+
+ ans = self._check_nans(other, context)
+ if ans:
+ return ans
+
+ if other._exp != 0:
+ return context._raise_error(InvalidOperation)
+ liminf = -2 * (context.Emax + context.prec)
+ limsup = 2 * (context.Emax + context.prec)
+ if not (liminf <= int(other) <= limsup):
+ return context._raise_error(InvalidOperation)
+
+ if self._isinfinity():
+ return Decimal(self)
+
+ d = _dec_from_triple(self._sign, self._int, self._exp + int(other))
+ d = d._fix(context)
+ return d
+
+ def shift(self, other, context=None):
+ """Returns a shifted copy of self, value-of-other times."""
+ if context is None:
+ context = getcontext()
+
+ other = _convert_other(other, raiseit=True)
+
+ ans = self._check_nans(other, context)
+ if ans:
+ return ans
+
+ if other._exp != 0:
+ return context._raise_error(InvalidOperation)
+ if not (-context.prec <= int(other) <= context.prec):
+ return context._raise_error(InvalidOperation)
+
+ if self._isinfinity():
+ return Decimal(self)
+
+ # get values, pad if necessary
+ torot = int(other)
+ rotdig = self._int
+ topad = context.prec - len(rotdig)
+ if topad > 0:
+ rotdig = '0'*topad + rotdig
+ elif topad < 0:
+ rotdig = rotdig[-topad:]
+
+ # let's shift!
+ if torot < 0:
+ shifted = rotdig[:torot]
+ else:
+ shifted = rotdig + '0'*torot
+ shifted = shifted[-context.prec:]
+
+ return _dec_from_triple(self._sign,
+ shifted.lstrip('0') or '0', self._exp)
+
+ # Support for pickling, copy, and deepcopy
+ def __reduce__(self):
+ return (self.__class__, (str(self),))
+
+ def __copy__(self):
+ if type(self) is Decimal:
+ return self # I'm immutable; therefore I am my own clone
+ return self.__class__(str(self))
+
+ def __deepcopy__(self, memo):
+ if type(self) is Decimal:
+ return self # My components are also immutable
+ return self.__class__(str(self))
+
+ # PEP 3101 support. the _localeconv keyword argument should be
+ # considered private: it's provided for ease of testing only.
+    def __format__(self, specifier, context=None, _localeconv=None):
+        """Format a Decimal instance according to the given specifier.
+
+        The specifier should be a standard format specifier, with the
+        form described in PEP 3101.  Formatting types 'e', 'E', 'f',
+        'F', 'g', 'G', 'n' and '%' are supported.  If the formatting
+        type is omitted it defaults to 'g' or 'G', depending on the
+        value of context.capitals.
+
+        context supplies the rounding mode and the capitals setting; the
+        current context is used when it is None.  _localeconv is handed
+        on unchanged to _parse_format_specifier.  Returns the string
+        built by _format_align (special values) or _format_number.
+        """
+
+        # Note: PEP 3101 says that if the type is not present then
+        # there should be at least one digit after the decimal point.
+        # We take the liberty of ignoring this requirement for
+        # Decimal---it's presumably there to make sure that
+        # format(float, '') behaves similarly to str(float).
+        if context is None:
+            context = getcontext()
+
+        spec = _parse_format_specifier(specifier, _localeconv=_localeconv)
+
+        # special values don't care about the type or precision
+        if self._is_special:
+            sign = _format_sign(self._sign, spec)
+            body = str(self.copy_abs())
+            if spec['type'] == '%':
+                body += '%'
+            return _format_align(sign, body, spec)
+
+        # a type of None defaults to 'g' or 'G', depending on context
+        if spec['type'] is None:
+            spec['type'] = ['g', 'G'][context.capitals]
+
+        # if type is '%', adjust exponent of self accordingly
+        # (raising the exponent by 2 multiplies the value by 100; self is
+        # only rebound locally, the original object is untouched)
+        if spec['type'] == '%':
+            self = _dec_from_triple(self._sign, self._int, self._exp+2)
+
+        # round if necessary, taking rounding mode from the context
+        rounding = context.rounding
+        precision = spec['precision']
+        if precision is not None:
+            if spec['type'] in 'eE':
+                # one digit before the point plus `precision` after it
+                self = self._round(precision+1, rounding)
+            elif spec['type'] in 'fF%':
+                # exactly `precision` digits after the point
+                self = self._rescale(-precision, rounding)
+            elif spec['type'] in 'gG' and len(self._int) > precision:
+                # only shorten; never pad up to `precision` digits
+                self = self._round(precision, rounding)
+        # special case: zeros with a positive exponent can't be
+        # represented in fixed point; rescale them to 0e0.
+        if not self and self._exp > 0 and spec['type'] in 'fF%':
+            self = self._rescale(0, rounding)
+
+        # figure out placement of the decimal point
+        # leftdigits: number of digits left of the point when self is
+        # written without an exponent; dotplace: index into self._int
+        # in front of which the decimal point is placed.
+        leftdigits = self._exp + len(self._int)
+        if spec['type'] in 'eE':
+            if not self and precision is not None:
+                dotplace = 1 - precision
+            else:
+                dotplace = 1
+        elif spec['type'] in 'fF%':
+            dotplace = leftdigits
+        elif spec['type'] in 'gG':
+            if self._exp <= 0 and leftdigits > -6:
+                dotplace = leftdigits
+            else:
+                dotplace = 1
+
+        # find digits before and after decimal point, and get exponent
+        if dotplace < 0:
+            # point lies left of every digit: zero-fill the fraction
+            intpart = '0'
+            fracpart = '0'*(-dotplace) + self._int
+        elif dotplace > len(self._int):
+            # point lies right of every digit: zero-fill the integer part
+            intpart = self._int + '0'*(dotplace-len(self._int))
+            fracpart = ''
+        else:
+            intpart = self._int[:dotplace] or '0'
+            fracpart = self._int[dotplace:]
+        exp = leftdigits-dotplace
+
+        # done with the decimal-specific stuff;  hand over the rest
+        # of the formatting to the _format_number function
+        return _format_number(self._sign, intpart, fracpart, exp, spec)
+
+def _dec_from_triple(sign, coefficient, exponent, special=False):
+ """Create a decimal instance directly, without any validation,
+ normalization (e.g. removal of leading zeros) or argument
+ conversion.
+
+ This function is for *internal use only*.
+ """
+
+ self = object.__new__(Decimal)
+ self._sign = sign
+ self._int = coefficient
+ self._exp = exponent
+ self._is_special = special
+
+ return self
+
+# Register Decimal as a kind of Number (an abstract base class).
+# However, do not register it as Real (because Decimals are not
+# interoperable with floats).
+_numbers.Number.register(Decimal)
+
+
+##### Context class #######################################################
+
+class _ContextManager(object):
+ """Context manager class to support localcontext().
+
+ Sets a copy of the supplied context in __enter__() and restores
+ the previous decimal context in __exit__()
+ """
+ def __init__(self, new_context):
+ self.new_context = new_context.copy()
+ def __enter__(self):
+ self.saved_context = getcontext()
+ setcontext(self.new_context)
+ return self.new_context
+ def __exit__(self, t, v, tb):
+ setcontext(self.saved_context)
+
+class Context(object):
+ """Contains the context for a Decimal instance.
+
+ Contains:
+ prec - precision (for use in rounding, division, square roots..)
+ rounding - rounding type (how you round)
+ traps - If traps[exception] = 1, then the exception is
+ raised when it is caused. Otherwise, a value is
+ substituted in.
+ flags - When an exception is caused, flags[exception] is set.
+ (Whether or not the trap_enabler is set)
+ Should be reset by user of Decimal instance.
+ Emin - Minimum exponent
+ Emax - Maximum exponent
+ capitals - If 1, 1*10^1 is printed as 1E+1.
+ If 0, printed as 1e1
+ _clamp - If 1, change exponents if too high (Default 0)
+ """
+
+    def __init__(self, prec=None, rounding=None,
+                 traps=None, flags=None,
+                 Emin=None, Emax=None,
+                 capitals=None, _clamp=0,
+                 _ignored_flags=None):
+        """Initialise the context, filling gaps from DefaultContext.
+
+        prec, rounding, Emin, Emax, capitals and _clamp fall back to the
+        matching DefaultContext attribute when passed as None.
+
+        traps and flags may each be None, a dict, or any other container
+        of signals.  A dict is stored as given, without copying.  Any
+        other container is expanded into a dict over _signals holding 1
+        for the signals it contains and 0 for the rest.  traps=None
+        copies DefaultContext.traps; flags=None starts with every flag 0.
+
+        _ignored_flags, when given, is stored as given (not copied).
+        """
+        # Set defaults; for everything except flags and _ignored_flags,
+        # inherit from DefaultContext.
+        # If DefaultContext does not exist yet, dc stays unbound and every
+        # fallback below would fail, so the caller must then supply those
+        # arguments explicitly.  NOTE(review): presumably this is what
+        # lets DefaultContext itself be constructed -- confirm at its
+        # definition.
+        try:
+            dc = DefaultContext
+        except NameError:
+            pass
+
+        self.prec = prec if prec is not None else dc.prec
+        self.rounding = rounding if rounding is not None else dc.rounding
+        self.Emin = Emin if Emin is not None else dc.Emin
+        self.Emax = Emax if Emax is not None else dc.Emax
+        self.capitals = capitals if capitals is not None else dc.capitals
+        # _clamp defaults to 0 rather than None, so the DefaultContext
+        # fallback only applies when None is passed explicitly.
+        self._clamp = _clamp if _clamp is not None else dc._clamp
+
+        if _ignored_flags is None:
+            self._ignored_flags = []
+        else:
+            self._ignored_flags = _ignored_flags
+
+        if traps is None:
+            self.traps = dc.traps.copy()
+        elif not isinstance(traps, dict):
+            self.traps = dict((s, int(s in traps)) for s in _signals)
+        else:
+            # stored by reference: this is how _shallow_copy shares traps
+            self.traps = traps
+
+        if flags is None:
+            self.flags = dict.fromkeys(_signals, 0)
+        elif not isinstance(flags, dict):
+            self.flags = dict((s, int(s in flags)) for s in _signals)
+        else:
+            # stored by reference: this is how _shallow_copy shares flags
+            self.flags = flags
+
+ def __repr__(self):
+ """Show the current context."""
+ s = []
+ s.append('Context(prec=%(prec)d, rounding=%(rounding)s, '
+ 'Emin=%(Emin)d, Emax=%(Emax)d, capitals=%(capitals)d'
+ % vars(self))
+ names = [f.__name__ for f, v in self.flags.items() if v]
+ s.append('flags=[' + ', '.join(names) + ']')
+ names = [t.__name__ for t, v in self.traps.items() if v]
+ s.append('traps=[' + ', '.join(names) + ']')
+ return ', '.join(s) + ')'
+
+ def clear_flags(self):
+ """Reset all flags to zero"""
+ for flag in self.flags:
+ self.flags[flag] = 0
+
+ def _shallow_copy(self):
+ """Returns a shallow copy from self."""
+ nc = Context(self.prec, self.rounding, self.traps,
+ self.flags, self.Emin, self.Emax,
+ self.capitals, self._clamp, self._ignored_flags)
+ return nc
+
+ def copy(self):
+ """Returns a deep copy from self."""
+ nc = Context(self.prec, self.rounding, self.traps.copy(),
+ self.flags.copy(), self.Emin, self.Emax,
+ self.capitals, self._clamp, self._ignored_flags)
+ return nc
+ __copy__ = copy
+
+    def _raise_error(self, condition, explanation = None, *args):
+        """Handles an error
+
+        If the flag is in _ignored_flags, returns the default response.
+        Otherwise, it sets the flag, then, if the corresponding
+        trap_enabler is set, it reraises the exception.  Otherwise, it returns
+        the default value after setting the flag.
+
+        condition is the exceptional condition being reported.  It is
+        translated through _condition_map to the signal whose flag and
+        trap are consulted; a condition with no entry maps to itself.
+        explanation becomes the argument of the raised exception, and any
+        extra positional arguments are forwarded to the handle() method
+        that produces the default response.
+        """
+        error = _condition_map.get(condition, condition)
+        if error in self._ignored_flags:
+            # Don't touch the flag
+            # (here the default comes from the mapped signal's handler)
+            return error().handle(self, *args)
+
+        self.flags[error] = 1
+        if not self.traps[error]:
+            # The errors define how to handle themselves.
+            # (here the default comes from the original condition's
+            # handler, not from the mapped signal)
+            return condition().handle(self, *args)
+
+        # Errors should only be risked on copies of the context
+        # self._ignored_flags = []
+        raise error(explanation)
+
+ def _ignore_all_flags(self):
+ """Ignore all flags, if they are raised"""
+ return self._ignore_flags(*_signals)
+
+ def _ignore_flags(self, *flags):
+ """Ignore the flags, if they are raised"""
+ # Do not mutate-- This way, copies of a context leave the original
+ # alone.
+ self._ignored_flags = (self._ignored_flags + list(flags))
+ return list(flags)
+
+ def _regard_flags(self, *flags):
+ """Stop ignoring the flags, if they are raised"""
+ if flags and isinstance(flags[0], (tuple,list)):
+ flags = flags[0]
+ for flag in flags:
+ self._ignored_flags.remove(flag)
+
+ # We inherit object.__hash__, so we must deny this explicitly
+ __hash__ = None
+
+ def Etiny(self):
+ """Returns Etiny (= Emin - prec + 1)"""
+ return int(self.Emin - self.prec + 1)
+
+ def Etop(self):
+ """Returns maximum exponent (= Emax - prec + 1)"""
+ return int(self.Emax - self.prec + 1)
+
+ def _set_rounding(self, type):
+ """Sets the rounding type.
+
+ Sets the rounding type, and returns the current (previous)
+ rounding type. Often used like:
+
+ context = context.copy()
+ # so you don't change the calling context
+ # if an error occurs in the middle.
+ rounding = context._set_rounding(ROUND_UP)
+ val = self.__sub__(other, context=context)
+ context._set_rounding(rounding)
+
+ This will make it round up for that operation.
+ """
+ rounding = self.rounding
+ self.rounding= type
+ return rounding
+
+ def create_decimal(self, num='0'):
+ """Creates a new Decimal instance but using self as context.
+
+ This method implements the to-number operation of the
+ IBM Decimal specification."""
+
+ if isinstance(num, basestring) and num != num.strip():
+ return self._raise_error(ConversionSyntax,
+ "no trailing or leading whitespace is "
+ "permitted.")
+
+ d = Decimal(num, context=self)
+ if d._isnan() and len(d._int) > self.prec - self._clamp:
+ return self._raise_error(ConversionSyntax,
+ "diagnostic info too long in NaN")
+ return d._fix(self)
+
+ def create_decimal_from_float(self, f):
+ """Creates a new Decimal instance from a float but rounding using self
+ as the context.
+
+ >>> context = Context(prec=5, rounding=ROUND_DOWN)
+ >>> context.create_decimal_from_float(3.1415926535897932)
+ Decimal('3.1415')
+ >>> context = Context(prec=5, traps=[Inexact])
+ >>> context.create_decimal_from_float(3.1415926535897932)
+ Traceback (most recent call last):
+ ...
+ Inexact: None
+
+ """
+ d = Decimal.from_float(f) # An exact conversion
+ return d._fix(self) # Apply the context rounding
+
+ # Methods
+ def abs(self, a):
+ """Returns the absolute value of the operand.
+
+ If the operand is negative, the result is the same as using the minus
+ operation on the operand. Otherwise, the result is the same as using
+ the plus operation on the operand.
+
+ >>> ExtendedContext.abs(Decimal('2.1'))
+ Decimal('2.1')
+ >>> ExtendedContext.abs(Decimal('-100'))
+ Decimal('100')
+ >>> ExtendedContext.abs(Decimal('101.5'))
+ Decimal('101.5')
+ >>> ExtendedContext.abs(Decimal('-101.5'))
+ Decimal('101.5')
+ >>> ExtendedContext.abs(-1)
+ Decimal('1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.__abs__(context=self)
+
+ def add(self, a, b):
+ """Return the sum of the two operands.
+
+ >>> ExtendedContext.add(Decimal('12'), Decimal('7.00'))
+ Decimal('19.00')
+ >>> ExtendedContext.add(Decimal('1E+2'), Decimal('1.01E+4'))
+ Decimal('1.02E+4')
+ >>> ExtendedContext.add(1, Decimal(2))
+ Decimal('3')
+ >>> ExtendedContext.add(Decimal(8), 5)
+ Decimal('13')
+ >>> ExtendedContext.add(5, 5)
+ Decimal('10')
+ """
+ a = _convert_other(a, raiseit=True)
+ r = a.__add__(b, context=self)
+ if r is NotImplemented:
+ raise TypeError("Unable to convert %s to Decimal" % b)
+ else:
+ return r
+
+ def _apply(self, a):
+ return str(a._fix(self))
+
+ def canonical(self, a):
+ """Returns the same Decimal object.
+
+ As we do not have different encodings for the same number, the
+ received object already is in its canonical form.
+
+ >>> ExtendedContext.canonical(Decimal('2.50'))
+ Decimal('2.50')
+ """
+ return a.canonical(context=self)
+
+ def compare(self, a, b):
+ """Compares values numerically.
+
+ If the signs of the operands differ, a value representing each operand
+ ('-1' if the operand is less than zero, '0' if the operand is zero or
+ negative zero, or '1' if the operand is greater than zero) is used in
+ place of that operand for the comparison instead of the actual
+ operand.
+
+ The comparison is then effected by subtracting the second operand from
+ the first and then returning a value according to the result of the
+ subtraction: '-1' if the result is less than zero, '0' if the result is
+ zero or negative zero, or '1' if the result is greater than zero.
+
+ >>> ExtendedContext.compare(Decimal('2.1'), Decimal('3'))
+ Decimal('-1')
+ >>> ExtendedContext.compare(Decimal('2.1'), Decimal('2.1'))
+ Decimal('0')
+ >>> ExtendedContext.compare(Decimal('2.1'), Decimal('2.10'))
+ Decimal('0')
+ >>> ExtendedContext.compare(Decimal('3'), Decimal('2.1'))
+ Decimal('1')
+ >>> ExtendedContext.compare(Decimal('2.1'), Decimal('-3'))
+ Decimal('1')
+ >>> ExtendedContext.compare(Decimal('-3'), Decimal('2.1'))
+ Decimal('-1')
+ >>> ExtendedContext.compare(1, 2)
+ Decimal('-1')
+ >>> ExtendedContext.compare(Decimal(1), 2)
+ Decimal('-1')
+ >>> ExtendedContext.compare(1, Decimal(2))
+ Decimal('-1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.compare(b, context=self)
+
+ def compare_signal(self, a, b):
+ """Compares the values of the two operands numerically.
+
+ It's pretty much like compare(), but all NaNs signal, with signaling
+ NaNs taking precedence over quiet NaNs.
+
+ >>> c = ExtendedContext
+ >>> c.compare_signal(Decimal('2.1'), Decimal('3'))
+ Decimal('-1')
+ >>> c.compare_signal(Decimal('2.1'), Decimal('2.1'))
+ Decimal('0')
+ >>> c.flags[InvalidOperation] = 0
+ >>> print c.flags[InvalidOperation]
+ 0
+ >>> c.compare_signal(Decimal('NaN'), Decimal('2.1'))
+ Decimal('NaN')
+ >>> print c.flags[InvalidOperation]
+ 1
+ >>> c.flags[InvalidOperation] = 0
+ >>> print c.flags[InvalidOperation]
+ 0
+ >>> c.compare_signal(Decimal('sNaN'), Decimal('2.1'))
+ Decimal('NaN')
+ >>> print c.flags[InvalidOperation]
+ 1
+ >>> c.compare_signal(-1, 2)
+ Decimal('-1')
+ >>> c.compare_signal(Decimal(-1), 2)
+ Decimal('-1')
+ >>> c.compare_signal(-1, Decimal(2))
+ Decimal('-1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.compare_signal(b, context=self)
+
+ def compare_total(self, a, b):
+ """Compares two operands using their abstract representation.
+
+ This is not like the standard compare, which use their numerical
+ value. Note that a total ordering is defined for all possible abstract
+ representations.
+
+ >>> ExtendedContext.compare_total(Decimal('12.73'), Decimal('127.9'))
+ Decimal('-1')
+ >>> ExtendedContext.compare_total(Decimal('-127'), Decimal('12'))
+ Decimal('-1')
+ >>> ExtendedContext.compare_total(Decimal('12.30'), Decimal('12.3'))
+ Decimal('-1')
+ >>> ExtendedContext.compare_total(Decimal('12.30'), Decimal('12.30'))
+ Decimal('0')
+ >>> ExtendedContext.compare_total(Decimal('12.3'), Decimal('12.300'))
+ Decimal('1')
+ >>> ExtendedContext.compare_total(Decimal('12.3'), Decimal('NaN'))
+ Decimal('-1')
+ >>> ExtendedContext.compare_total(1, 2)
+ Decimal('-1')
+ >>> ExtendedContext.compare_total(Decimal(1), 2)
+ Decimal('-1')
+ >>> ExtendedContext.compare_total(1, Decimal(2))
+ Decimal('-1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.compare_total(b)
+
+ def compare_total_mag(self, a, b):
+ """Compares two operands using their abstract representation ignoring sign.
+
+ Like compare_total, but with operand's sign ignored and assumed to be 0.
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.compare_total_mag(b)
+
+ def copy_abs(self, a):
+ """Returns a copy of the operand with the sign set to 0.
+
+ >>> ExtendedContext.copy_abs(Decimal('2.1'))
+ Decimal('2.1')
+ >>> ExtendedContext.copy_abs(Decimal('-100'))
+ Decimal('100')
+ >>> ExtendedContext.copy_abs(-1)
+ Decimal('1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.copy_abs()
+
+ def copy_decimal(self, a):
+ """Returns a copy of the decimal object.
+
+ >>> ExtendedContext.copy_decimal(Decimal('2.1'))
+ Decimal('2.1')
+ >>> ExtendedContext.copy_decimal(Decimal('-1.00'))
+ Decimal('-1.00')
+ >>> ExtendedContext.copy_decimal(1)
+ Decimal('1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return Decimal(a)
+
+ def copy_negate(self, a):
+ """Returns a copy of the operand with the sign inverted.
+
+ >>> ExtendedContext.copy_negate(Decimal('101.5'))
+ Decimal('-101.5')
+ >>> ExtendedContext.copy_negate(Decimal('-101.5'))
+ Decimal('101.5')
+ >>> ExtendedContext.copy_negate(1)
+ Decimal('-1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.copy_negate()
+
+ def copy_sign(self, a, b):
+ """Copies the second operand's sign to the first one.
+
+ In detail, it returns a copy of the first operand with the sign
+ equal to the sign of the second operand.
+
+ >>> ExtendedContext.copy_sign(Decimal( '1.50'), Decimal('7.33'))
+ Decimal('1.50')
+ >>> ExtendedContext.copy_sign(Decimal('-1.50'), Decimal('7.33'))
+ Decimal('1.50')
+ >>> ExtendedContext.copy_sign(Decimal( '1.50'), Decimal('-7.33'))
+ Decimal('-1.50')
+ >>> ExtendedContext.copy_sign(Decimal('-1.50'), Decimal('-7.33'))
+ Decimal('-1.50')
+ >>> ExtendedContext.copy_sign(1, -2)
+ Decimal('-1')
+ >>> ExtendedContext.copy_sign(Decimal(1), -2)
+ Decimal('-1')
+ >>> ExtendedContext.copy_sign(1, Decimal(-2))
+ Decimal('-1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.copy_sign(b)
+
+ def divide(self, a, b):
+ """Decimal division in a specified context.
+
+ >>> ExtendedContext.divide(Decimal('1'), Decimal('3'))
+ Decimal('0.333333333')
+ >>> ExtendedContext.divide(Decimal('2'), Decimal('3'))
+ Decimal('0.666666667')
+ >>> ExtendedContext.divide(Decimal('5'), Decimal('2'))
+ Decimal('2.5')
+ >>> ExtendedContext.divide(Decimal('1'), Decimal('10'))
+ Decimal('0.1')
+ >>> ExtendedContext.divide(Decimal('12'), Decimal('12'))
+ Decimal('1')
+ >>> ExtendedContext.divide(Decimal('8.00'), Decimal('2'))
+ Decimal('4.00')
+ >>> ExtendedContext.divide(Decimal('2.400'), Decimal('2.0'))
+ Decimal('1.20')
+ >>> ExtendedContext.divide(Decimal('1000'), Decimal('100'))
+ Decimal('10')
+ >>> ExtendedContext.divide(Decimal('1000'), Decimal('1'))
+ Decimal('1000')
+ >>> ExtendedContext.divide(Decimal('2.40E+6'), Decimal('2'))
+ Decimal('1.20E+6')
+ >>> ExtendedContext.divide(5, 5)
+ Decimal('1')
+ >>> ExtendedContext.divide(Decimal(5), 5)
+ Decimal('1')
+ >>> ExtendedContext.divide(5, Decimal(5))
+ Decimal('1')
+ """
+ a = _convert_other(a, raiseit=True)
+ r = a.__div__(b, context=self)
+ if r is NotImplemented:
+ raise TypeError("Unable to convert %s to Decimal" % b)
+ else:
+ return r
+
+ def divide_int(self, a, b):
+ """Divides two numbers and returns the integer part of the result.
+
+ >>> ExtendedContext.divide_int(Decimal('2'), Decimal('3'))
+ Decimal('0')
+ >>> ExtendedContext.divide_int(Decimal('10'), Decimal('3'))
+ Decimal('3')
+ >>> ExtendedContext.divide_int(Decimal('1'), Decimal('0.3'))
+ Decimal('3')
+ >>> ExtendedContext.divide_int(10, 3)
+ Decimal('3')
+ >>> ExtendedContext.divide_int(Decimal(10), 3)
+ Decimal('3')
+ >>> ExtendedContext.divide_int(10, Decimal(3))
+ Decimal('3')
+ """
+ a = _convert_other(a, raiseit=True)
+ r = a.__floordiv__(b, context=self)
+ if r is NotImplemented:
+ raise TypeError("Unable to convert %s to Decimal" % b)
+ else:
+ return r
+
+ def divmod(self, a, b):
+ """Return (a // b, a % b).
+
+ >>> ExtendedContext.divmod(Decimal(8), Decimal(3))
+ (Decimal('2'), Decimal('2'))
+ >>> ExtendedContext.divmod(Decimal(8), Decimal(4))
+ (Decimal('2'), Decimal('0'))
+ >>> ExtendedContext.divmod(8, 4)
+ (Decimal('2'), Decimal('0'))
+ >>> ExtendedContext.divmod(Decimal(8), 4)
+ (Decimal('2'), Decimal('0'))
+ >>> ExtendedContext.divmod(8, Decimal(4))
+ (Decimal('2'), Decimal('0'))
+ """
+ a = _convert_other(a, raiseit=True)
+ r = a.__divmod__(b, context=self)
+ if r is NotImplemented:
+ raise TypeError("Unable to convert %s to Decimal" % b)
+ else:
+ return r
+
+ def exp(self, a):
+ """Returns e ** a.
+
+ >>> c = ExtendedContext.copy()
+ >>> c.Emin = -999
+ >>> c.Emax = 999
+ >>> c.exp(Decimal('-Infinity'))
+ Decimal('0')
+ >>> c.exp(Decimal('-1'))
+ Decimal('0.367879441')
+ >>> c.exp(Decimal('0'))
+ Decimal('1')
+ >>> c.exp(Decimal('1'))
+ Decimal('2.71828183')
+ >>> c.exp(Decimal('0.693147181'))
+ Decimal('2.00000000')
+ >>> c.exp(Decimal('+Infinity'))
+ Decimal('Infinity')
+ >>> c.exp(10)
+ Decimal('22026.4658')
+ """
+ a =_convert_other(a, raiseit=True)
+ return a.exp(context=self)
+
+ def fma(self, a, b, c):
+ """Returns a multiplied by b, plus c.
+
+ The first two operands are multiplied together, using multiply,
+ the third operand is then added to the result of that
+ multiplication, using add, all with only one final rounding.
+
+ >>> ExtendedContext.fma(Decimal('3'), Decimal('5'), Decimal('7'))
+ Decimal('22')
+ >>> ExtendedContext.fma(Decimal('3'), Decimal('-5'), Decimal('7'))
+ Decimal('-8')
+ >>> ExtendedContext.fma(Decimal('888565290'), Decimal('1557.96930'), Decimal('-86087.7578'))
+ Decimal('1.38435736E+12')
+ >>> ExtendedContext.fma(1, 3, 4)
+ Decimal('7')
+ >>> ExtendedContext.fma(1, Decimal(3), 4)
+ Decimal('7')
+ >>> ExtendedContext.fma(1, 3, Decimal(4))
+ Decimal('7')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.fma(b, c, context=self)
+
+ def is_canonical(self, a):
+ """Return True if the operand is canonical; otherwise return False.
+
+ Currently, the encoding of a Decimal instance is always
+ canonical, so this method returns True for any Decimal.
+
+ >>> ExtendedContext.is_canonical(Decimal('2.50'))
+ True
+ """
+ return a.is_canonical()
+
+ def is_finite(self, a):
+ """Return True if the operand is finite; otherwise return False.
+
+ A Decimal instance is considered finite if it is neither
+ infinite nor a NaN.
+
+ >>> ExtendedContext.is_finite(Decimal('2.50'))
+ True
+ >>> ExtendedContext.is_finite(Decimal('-0.3'))
+ True
+ >>> ExtendedContext.is_finite(Decimal('0'))
+ True
+ >>> ExtendedContext.is_finite(Decimal('Inf'))
+ False
+ >>> ExtendedContext.is_finite(Decimal('NaN'))
+ False
+ >>> ExtendedContext.is_finite(1)
+ True
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.is_finite()
+
+ def is_infinite(self, a):
+ """Return True if the operand is infinite; otherwise return False.
+
+ >>> ExtendedContext.is_infinite(Decimal('2.50'))
+ False
+ >>> ExtendedContext.is_infinite(Decimal('-Inf'))
+ True
+ >>> ExtendedContext.is_infinite(Decimal('NaN'))
+ False
+ >>> ExtendedContext.is_infinite(1)
+ False
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.is_infinite()
+
+ def is_nan(self, a):
+ """Return True if the operand is a qNaN or sNaN;
+ otherwise return False.
+
+ >>> ExtendedContext.is_nan(Decimal('2.50'))
+ False
+ >>> ExtendedContext.is_nan(Decimal('NaN'))
+ True
+ >>> ExtendedContext.is_nan(Decimal('-sNaN'))
+ True
+ >>> ExtendedContext.is_nan(1)
+ False
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.is_nan()
+
+ def is_normal(self, a):
+ """Return True if the operand is a normal number;
+ otherwise return False.
+
+ >>> c = ExtendedContext.copy()
+ >>> c.Emin = -999
+ >>> c.Emax = 999
+ >>> c.is_normal(Decimal('2.50'))
+ True
+ >>> c.is_normal(Decimal('0.1E-999'))
+ False
+ >>> c.is_normal(Decimal('0.00'))
+ False
+ >>> c.is_normal(Decimal('-Inf'))
+ False
+ >>> c.is_normal(Decimal('NaN'))
+ False
+ >>> c.is_normal(1)
+ True
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.is_normal(context=self)
+
+ def is_qnan(self, a):
+ """Return True if the operand is a quiet NaN; otherwise return False.
+
+ >>> ExtendedContext.is_qnan(Decimal('2.50'))
+ False
+ >>> ExtendedContext.is_qnan(Decimal('NaN'))
+ True
+ >>> ExtendedContext.is_qnan(Decimal('sNaN'))
+ False
+ >>> ExtendedContext.is_qnan(1)
+ False
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.is_qnan()
+
+ def is_signed(self, a):
+ """Return True if the operand is negative; otherwise return False.
+
+ >>> ExtendedContext.is_signed(Decimal('2.50'))
+ False
+ >>> ExtendedContext.is_signed(Decimal('-12'))
+ True
+ >>> ExtendedContext.is_signed(Decimal('-0'))
+ True
+ >>> ExtendedContext.is_signed(8)
+ False
+ >>> ExtendedContext.is_signed(-8)
+ True
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.is_signed()
+
+ def is_snan(self, a):
+ """Return True if the operand is a signaling NaN;
+ otherwise return False.
+
+ >>> ExtendedContext.is_snan(Decimal('2.50'))
+ False
+ >>> ExtendedContext.is_snan(Decimal('NaN'))
+ False
+ >>> ExtendedContext.is_snan(Decimal('sNaN'))
+ True
+ >>> ExtendedContext.is_snan(1)
+ False
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.is_snan()
+
+ def is_subnormal(self, a):
+ """Return True if the operand is subnormal; otherwise return False.
+
+ >>> c = ExtendedContext.copy()
+ >>> c.Emin = -999
+ >>> c.Emax = 999
+ >>> c.is_subnormal(Decimal('2.50'))
+ False
+ >>> c.is_subnormal(Decimal('0.1E-999'))
+ True
+ >>> c.is_subnormal(Decimal('0.00'))
+ False
+ >>> c.is_subnormal(Decimal('-Inf'))
+ False
+ >>> c.is_subnormal(Decimal('NaN'))
+ False
+ >>> c.is_subnormal(1)
+ False
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.is_subnormal(context=self)
+
+ def is_zero(self, a):
+ """Return True if the operand is a zero; otherwise return False.
+
+ >>> ExtendedContext.is_zero(Decimal('0'))
+ True
+ >>> ExtendedContext.is_zero(Decimal('2.50'))
+ False
+ >>> ExtendedContext.is_zero(Decimal('-0E+2'))
+ True
+ >>> ExtendedContext.is_zero(1)
+ False
+ >>> ExtendedContext.is_zero(0)
+ True
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.is_zero()
+
+ def ln(self, a):
+ """Returns the natural (base e) logarithm of the operand.
+
+ >>> c = ExtendedContext.copy()
+ >>> c.Emin = -999
+ >>> c.Emax = 999
+ >>> c.ln(Decimal('0'))
+ Decimal('-Infinity')
+ >>> c.ln(Decimal('1.000'))
+ Decimal('0')
+ >>> c.ln(Decimal('2.71828183'))
+ Decimal('1.00000000')
+ >>> c.ln(Decimal('10'))
+ Decimal('2.30258509')
+ >>> c.ln(Decimal('+Infinity'))
+ Decimal('Infinity')
+ >>> c.ln(1)
+ Decimal('0')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.ln(context=self)
+
+ def log10(self, a):
+ """Returns the base 10 logarithm of the operand.
+
+ >>> c = ExtendedContext.copy()
+ >>> c.Emin = -999
+ >>> c.Emax = 999
+ >>> c.log10(Decimal('0'))
+ Decimal('-Infinity')
+ >>> c.log10(Decimal('0.001'))
+ Decimal('-3')
+ >>> c.log10(Decimal('1.000'))
+ Decimal('0')
+ >>> c.log10(Decimal('2'))
+ Decimal('0.301029996')
+ >>> c.log10(Decimal('10'))
+ Decimal('1')
+ >>> c.log10(Decimal('70'))
+ Decimal('1.84509804')
+ >>> c.log10(Decimal('+Infinity'))
+ Decimal('Infinity')
+ >>> c.log10(0)
+ Decimal('-Infinity')
+ >>> c.log10(1)
+ Decimal('0')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.log10(context=self)
+
+ def logb(self, a):
+ """ Returns the exponent of the magnitude of the operand's MSD.
+
+ The result is the integer which is the exponent of the magnitude
+ of the most significant digit of the operand (as though the
+ operand were truncated to a single digit while maintaining the
+ value of that digit and without limiting the resulting exponent).
+
+ >>> ExtendedContext.logb(Decimal('250'))
+ Decimal('2')
+ >>> ExtendedContext.logb(Decimal('2.50'))
+ Decimal('0')
+ >>> ExtendedContext.logb(Decimal('0.03'))
+ Decimal('-2')
+ >>> ExtendedContext.logb(Decimal('0'))
+ Decimal('-Infinity')
+ >>> ExtendedContext.logb(1)
+ Decimal('0')
+ >>> ExtendedContext.logb(10)
+ Decimal('1')
+ >>> ExtendedContext.logb(100)
+ Decimal('2')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.logb(context=self)
+
+ def logical_and(self, a, b):
+ """Applies the logical operation 'and' between each operand's digits.
+
+ The operands must be both logical numbers.
+
+ >>> ExtendedContext.logical_and(Decimal('0'), Decimal('0'))
+ Decimal('0')
+ >>> ExtendedContext.logical_and(Decimal('0'), Decimal('1'))
+ Decimal('0')
+ >>> ExtendedContext.logical_and(Decimal('1'), Decimal('0'))
+ Decimal('0')
+ >>> ExtendedContext.logical_and(Decimal('1'), Decimal('1'))
+ Decimal('1')
+ >>> ExtendedContext.logical_and(Decimal('1100'), Decimal('1010'))
+ Decimal('1000')
+ >>> ExtendedContext.logical_and(Decimal('1111'), Decimal('10'))
+ Decimal('10')
+ >>> ExtendedContext.logical_and(110, 1101)
+ Decimal('100')
+ >>> ExtendedContext.logical_and(Decimal(110), 1101)
+ Decimal('100')
+ >>> ExtendedContext.logical_and(110, Decimal(1101))
+ Decimal('100')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.logical_and(b, context=self)
+
+ def logical_invert(self, a):
+ """Invert all the digits in the operand.
+
+ The operand must be a logical number.
+
+ >>> ExtendedContext.logical_invert(Decimal('0'))
+ Decimal('111111111')
+ >>> ExtendedContext.logical_invert(Decimal('1'))
+ Decimal('111111110')
+ >>> ExtendedContext.logical_invert(Decimal('111111111'))
+ Decimal('0')
+ >>> ExtendedContext.logical_invert(Decimal('101010101'))
+ Decimal('10101010')
+ >>> ExtendedContext.logical_invert(1101)
+ Decimal('111110010')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.logical_invert(context=self)
+
+ def logical_or(self, a, b):
+ """Applies the logical operation 'or' between each operand's digits.
+
+ The operands must be both logical numbers.
+
+ >>> ExtendedContext.logical_or(Decimal('0'), Decimal('0'))
+ Decimal('0')
+ >>> ExtendedContext.logical_or(Decimal('0'), Decimal('1'))
+ Decimal('1')
+ >>> ExtendedContext.logical_or(Decimal('1'), Decimal('0'))
+ Decimal('1')
+ >>> ExtendedContext.logical_or(Decimal('1'), Decimal('1'))
+ Decimal('1')
+ >>> ExtendedContext.logical_or(Decimal('1100'), Decimal('1010'))
+ Decimal('1110')
+ >>> ExtendedContext.logical_or(Decimal('1110'), Decimal('10'))
+ Decimal('1110')
+ >>> ExtendedContext.logical_or(110, 1101)
+ Decimal('1111')
+ >>> ExtendedContext.logical_or(Decimal(110), 1101)
+ Decimal('1111')
+ >>> ExtendedContext.logical_or(110, Decimal(1101))
+ Decimal('1111')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.logical_or(b, context=self)
+
+ def logical_xor(self, a, b):
+ """Applies the logical operation 'xor' between each operand's digits.
+
+ The operands must be both logical numbers.
+
+ >>> ExtendedContext.logical_xor(Decimal('0'), Decimal('0'))
+ Decimal('0')
+ >>> ExtendedContext.logical_xor(Decimal('0'), Decimal('1'))
+ Decimal('1')
+ >>> ExtendedContext.logical_xor(Decimal('1'), Decimal('0'))
+ Decimal('1')
+ >>> ExtendedContext.logical_xor(Decimal('1'), Decimal('1'))
+ Decimal('0')
+ >>> ExtendedContext.logical_xor(Decimal('1100'), Decimal('1010'))
+ Decimal('110')
+ >>> ExtendedContext.logical_xor(Decimal('1111'), Decimal('10'))
+ Decimal('1101')
+ >>> ExtendedContext.logical_xor(110, 1101)
+ Decimal('1011')
+ >>> ExtendedContext.logical_xor(Decimal(110), 1101)
+ Decimal('1011')
+ >>> ExtendedContext.logical_xor(110, Decimal(1101))
+ Decimal('1011')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.logical_xor(b, context=self)
+
+ def max(self, a, b):
+ """max compares two values numerically and returns the maximum.
+
+ If either operand is a NaN then the general rules apply.
+ Otherwise, the operands are compared as though by the compare
+ operation. If they are numerically equal then the left-hand operand
+ is chosen as the result. Otherwise the maximum (closer to positive
+ infinity) of the two operands is chosen as the result.
+
+ >>> ExtendedContext.max(Decimal('3'), Decimal('2'))
+ Decimal('3')
+ >>> ExtendedContext.max(Decimal('-10'), Decimal('3'))
+ Decimal('3')
+ >>> ExtendedContext.max(Decimal('1.0'), Decimal('1'))
+ Decimal('1')
+ >>> ExtendedContext.max(Decimal('7'), Decimal('NaN'))
+ Decimal('7')
+ >>> ExtendedContext.max(1, 2)
+ Decimal('2')
+ >>> ExtendedContext.max(Decimal(1), 2)
+ Decimal('2')
+ >>> ExtendedContext.max(1, Decimal(2))
+ Decimal('2')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.max(b, context=self)
+
+ def max_mag(self, a, b):
+ """Compares the values numerically with their sign ignored.
+
+ >>> ExtendedContext.max_mag(Decimal('7'), Decimal('NaN'))
+ Decimal('7')
+ >>> ExtendedContext.max_mag(Decimal('7'), Decimal('-10'))
+ Decimal('-10')
+ >>> ExtendedContext.max_mag(1, -2)
+ Decimal('-2')
+ >>> ExtendedContext.max_mag(Decimal(1), -2)
+ Decimal('-2')
+ >>> ExtendedContext.max_mag(1, Decimal(-2))
+ Decimal('-2')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.max_mag(b, context=self)
+
+ def min(self, a, b):
+ """min compares two values numerically and returns the minimum.
+
+ If either operand is a NaN then the general rules apply.
+ Otherwise, the operands are compared as though by the compare
+ operation. If they are numerically equal then the left-hand operand
+ is chosen as the result. Otherwise the minimum (closer to negative
+ infinity) of the two operands is chosen as the result.
+
+ >>> ExtendedContext.min(Decimal('3'), Decimal('2'))
+ Decimal('2')
+ >>> ExtendedContext.min(Decimal('-10'), Decimal('3'))
+ Decimal('-10')
+ >>> ExtendedContext.min(Decimal('1.0'), Decimal('1'))
+ Decimal('1.0')
+ >>> ExtendedContext.min(Decimal('7'), Decimal('NaN'))
+ Decimal('7')
+ >>> ExtendedContext.min(1, 2)
+ Decimal('1')
+ >>> ExtendedContext.min(Decimal(1), 2)
+ Decimal('1')
+ >>> ExtendedContext.min(1, Decimal(29))
+ Decimal('1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.min(b, context=self)
+
+ def min_mag(self, a, b):
+ """Compares the values numerically with their sign ignored.
+
+ >>> ExtendedContext.min_mag(Decimal('3'), Decimal('-2'))
+ Decimal('-2')
+ >>> ExtendedContext.min_mag(Decimal('-3'), Decimal('NaN'))
+ Decimal('-3')
+ >>> ExtendedContext.min_mag(1, -2)
+ Decimal('1')
+ >>> ExtendedContext.min_mag(Decimal(1), -2)
+ Decimal('1')
+ >>> ExtendedContext.min_mag(1, Decimal(-2))
+ Decimal('1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.min_mag(b, context=self)
+
+ def minus(self, a):
+ """Minus corresponds to unary prefix minus in Python.
+
+ The operation is evaluated using the same rules as subtract; the
+ operation minus(a) is calculated as subtract('0', a) where the '0'
+ has the same exponent as the operand.
+
+ >>> ExtendedContext.minus(Decimal('1.3'))
+ Decimal('-1.3')
+ >>> ExtendedContext.minus(Decimal('-1.3'))
+ Decimal('1.3')
+ >>> ExtendedContext.minus(1)
+ Decimal('-1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.__neg__(context=self)
+
+ def multiply(self, a, b):
+ """multiply multiplies two operands.
+
+ If either operand is a special value then the general rules apply.
+ Otherwise, the operands are multiplied together
+ ('long multiplication'), resulting in a number which may be as long as
+ the sum of the lengths of the two operands.
+
+ >>> ExtendedContext.multiply(Decimal('1.20'), Decimal('3'))
+ Decimal('3.60')
+ >>> ExtendedContext.multiply(Decimal('7'), Decimal('3'))
+ Decimal('21')
+ >>> ExtendedContext.multiply(Decimal('0.9'), Decimal('0.8'))
+ Decimal('0.72')
+ >>> ExtendedContext.multiply(Decimal('0.9'), Decimal('-0'))
+ Decimal('-0.0')
+ >>> ExtendedContext.multiply(Decimal('654321'), Decimal('654321'))
+ Decimal('4.28135971E+11')
+ >>> ExtendedContext.multiply(7, 7)
+ Decimal('49')
+ >>> ExtendedContext.multiply(Decimal(7), 7)
+ Decimal('49')
+ >>> ExtendedContext.multiply(7, Decimal(7))
+ Decimal('49')
+ """
+ a = _convert_other(a, raiseit=True)
+ r = a.__mul__(b, context=self)
+ if r is NotImplemented:
+ raise TypeError("Unable to convert %s to Decimal" % b)
+ else:
+ return r
+
+ def next_minus(self, a):
+ """Returns the largest representable number smaller than a.
+
+ >>> c = ExtendedContext.copy()
+ >>> c.Emin = -999
+ >>> c.Emax = 999
+ >>> ExtendedContext.next_minus(Decimal('1'))
+ Decimal('0.999999999')
+ >>> c.next_minus(Decimal('1E-1007'))
+ Decimal('0E-1007')
+ >>> ExtendedContext.next_minus(Decimal('-1.00000003'))
+ Decimal('-1.00000004')
+ >>> c.next_minus(Decimal('Infinity'))
+ Decimal('9.99999999E+999')
+ >>> c.next_minus(1)
+ Decimal('0.999999999')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.next_minus(context=self)
+
+ def next_plus(self, a):
+ """Returns the smallest representable number larger than a.
+
+ >>> c = ExtendedContext.copy()
+ >>> c.Emin = -999
+ >>> c.Emax = 999
+ >>> ExtendedContext.next_plus(Decimal('1'))
+ Decimal('1.00000001')
+ >>> c.next_plus(Decimal('-1E-1007'))
+ Decimal('-0E-1007')
+ >>> ExtendedContext.next_plus(Decimal('-1.00000003'))
+ Decimal('-1.00000002')
+ >>> c.next_plus(Decimal('-Infinity'))
+ Decimal('-9.99999999E+999')
+ >>> c.next_plus(1)
+ Decimal('1.00000001')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.next_plus(context=self)
+
+ def next_toward(self, a, b):
+ """Returns the number closest to a, in direction towards b.
+
+ The result is the closest representable number from the first
+ operand (but not the first operand) that is in the direction
+ towards the second operand, unless the operands have the same
+ value.
+
+ >>> c = ExtendedContext.copy()
+ >>> c.Emin = -999
+ >>> c.Emax = 999
+ >>> c.next_toward(Decimal('1'), Decimal('2'))
+ Decimal('1.00000001')
+ >>> c.next_toward(Decimal('-1E-1007'), Decimal('1'))
+ Decimal('-0E-1007')
+ >>> c.next_toward(Decimal('-1.00000003'), Decimal('0'))
+ Decimal('-1.00000002')
+ >>> c.next_toward(Decimal('1'), Decimal('0'))
+ Decimal('0.999999999')
+ >>> c.next_toward(Decimal('1E-1007'), Decimal('-100'))
+ Decimal('0E-1007')
+ >>> c.next_toward(Decimal('-1.00000003'), Decimal('-10'))
+ Decimal('-1.00000004')
+ >>> c.next_toward(Decimal('0.00'), Decimal('-0.0000'))
+ Decimal('-0.00')
+ >>> c.next_toward(0, 1)
+ Decimal('1E-1007')
+ >>> c.next_toward(Decimal(0), 1)
+ Decimal('1E-1007')
+ >>> c.next_toward(0, Decimal(1))
+ Decimal('1E-1007')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.next_toward(b, context=self)
+
+ def normalize(self, a):
+ """normalize reduces an operand to its simplest form.
+
+ Essentially a plus operation with all trailing zeros removed from the
+ result.
+
+ >>> ExtendedContext.normalize(Decimal('2.1'))
+ Decimal('2.1')
+ >>> ExtendedContext.normalize(Decimal('-2.0'))
+ Decimal('-2')
+ >>> ExtendedContext.normalize(Decimal('1.200'))
+ Decimal('1.2')
+ >>> ExtendedContext.normalize(Decimal('-120'))
+ Decimal('-1.2E+2')
+ >>> ExtendedContext.normalize(Decimal('120.00'))
+ Decimal('1.2E+2')
+ >>> ExtendedContext.normalize(Decimal('0.00'))
+ Decimal('0')
+ >>> ExtendedContext.normalize(6)
+ Decimal('6')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.normalize(context=self)
+
+ def number_class(self, a):
+ """Returns an indication of the class of the operand.
+
+ The class is one of the following strings:
+ -sNaN
+ -NaN
+ -Infinity
+ -Normal
+ -Subnormal
+ -Zero
+ +Zero
+ +Subnormal
+ +Normal
+ +Infinity
+
+ >>> c = Context(ExtendedContext)
+ >>> c.Emin = -999
+ >>> c.Emax = 999
+ >>> c.number_class(Decimal('Infinity'))
+ '+Infinity'
+ >>> c.number_class(Decimal('1E-10'))
+ '+Normal'
+ >>> c.number_class(Decimal('2.50'))
+ '+Normal'
+ >>> c.number_class(Decimal('0.1E-999'))
+ '+Subnormal'
+ >>> c.number_class(Decimal('0'))
+ '+Zero'
+ >>> c.number_class(Decimal('-0'))
+ '-Zero'
+ >>> c.number_class(Decimal('-0.1E-999'))
+ '-Subnormal'
+ >>> c.number_class(Decimal('-1E-10'))
+ '-Normal'
+ >>> c.number_class(Decimal('-2.50'))
+ '-Normal'
+ >>> c.number_class(Decimal('-Infinity'))
+ '-Infinity'
+ >>> c.number_class(Decimal('NaN'))
+ 'NaN'
+ >>> c.number_class(Decimal('-NaN'))
+ 'NaN'
+ >>> c.number_class(Decimal('sNaN'))
+ 'sNaN'
+ >>> c.number_class(123)
+ '+Normal'
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.number_class(context=self)
+
+ def plus(self, a):
+ """Plus corresponds to unary prefix plus in Python.
+
+ The operation is evaluated using the same rules as add; the
+ operation plus(a) is calculated as add('0', a) where the '0'
+ has the same exponent as the operand.
+
+ >>> ExtendedContext.plus(Decimal('1.3'))
+ Decimal('1.3')
+ >>> ExtendedContext.plus(Decimal('-1.3'))
+ Decimal('-1.3')
+ >>> ExtendedContext.plus(-1)
+ Decimal('-1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.__pos__(context=self)
+
+ def power(self, a, b, modulo=None):
+ """Raises a to the power of b, to modulo if given.
+
+ With two arguments, compute a**b. If a is negative then b
+ must be integral. The result will be inexact unless b is
+ integral and the result is finite and can be expressed exactly
+ in 'precision' digits.
+
+ With three arguments, compute (a**b) % modulo. For the
+ three argument form, the following restrictions on the
+ arguments hold:
+
+ - all three arguments must be integral
+ - b must be nonnegative
+ - at least one of a or b must be nonzero
+ - modulo must be nonzero and have at most 'precision' digits
+
+ The result of pow(a, b, modulo) is identical to the result
+ that would be obtained by computing (a**b) % modulo with
+ unbounded precision, but is computed more efficiently. It is
+ always exact.
+
+ >>> c = ExtendedContext.copy()
+ >>> c.Emin = -999
+ >>> c.Emax = 999
+ >>> c.power(Decimal('2'), Decimal('3'))
+ Decimal('8')
+ >>> c.power(Decimal('-2'), Decimal('3'))
+ Decimal('-8')
+ >>> c.power(Decimal('2'), Decimal('-3'))
+ Decimal('0.125')
+ >>> c.power(Decimal('1.7'), Decimal('8'))
+ Decimal('69.7575744')
+ >>> c.power(Decimal('10'), Decimal('0.301029996'))
+ Decimal('2.00000000')
+ >>> c.power(Decimal('Infinity'), Decimal('-1'))
+ Decimal('0')
+ >>> c.power(Decimal('Infinity'), Decimal('0'))
+ Decimal('1')
+ >>> c.power(Decimal('Infinity'), Decimal('1'))
+ Decimal('Infinity')
+ >>> c.power(Decimal('-Infinity'), Decimal('-1'))
+ Decimal('-0')
+ >>> c.power(Decimal('-Infinity'), Decimal('0'))
+ Decimal('1')
+ >>> c.power(Decimal('-Infinity'), Decimal('1'))
+ Decimal('-Infinity')
+ >>> c.power(Decimal('-Infinity'), Decimal('2'))
+ Decimal('Infinity')
+ >>> c.power(Decimal('0'), Decimal('0'))
+ Decimal('NaN')
+
+ >>> c.power(Decimal('3'), Decimal('7'), Decimal('16'))
+ Decimal('11')
+ >>> c.power(Decimal('-3'), Decimal('7'), Decimal('16'))
+ Decimal('-11')
+ >>> c.power(Decimal('-3'), Decimal('8'), Decimal('16'))
+ Decimal('1')
+ >>> c.power(Decimal('3'), Decimal('7'), Decimal('-16'))
+ Decimal('11')
+ >>> c.power(Decimal('23E12345'), Decimal('67E189'), Decimal('123456789'))
+ Decimal('11729830')
+ >>> c.power(Decimal('-0'), Decimal('17'), Decimal('1729'))
+ Decimal('-0')
+ >>> c.power(Decimal('-23'), Decimal('0'), Decimal('65537'))
+ Decimal('1')
+ >>> ExtendedContext.power(7, 7)
+ Decimal('823543')
+ >>> ExtendedContext.power(Decimal(7), 7)
+ Decimal('823543')
+ >>> ExtendedContext.power(7, Decimal(7), 2)
+ Decimal('1')
+ """
+ a = _convert_other(a, raiseit=True)
+ r = a.__pow__(b, modulo, context=self)
+ if r is NotImplemented:
+ raise TypeError("Unable to convert %s to Decimal" % b)
+ else:
+ return r
+
+ def quantize(self, a, b):
+ """Returns a value equal to 'a' (rounded), having the exponent of 'b'.
+
+ The coefficient of the result is derived from that of the left-hand
+ operand. It may be rounded using the current rounding setting (if the
+ exponent is being increased), multiplied by a positive power of ten (if
+ the exponent is being decreased), or is unchanged (if the exponent is
+ already equal to that of the right-hand operand).
+
+ Unlike other operations, if the length of the coefficient after the
+ quantize operation would be greater than precision then an Invalid
+ operation condition is raised. This guarantees that, unless there is
+ an error condition, the exponent of the result of a quantize is always
+ equal to that of the right-hand operand.
+
+ Also unlike other operations, quantize will never raise Underflow, even
+ if the result is subnormal and inexact.
+
+ >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('0.001'))
+ Decimal('2.170')
+ >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('0.01'))
+ Decimal('2.17')
+ >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('0.1'))
+ Decimal('2.2')
+ >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('1e+0'))
+ Decimal('2')
+ >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('1e+1'))
+ Decimal('0E+1')
+ >>> ExtendedContext.quantize(Decimal('-Inf'), Decimal('Infinity'))
+ Decimal('-Infinity')
+ >>> ExtendedContext.quantize(Decimal('2'), Decimal('Infinity'))
+ Decimal('NaN')
+ >>> ExtendedContext.quantize(Decimal('-0.1'), Decimal('1'))
+ Decimal('-0')
+ >>> ExtendedContext.quantize(Decimal('-0'), Decimal('1e+5'))
+ Decimal('-0E+5')
+ >>> ExtendedContext.quantize(Decimal('+35236450.6'), Decimal('1e-2'))
+ Decimal('NaN')
+ >>> ExtendedContext.quantize(Decimal('-35236450.6'), Decimal('1e-2'))
+ Decimal('NaN')
+ >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e-1'))
+ Decimal('217.0')
+ >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e-0'))
+ Decimal('217')
+ >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e+1'))
+ Decimal('2.2E+2')
+ >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e+2'))
+ Decimal('2E+2')
+ >>> ExtendedContext.quantize(1, 2)
+ Decimal('1')
+ >>> ExtendedContext.quantize(Decimal(1), 2)
+ Decimal('1')
+ >>> ExtendedContext.quantize(1, Decimal(2))
+ Decimal('1')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.quantize(b, context=self)
+
+ def radix(self):
+ """Just returns 10, as this is Decimal, :)
+
+ >>> ExtendedContext.radix()
+ Decimal('10')
+ """
+ return Decimal(10)
+
+ def remainder(self, a, b):
+ """Returns the remainder from integer division.
+
+ The result is the residue of the dividend after the operation of
+ calculating integer division as described for divide-integer, rounded
+ to precision digits if necessary. The sign of the result, if
+ non-zero, is the same as that of the original dividend.
+
+ This operation will fail under the same conditions as integer division
+ (that is, if integer division on the same two operands would fail, the
+ remainder cannot be calculated).
+
+ >>> ExtendedContext.remainder(Decimal('2.1'), Decimal('3'))
+ Decimal('2.1')
+ >>> ExtendedContext.remainder(Decimal('10'), Decimal('3'))
+ Decimal('1')
+ >>> ExtendedContext.remainder(Decimal('-10'), Decimal('3'))
+ Decimal('-1')
+ >>> ExtendedContext.remainder(Decimal('10.2'), Decimal('1'))
+ Decimal('0.2')
+ >>> ExtendedContext.remainder(Decimal('10'), Decimal('0.3'))
+ Decimal('0.1')
+ >>> ExtendedContext.remainder(Decimal('3.6'), Decimal('1.3'))
+ Decimal('1.0')
+ >>> ExtendedContext.remainder(22, 6)
+ Decimal('4')
+ >>> ExtendedContext.remainder(Decimal(22), 6)
+ Decimal('4')
+ >>> ExtendedContext.remainder(22, Decimal(6))
+ Decimal('4')
+ """
+ a = _convert_other(a, raiseit=True)
+ r = a.__mod__(b, context=self)
+ if r is NotImplemented:
+ raise TypeError("Unable to convert %s to Decimal" % b)
+ else:
+ return r
+
+ def remainder_near(self, a, b):
+ """Returns to be "a - b * n", where n is the integer nearest the exact
+ value of "x / b" (if two integers are equally near then the even one
+ is chosen). If the result is equal to 0 then its sign will be the
+ sign of a.
+
+ This operation will fail under the same conditions as integer division
+ (that is, if integer division on the same two operands would fail, the
+ remainder cannot be calculated).
+
+ >>> ExtendedContext.remainder_near(Decimal('2.1'), Decimal('3'))
+ Decimal('-0.9')
+ >>> ExtendedContext.remainder_near(Decimal('10'), Decimal('6'))
+ Decimal('-2')
+ >>> ExtendedContext.remainder_near(Decimal('10'), Decimal('3'))
+ Decimal('1')
+ >>> ExtendedContext.remainder_near(Decimal('-10'), Decimal('3'))
+ Decimal('-1')
+ >>> ExtendedContext.remainder_near(Decimal('10.2'), Decimal('1'))
+ Decimal('0.2')
+ >>> ExtendedContext.remainder_near(Decimal('10'), Decimal('0.3'))
+ Decimal('0.1')
+ >>> ExtendedContext.remainder_near(Decimal('3.6'), Decimal('1.3'))
+ Decimal('-0.3')
+ >>> ExtendedContext.remainder_near(3, 11)
+ Decimal('3')
+ >>> ExtendedContext.remainder_near(Decimal(3), 11)
+ Decimal('3')
+ >>> ExtendedContext.remainder_near(3, Decimal(11))
+ Decimal('3')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.remainder_near(b, context=self)
+
+ def rotate(self, a, b):
+ """Returns a rotated copy of a, b times.
+
+ The coefficient of the result is a rotated copy of the digits in
+ the coefficient of the first operand. The number of places of
+ rotation is taken from the absolute value of the second operand,
+ with the rotation being to the left if the second operand is
+ positive or to the right otherwise.
+
+ >>> ExtendedContext.rotate(Decimal('34'), Decimal('8'))
+ Decimal('400000003')
+ >>> ExtendedContext.rotate(Decimal('12'), Decimal('9'))
+ Decimal('12')
+ >>> ExtendedContext.rotate(Decimal('123456789'), Decimal('-2'))
+ Decimal('891234567')
+ >>> ExtendedContext.rotate(Decimal('123456789'), Decimal('0'))
+ Decimal('123456789')
+ >>> ExtendedContext.rotate(Decimal('123456789'), Decimal('+2'))
+ Decimal('345678912')
+ >>> ExtendedContext.rotate(1333333, 1)
+ Decimal('13333330')
+ >>> ExtendedContext.rotate(Decimal(1333333), 1)
+ Decimal('13333330')
+ >>> ExtendedContext.rotate(1333333, Decimal(1))
+ Decimal('13333330')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.rotate(b, context=self)
+
+ def same_quantum(self, a, b):
+ """Returns True if the two operands have the same exponent.
+
+ The result is never affected by either the sign or the coefficient of
+ either operand.
+
+ >>> ExtendedContext.same_quantum(Decimal('2.17'), Decimal('0.001'))
+ False
+ >>> ExtendedContext.same_quantum(Decimal('2.17'), Decimal('0.01'))
+ True
+ >>> ExtendedContext.same_quantum(Decimal('2.17'), Decimal('1'))
+ False
+ >>> ExtendedContext.same_quantum(Decimal('Inf'), Decimal('-Inf'))
+ True
+ >>> ExtendedContext.same_quantum(10000, -1)
+ True
+ >>> ExtendedContext.same_quantum(Decimal(10000), -1)
+ True
+ >>> ExtendedContext.same_quantum(10000, Decimal(-1))
+ True
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.same_quantum(b)
+
+ def scaleb (self, a, b):
+ """Returns the first operand after adding the second value its exp.
+
+ >>> ExtendedContext.scaleb(Decimal('7.50'), Decimal('-2'))
+ Decimal('0.0750')
+ >>> ExtendedContext.scaleb(Decimal('7.50'), Decimal('0'))
+ Decimal('7.50')
+ >>> ExtendedContext.scaleb(Decimal('7.50'), Decimal('3'))
+ Decimal('7.50E+3')
+ >>> ExtendedContext.scaleb(1, 4)
+ Decimal('1E+4')
+ >>> ExtendedContext.scaleb(Decimal(1), 4)
+ Decimal('1E+4')
+ >>> ExtendedContext.scaleb(1, Decimal(4))
+ Decimal('1E+4')
+ """
+ a = _convert_other(a, raiseit=True)
+ return a.scaleb(b, context=self)
+
+ def shift(self, a, b):
+     """Return a copy of a with its digits shifted by b places.
+
+     The result's coefficient is the first operand's coefficient with
+     its digits shifted.  The absolute value of the second operand
+     gives the number of places; the direction is to the left when the
+     second operand is positive and to the right otherwise.  Zeros are
+     shifted into the coefficient.
+
+     >>> ExtendedContext.shift(Decimal('34'), Decimal('8'))
+     Decimal('400000000')
+     >>> ExtendedContext.shift(Decimal('12'), Decimal('9'))
+     Decimal('0')
+     >>> ExtendedContext.shift(Decimal('123456789'), Decimal('-2'))
+     Decimal('1234567')
+     >>> ExtendedContext.shift(Decimal('123456789'), Decimal('0'))
+     Decimal('123456789')
+     >>> ExtendedContext.shift(Decimal('123456789'), Decimal('+2'))
+     Decimal('345678900')
+     >>> ExtendedContext.shift(88888888, 2)
+     Decimal('888888800')
+     >>> ExtendedContext.shift(Decimal(88888888), 2)
+     Decimal('888888800')
+     >>> ExtendedContext.shift(88888888, Decimal(2))
+     Decimal('888888800')
+     """
+     # a must be convertible (TypeError otherwise); this context is
+     # passed along to the Decimal method
+     return _convert_other(a, raiseit=True).shift(b, context=self)
+
+ def sqrt(self, a):
+     """Return the square root of a non-negative number, to context precision.
+
+     An inexact result is rounded with the round-half-even algorithm.
+
+     >>> ExtendedContext.sqrt(Decimal('0'))
+     Decimal('0')
+     >>> ExtendedContext.sqrt(Decimal('-0'))
+     Decimal('-0')
+     >>> ExtendedContext.sqrt(Decimal('0.39'))
+     Decimal('0.624499800')
+     >>> ExtendedContext.sqrt(Decimal('100'))
+     Decimal('10')
+     >>> ExtendedContext.sqrt(Decimal('1'))
+     Decimal('1')
+     >>> ExtendedContext.sqrt(Decimal('1.0'))
+     Decimal('1.0')
+     >>> ExtendedContext.sqrt(Decimal('1.00'))
+     Decimal('1.0')
+     >>> ExtendedContext.sqrt(Decimal('7'))
+     Decimal('2.64575131')
+     >>> ExtendedContext.sqrt(Decimal('10'))
+     Decimal('3.16227766')
+     >>> ExtendedContext.sqrt(2)
+     Decimal('1.41421356')
+     >>> ExtendedContext.prec
+     9
+     """
+     # a must be convertible (TypeError otherwise)
+     return _convert_other(a, raiseit=True).sqrt(context=self)
+
+ def subtract(self, a, b):
+     """Return the difference between the two operands.
+
+     Raises TypeError when either operand cannot be converted to
+     Decimal.
+
+     >>> ExtendedContext.subtract(Decimal('1.3'), Decimal('1.07'))
+     Decimal('0.23')
+     >>> ExtendedContext.subtract(Decimal('1.3'), Decimal('1.30'))
+     Decimal('0.00')
+     >>> ExtendedContext.subtract(Decimal('1.3'), Decimal('2.07'))
+     Decimal('-0.77')
+     >>> ExtendedContext.subtract(8, 5)
+     Decimal('3')
+     >>> ExtendedContext.subtract(Decimal(8), 5)
+     Decimal('3')
+     >>> ExtendedContext.subtract(8, Decimal(5))
+     Decimal('3')
+     """
+     a = _convert_other(a, raiseit=True)
+     r = a.__sub__(b, context=self)
+     if r is NotImplemented:
+         # Wrap b in a 1-tuple: a bare tuple operand would otherwise be
+         # unpacked as %-format arguments and garble (or break) the message.
+         raise TypeError("Unable to convert %s to Decimal" % (b,))
+     return r
+
+ def to_eng_string(self, a):
+     """Convert to a string, using engineering notation if an exponent is needed.
+
+     In engineering notation the exponent is a multiple of 3.  Up to 3
+     digits may therefore appear to the left of the decimal place, and
+     one or two trailing zeros may have to be added.
+
+     The operation is not affected by the context.
+
+     >>> ExtendedContext.to_eng_string(Decimal('123E+1'))
+     '1.23E+3'
+     >>> ExtendedContext.to_eng_string(Decimal('123E+3'))
+     '123E+3'
+     >>> ExtendedContext.to_eng_string(Decimal('123E-10'))
+     '12.3E-9'
+     >>> ExtendedContext.to_eng_string(Decimal('-123E-12'))
+     '-123E-12'
+     >>> ExtendedContext.to_eng_string(Decimal('7E-7'))
+     '700E-9'
+     >>> ExtendedContext.to_eng_string(Decimal('7E+1'))
+     '70'
+     >>> ExtendedContext.to_eng_string(Decimal('0E+1'))
+     '0.00E+3'
+
+     """
+     # a must be convertible (TypeError otherwise)
+     return _convert_other(a, raiseit=True).to_eng_string(context=self)
+
+ def to_sci_string(self, a):
+     """Convert a number to a string, using scientific notation.
+
+     The operation is not affected by the context.
+     """
+     # a must be convertible (TypeError otherwise); the string comes
+     # from Decimal.__str__, called with this context
+     return _convert_other(a, raiseit=True).__str__(context=self)
+
+ def to_integral_exact(self, a):
+     """Round to an integer, signalling Inexact and Rounded as appropriate.
+
+     For an operand with a negative exponent the result equals that of
+     quantize() with the operand on the left, 1E+0 on the right and
+     the operand's own precision as the precision setting; the Inexact
+     and Rounded flags are allowed in this operation.  The rounding
+     mode is taken from the context.
+
+     >>> ExtendedContext.to_integral_exact(Decimal('2.1'))
+     Decimal('2')
+     >>> ExtendedContext.to_integral_exact(Decimal('100'))
+     Decimal('100')
+     >>> ExtendedContext.to_integral_exact(Decimal('100.0'))
+     Decimal('100')
+     >>> ExtendedContext.to_integral_exact(Decimal('101.5'))
+     Decimal('102')
+     >>> ExtendedContext.to_integral_exact(Decimal('-101.5'))
+     Decimal('-102')
+     >>> ExtendedContext.to_integral_exact(Decimal('10E+5'))
+     Decimal('1.0E+6')
+     >>> ExtendedContext.to_integral_exact(Decimal('7.89E+77'))
+     Decimal('7.89E+77')
+     >>> ExtendedContext.to_integral_exact(Decimal('-Inf'))
+     Decimal('-Infinity')
+     """
+     # a must be convertible (TypeError otherwise)
+     return _convert_other(a, raiseit=True).to_integral_exact(context=self)
+
+ def to_integral_value(self, a):
+     """Round to an integer without setting any flags.
+
+     For an operand with a negative exponent the result equals that of
+     quantize() with the operand on the left, 1E+0 on the right and
+     the operand's own precision as the precision setting, except that
+     no flags will be set.  The rounding mode is taken from the context.
+
+     >>> ExtendedContext.to_integral_value(Decimal('2.1'))
+     Decimal('2')
+     >>> ExtendedContext.to_integral_value(Decimal('100'))
+     Decimal('100')
+     >>> ExtendedContext.to_integral_value(Decimal('100.0'))
+     Decimal('100')
+     >>> ExtendedContext.to_integral_value(Decimal('101.5'))
+     Decimal('102')
+     >>> ExtendedContext.to_integral_value(Decimal('-101.5'))
+     Decimal('-102')
+     >>> ExtendedContext.to_integral_value(Decimal('10E+5'))
+     Decimal('1.0E+6')
+     >>> ExtendedContext.to_integral_value(Decimal('7.89E+77'))
+     Decimal('7.89E+77')
+     >>> ExtendedContext.to_integral_value(Decimal('-Inf'))
+     Decimal('-Infinity')
+     """
+     # a must be convertible (TypeError otherwise)
+     return _convert_other(a, raiseit=True).to_integral_value(context=self)
+
+ # The method was renamed; the old name is kept as an alias for compatibility.
+ to_integral = to_integral_value
+
+class _WorkRep(object):
+    """Plain record with the three fields sign, int and exp.
+
+    It can be built empty, from a Decimal, or from an indexable
+    (sign, int, exp) triple.
+    """
+    __slots__ = ('sign','int','exp')
+    # sign: 0 or 1
+    # int:  int or long
+    # exp:  None, int, or string
+
+    def __init__(self, value=None):
+        if value is None:
+            # empty record
+            self.sign, self.int, self.exp = None, 0, None
+            return
+        if isinstance(value, Decimal):
+            # value._int is converted with int() before being stored
+            self.sign = value._sign
+            self.int = int(value._int)
+            self.exp = value._exp
+            return
+        # anything else is indexed as a (sign, int, exp) triple
+        self.sign = value[0]
+        self.int = value[1]
+        self.exp = value[2]
+
+    def __repr__(self):
+        fields = (self.sign, self.int, self.exp)
+        return "(%r, %r, %r)" % fields
+
+    __str__ = __repr__
+
+
+
+def _normalize(op1, op2, prec = 0):
+    """Bring op1 and op2 to a common exponent, modifying them in place.
+
+    Used during addition.  Returns the pair (op1, op2).
+    """
+    # 'high' is the operand with the larger exponent (op1 on a tie)
+    if op1.exp < op2.exp:
+        high, low = op2, op1
+    else:
+        high, low = op1, op2
+
+    # Let exp = min(high.exp - 1, high.adjusted() - precision - 1).
+    # Then adding 10**exp to high has the same effect (after rounding)
+    # as adding any positive quantity smaller than 10**exp; similarly
+    # for subtraction.  So if low is smaller than 10**exp we replace
+    # it with 10**exp.  This avoids high.exp - low.exp getting too large.
+    high_digits = len(str(high.int))
+    low_digits = len(str(low.int))
+    floor_exp = high.exp + min(-1, high_digits - prec - 2)
+    if low_digits + low.exp - 1 < floor_exp:
+        low.int = 1
+        low.exp = floor_exp
+
+    # rescale the higher-exponent operand down to the lower exponent
+    high.int *= 10 ** (high.exp - low.exp)
+    high.exp = low.exp
+    return op1, op2
+
+##### Integer arithmetic functions used by ln, log10, exp and __pow__ #####
+
+# This function from Tim Peters was taken from here:
+# http://mail.python.org/pipermail/python-list/1999-July/007758.html
+# The correction table is bound as a default argument for speed, and
+# math.log is deliberately not used, so that no floats are involved.
+def _nbits(n, correction = {
+        '0': 4, '1': 3, '2': 2, '3': 2,
+        '4': 1, '5': 1, '6': 1, '7': 1,
+        '8': 0, '9': 0, 'a': 0, 'b': 0,
+        'c': 0, 'd': 0, 'e': 0, 'f': 0}):
+    """Number of bits in binary representation of the positive integer n,
+    or 0 if n == 0.
+
+    Raises ValueError for negative n.
+    """
+    if n < 0:
+        raise ValueError("The argument to _nbits should be nonnegative.")
+    # four bits per hex digit, minus the unused high bits of the leading digit
+    hex_digits = "%x" % n
+    leading = hex_digits[0]
+    return 4*len(hex_digits) - correction[leading]
+
+def _decimal_lshift_exact(n, e):
+    """ Given integers n and e, return n * 10**e if it's an integer, else None.
+
+    Large powers of 10 are only computed when they are really needed.
+
+    >>> _decimal_lshift_exact(3, 4)
+    30000
+    >>> _decimal_lshift_exact(300, -999999999)  # returns None
+
+    """
+    if n == 0:
+        return 0
+    if e >= 0:
+        return n * 10**e
+
+    # e < 0: the division is exact only if n ends in at least -e zeros
+    digits = str(abs(n))
+    trailing_zeros = len(digits) - len(digits.rstrip('0'))
+    if trailing_zeros < -e:
+        return None
+    return n // 10**-e
+
+def _sqrt_nearest(n, a):
+    """Closest integer to the square root of the positive integer n.
+
+    a is a starting guess for the root.  Any positive integer works,
+    but convergence is faster the closer a is to the true square root.
+    Raises ValueError unless both arguments are positive.
+    """
+    if n <= 0 or a <= 0:
+        raise ValueError("Both arguments to _sqrt_nearest should be positive.")
+
+    previous = 0
+    while a != previous:
+        previous = a
+        # -(-n // a) is n/a rounded up; average it with a, rounding down
+        a = (a - (-n // a)) >> 1
+    return a
+
+def _rshift_nearest(x, shift):
+    """Given an integer x and a nonnegative integer shift, return closest
+    integer to x / 2**shift; use round-to-even in case of a tie.
+
+    """
+    modulus = 1L << shift
+    quotient = x >> shift
+    remainder = x & (modulus - 1)
+    # round up when past the halfway point, or exactly at it with an
+    # odd quotient (the comparison yields a bool, added as 0 or 1)
+    round_up = 2*remainder + (quotient & 1) > modulus
+    return quotient + round_up
+
+def _div_nearest(a, b):
+    """Closest integer to a/b, a and b positive integers; rounds to even
+    in the case of a tie.
+
+    """
+    quotient, remainder = divmod(a, b)
+    # round up when past the halfway point, or exactly at it with an
+    # odd quotient (the comparison yields a bool, added as 0 or 1)
+    round_up = 2*remainder + (quotient & 1) > b
+    return quotient + round_up
+
+def _ilog(x, M, L = 8):
+    """Integer approximation to M*log(x/M), with absolute error boundable
+    in terms only of x/M.
+
+    Given positive integers x and M, return an integer approximation to
+    M * log(x/M).  For L = 8 and 0.1 <= x/M <= 10 the difference
+    between the approximation and the exact result is at most 22.  For
+    L = 8 and 1.0 <= x/M <= 10.0 the difference is at most 15.  In
+    both cases these are upper bounds on the error; it will usually be
+    much smaller."""
+
+    # The basic algorithm is the following: let log1p be the function
+    # log1p(x) = log(1+x).  Then log(x/M) = log1p((x-M)/M).  We use
+    # the reduction
+    #
+    #    log1p(y) = 2*log1p(y/(1+sqrt(1+y)))
+    #
+    # repeatedly until the argument to log1p is small (< 2**-L in
+    # absolute value).  For small y we can use the Taylor series
+    # expansion
+    #
+    #    log1p(y) ~ y - y**2/2 + y**3/3 - ... - (-y)**T/T
+    #
+    # truncating at T such that y**T is small enough.  The whole
+    # computation is carried out in a form of fixed-point arithmetic,
+    # with a real number z being represented by an integer
+    # approximation to z*M.  To avoid loss of precision, the y below
+    # is actually an integer approximation to 2**R*y*M, where R is the
+    # number of reductions performed so far.
+
+    y = x-M
+    # argument reduction; R = number of reductions performed
+    R = 0
+    # Precedence note: the shift counts are L-R and R-L (subtraction
+    # binds tighter than << and >>), and each 'and' pair is evaluated
+    # before the 'or' that joins them.
+    while (R <= L and long(abs(y)) << L-R >= M or
+           R > L and abs(y) >> R-L >= M):
+        y = _div_nearest(long(M*y) << 1,
+                         M + _sqrt_nearest(M*(M+_rshift_nearest(y, R)), M))
+        R += 1
+
+    # Taylor series with T terms
+    # T is 10*len(str(M)) / (3*L) rounded up: floor division of the
+    # negated numerator, negated again.
+    T = -int(-10*len(str(M))//(3*L))
+    yshift = _rshift_nearest(y, R)
+    w = _div_nearest(M, T)
+    for k in xrange(T-1, 0, -1):
+        w = _div_nearest(M, k) - _div_nearest(yshift*w, M)
+
+    return _div_nearest(w*y, M)
+
+def _dlog10(c, e, p):
+    """Given integers c, e and p with c > 0, p >= 0, compute an integer
+    approximation to 10**p * log10(c*10**e), with an absolute error of
+    at most 1.  Assumes that c*10**e is not exactly 1."""
+
+    # increase precision by 2; compensate for this by dividing
+    # final result by 100
+    p += 2
+
+    # write c*10**e as d*10**f with either:
+    #   f >= 0 and 1 <= d <= 10, or
+    #   f <= 0 and 0.1 <= d <= 1.
+    # Thus for c*10**e close to 1, f = 0
+    l = len(str(c))
+    # the comparison yields a bool, subtracted here as the integer 0 or 1
+    f = e+l - (e+l >= 1)
+
+    if p > 0:
+        M = 10**p
+        # scale c by 10**k, rounding to nearest when k is negative
+        k = e+p-f
+        if k >= 0:
+            c *= 10**k
+        else:
+            c = _div_nearest(c, 10**-k)
+
+        log_d = _ilog(c, M) # error < 5 + 22 = 27
+        log_10 = _log10_digits(p) # error < 1
+        # convert the natural log to base 10 by dividing by log(10)
+        log_d = _div_nearest(log_d*M, log_10)
+        log_tenpower = f*M # exact
+    else:
+        log_d = 0  # error < 2.31
+        log_tenpower = _div_nearest(f, 10**-p) # error < 0.5
+
+    # undo the two extra digits of precision added at the top
+    return _div_nearest(log_tenpower+log_d, 100)
+
+def _dlog(c, e, p):
+    """Given integers c, e and p with c > 0, compute an integer
+    approximation to 10**p * log(c*10**e), with an absolute error of
+    at most 1.  Assumes that c*10**e is not exactly 1."""
+
+    # Increase precision by 2. The precision increase is compensated
+    # for at the end with a division by 100.
+    p += 2
+
+    # rewrite c*10**e as d*10**f with either f >= 0 and 1 <= d <= 10,
+    # or f <= 0 and 0.1 <= d <= 1.  Then we can compute 10**p * log(c*10**e)
+    # as 10**p * log(d) + 10**p*f * log(10).
+    l = len(str(c))
+    # the comparison yields a bool, subtracted here as the integer 0 or 1
+    f = e+l - (e+l >= 1)
+
+    # compute approximation to 10**p*log(d), with error < 27
+    if p > 0:
+        k = e+p-f
+        if k >= 0:
+            c *= 10**k
+        else:
+            c = _div_nearest(c, 10**-k)  # error of <= 0.5 in c
+
+        # _ilog magnifies existing error in c by a factor of at most 10
+        log_d = _ilog(c, 10**p) # error < 5 + 22 = 27
+    else:
+        # p <= 0: just approximate the whole thing by 0; error < 2.31
+        log_d = 0
+
+    # compute approximation to f*10**p*log(10), with error < 11.
+    if f:
+        # extra = number of digits of |f|, minus one
+        extra = len(str(abs(f)))-1
+        if p + extra >= 0:
+            # error in f * _log10_digits(p+extra) < |f| * 1 = |f|
+            # after division, error < |f|/10**extra + 0.5 < 10 + 0.5 < 11
+            f_log_ten = _div_nearest(f*_log10_digits(p+extra), 10**extra)
+        else:
+            f_log_ten = 0
+    else:
+        f_log_ten = 0
+
+    # error in sum < 11+27 = 38; error after division < 0.38 + 0.5 < 1
+    return _div_nearest(f_log_ten + log_d, 100)
+
+class _Log10Memoize(object):
+    """Class to compute, store, and allow retrieval of, digits of the
+    constant log(10) = 2.302585....  This constant is needed by
+    Decimal.ln, Decimal.log10, Decimal.exp and Decimal.__pow__."""
+    def __init__(self):
+        # leading digits of log(10), decimal point omitted
+        self.digits = "23025850929940456840179914546843642076011014886"
+
+    def getdigits(self, p):
+        """Given an integer p >= 0, return floor(10**p * log(10)).
+
+        For example, self.getdigits(3) returns 2302.
+
+        Raises ValueError if p is negative.
+        """
+        # digits are stored as a string, for quick conversion to
+        # integer in the case that we've already computed enough
+        # digits; the stored digits should always be correct
+        # (truncated, not rounded to nearest).
+        if p < 0:
+            raise ValueError("p should be nonnegative")
+
+        if p >= len(self.digits):
+            # compute p+3, p+6, p+9, ... digits; continue until at
+            # least one of the extra digits is nonzero
+            extra = 3
+            while True:
+                # compute p+extra digits, correct to within 1ulp
+                M = 10**(p+extra+2)
+                digits = str(_div_nearest(_ilog(10*M, M), 100))
+                if digits[-extra:] != '0'*extra:
+                    break
+                extra += 3
+            # keep all reliable digits so far; remove trailing zeros
+            # and next nonzero digit
+            self.digits = digits.rstrip('0')[:-1]
+        # p+1 characters: the integer digit plus p digits after the point
+        return int(self.digits[:p+1])
+
+# module-level accessor bound to a single shared _Log10Memoize instance
+_log10_digits = _Log10Memoize().getdigits
+
+def _iexp(x, M, L=8):
+    """Given integers x and M, M > 0, such that x/M is small in absolute
+    value, compute an integer approximation to M*exp(x/M).  For 0 <=
+    x/M <= 2.4, the absolute error in the result is bounded by 60 (and
+    is usually much smaller)."""
+
+    # Algorithm: to compute exp(z) for a real number z, first divide z
+    # by a suitable power R of 2 so that |z/2**R| < 2**-L.  Then
+    # compute expm1(z/2**R) = exp(z/2**R) - 1 using the usual Taylor
+    # series
+    #
+    #     expm1(x) = x + x**2/2! + x**3/3! + ...
+    #
+    # Now use the identity
+    #
+    #     expm1(2x) = expm1(x)*(expm1(x)+2)
+    #
+    # R times to compute the sequence expm1(z/2**R),
+    # expm1(z/2**(R-1)), ... , exp(z/2), exp(z).
+
+    # Find R such that x/2**R/M <= 2**-L
+    R = _nbits((long(x)<<L)//M)
+
+    # Taylor series.  (2**L)**T > M
+    # T is 10*len(str(M)) / (3*L) rounded up
+    T = -int(-10*len(str(M))//(3*L))
+    y = _div_nearest(x, T)
+    Mshift = long(M)<<R
+    for i in xrange(T-1, 0, -1):
+        y = _div_nearest(x*(Mshift + y), Mshift * i)
+
+    # Expansion
+    for k in xrange(R-1, -1, -1):
+        Mshift = long(M)<<(k+2)
+        y = _div_nearest(y*(y+Mshift), Mshift)
+
+    return M+y
+
+def _dexp(c, e, p):
+    """Compute an approximation to exp(c*10**e), with p decimal places of
+    precision.
+
+    Returns integers d, f such that:
+
+      10**(p-1) <= d <= 10**p, and
+      (d-1)*10**f < exp(c*10**e) < (d+1)*10**f
+
+    In other words, d*10**f is an approximation to exp(c*10**e) with p
+    digits of precision, and with an error in d of at most 1.  This is
+    almost, but not quite, the same as the error being < 1ulp: when d
+    = 10**(p-1) the error could be up to 10 ulp."""
+
+    # we'll call iexp with M = 10**(p+2), giving p+3 digits of precision
+    p += 2
+
+    # compute log(10) with extra precision = adjusted exponent of c*10**e
+    extra = max(0, e + len(str(c)) - 1)
+    q = p + extra
+
+    # compute quotient c*10**e/(log(10)) = c*10**(e+q)/(log(10)*10**q),
+    # rounding down
+    shift = e+q
+    if shift >= 0:
+        cshift = c*10**shift
+    else:
+        cshift = c//10**-shift
+    quot, rem = divmod(cshift, _log10_digits(q))
+
+    # reduce remainder back to original precision
+    rem = _div_nearest(rem, 10**extra)
+
+    # error in result of _iexp < 120;  error after division < 0.62
+    return _div_nearest(_iexp(rem, 10**p), 1000), quot - p + 3
+
+def _dpower(xc, xe, yc, ye, p):
+    """Given integers xc, xe, yc and ye representing Decimals x = xc*10**xe and
+    y = yc*10**ye, compute x**y.  Returns a pair of integers (c, e) such that:
+
+      10**(p-1) <= c <= 10**p, and
+      (c-1)*10**e < x**y < (c+1)*10**e
+
+    That is, c*10**e approximates x**y to p digits of precision with
+    an error in c of at most 1.  (This is almost, but not quite, the
+    same as the error being < 1ulp: when c == 10**(p-1) we can only
+    guarantee error < 10ulp.)
+
+    We assume that: x is positive and not equal to 1, and y is nonzero.
+    """
+
+    # Find b such that 10**(b-1) <= |y| <= 10**b
+    b = len(str(abs(yc))) + ye
+
+    # log(x) = lxc*10**(-p-b-1), to p+b+1 places after the decimal point
+    lxc = _dlog(xc, xe, p+b+1)
+
+    # compute product y*log(x) = yc*lxc*10**(-p-b-1+ye) = pc*10**(-p-1)
+    product = lxc*yc
+    shift = ye-b
+    if shift >= 0:
+        pc = product*10**shift
+    else:
+        pc = _div_nearest(product, 10**-shift)
+
+    if pc != 0:
+        coeff, exp = _dexp(pc, -(p+1), p+1)
+        return _div_nearest(coeff, 10), exp + 1
+
+    # pc == 0: we prefer a result that isn't exactly 1; this makes it
+    # easier to compute a correctly rounded result in __pow__
+    x_at_least_one = len(str(xc)) + xe >= 1
+    if x_at_least_one == (yc > 0):     # if x**y > 1:
+        return 10**(p-1)+1, 1-p
+    return 10**p-1, -p
+
+def _log10_lb(c, correction = {
+        '1': 100, '2': 70, '3': 53, '4': 40, '5': 31,
+        '6': 23, '7': 16, '8': 10, '9': 5}):
+    """Compute a lower bound for 100*log10(c) for a positive integer c.
+
+    Raises ValueError if c is zero or negative.
+    """
+    if c <= 0:
+        # the guard rejects 0 as well, so the requirement is "positive"
+        raise ValueError("The argument to _log10_lb should be positive.")
+    # 100 per decimal digit, less a correction looked up by leading digit
+    str_c = str(c)
+    return 100*len(str_c) - correction[str_c[0]]
+
+##### Helper Functions ####################################################
+
+def _convert_other(other, raiseit=False, allow_float=False):
+    """Convert other to Decimal.
+
+    Verifies that it's ok to use in an implicit construction.
+    If allow_float is true, allow conversion from float;  this
+    is used in the comparison methods (__eq__ and friends).
+
+    Returns a Decimal on success.  On failure, raises TypeError when
+    raiseit is true and returns NotImplemented otherwise.
+    """
+    if isinstance(other, Decimal):
+        return other
+    if isinstance(other, (int, long)):
+        return Decimal(other)
+    if allow_float and isinstance(other, float):
+        return Decimal.from_float(other)
+
+    # on the 'cli' platform, System.Decimal values are accepted too
+    import sys
+    if sys.platform == 'cli':
+        import System
+        if isinstance(other, System.Decimal):
+            return Decimal(other)
+
+    if raiseit:
+        # Wrap other in a 1-tuple: a bare tuple operand would otherwise be
+        # unpacked as %-format arguments and garble (or break) the message.
+        raise TypeError("Unable to convert %s to Decimal" % (other,))
+    return NotImplemented
+
+##### Setup Specific Contexts ############################################
+
+# The default context prototype used by Context().
+# It is mutable, so that new contexts can have different default values.
+
+DefaultContext = Context(
+        prec=28, rounding=ROUND_HALF_EVEN,
+        traps=[DivisionByZero, Overflow, InvalidOperation],
+        flags=[],
+        Emax=999999999,
+        Emin=-999999999,
+        capitals=1
+)
+
+# Pre-made alternate contexts offered by the specification.
+# Don't change these; the user should be able to select these
+# contexts and be able to reproduce results from other implementations
+# of the spec.
+
+# 9 digits, ROUND_HALF_UP, with five signals trapped
+BasicContext = Context(
+        prec=9, rounding=ROUND_HALF_UP,
+        traps=[DivisionByZero, Overflow, InvalidOperation, Clamped, Underflow],
+        flags=[],
+)
+
+# 9 digits, ROUND_HALF_EVEN, with no signals trapped
+ExtendedContext = Context(
+        prec=9, rounding=ROUND_HALF_EVEN,
+        traps=[],
+        flags=[],
+)
+
+
+##### crud for parsing strings #############################################
+#
+# Regular expression used for parsing numeric strings. Additional
+# comments:
+#
+# 1. Uncomment the two '\s*' lines to allow leading and/or trailing
+# whitespace. But note that the specification disallows whitespace in
+# a numeric string.
+#
+# 2. For finite numbers (not infinities and NaNs) the body of the
+# number between the optional sign and the optional exponent must have
+# at least one decimal digit, possibly after the decimal point. The
+# lookahead expression '(?=\d|\.\d)' checks this.
+
+import re
+# The named groups (sign, int, frac, exp, signal, diag) give the parts
+# of the numeric string; the bound .match method is what gets exported.
+_parser = re.compile(r"""        # A numeric string consists of:
+#    \s*
+    (?P<sign>[-+])?              # an optional sign, followed by either...
+    (
+        (?=\d|\.\d)              # ...a number (with at least one digit)
+        (?P<int>\d*)             # having a (possibly empty) integer part
+        (\.(?P<frac>\d*))?       # followed by an optional fractional part
+        (E(?P<exp>[-+]?\d+))?    # followed by an optional exponent, or...
+    |
+        Inf(inity)?              # ...an infinity, or...
+    |
+        (?P<signal>s)?           # ...an (optionally signaling)
+        NaN                      # NaN
+        (?P<diag>\d*)            # with (possibly empty) diagnostic info.
+    )
+#    \s*
+    \Z
+""", re.VERBOSE | re.IGNORECASE | re.UNICODE).match
+
+# match a string made up entirely of zeros (possibly empty)
+_all_zeros = re.compile('0*$').match
+# match a '5' followed only by zeros
+_exact_half = re.compile('50*$').match
+
+##### PEP3101 support functions ##############################################
+# The functions in this section have little to do with the Decimal
+# class, and could potentially be reused or adapted for other pure
+# Python numeric classes that want to implement __format__
+#
+# A format specifier for Decimal looks like:
+#
+# [[fill]align][sign][0][minimumwidth][,][.precision][type]
+
+# Each optional part of the specifier is captured in a named group;
+# the group names are the keys that _parse_format_specifier reads
+# from groupdict().
+_parse_format_specifier_regex = re.compile(r"""\A
+(?:
+   (?P<fill>.)?
+   (?P<align>[<>=^])
+)?
+(?P<sign>[-+ ])?
+(?P<zeropad>0)?
+(?P<minimumwidth>(?!0)\d+)?
+(?P<thousands_sep>,)?
+(?:\.(?P<precision>0|(?!0)\d+))?
+(?P<type>[eEfFgGn%])?
+\Z
+""", re.VERBOSE)
+
+del re
+
+# The locale module is only needed for the 'n' format specifier. The
+# rest of the PEP 3101 code functions quite happily without it, so we
+# don't care too much if locale isn't present.
+try:
+ import locale as _locale
+except ImportError:
+ pass
+
+def _parse_format_specifier(format_spec, _localeconv=None):
+    """Parse and validate a format specifier.
+
+    Turns a standard numeric format specifier into a dict, with the
+    following entries:
+
+      fill: fill character to pad field to minimum width
+      align: alignment type, either '<', '>', '=' or '^'
+      sign: either '+', '-' or ' '
+      minimumwidth: nonnegative integer giving minimum width
+      zeropad: boolean, indicating whether to pad with zeros
+      thousands_sep: string to use as thousands separator, or ''
+      grouping: grouping for thousands separators, in format
+        used by localeconv
+      decimal_point: string to use for decimal point
+      precision: nonnegative integer giving precision, or None
+      type: one of the characters 'eEfFgG%', or None
+      unicode: boolean (always True for Python 3.x)
+
+    _localeconv, when given, is used in place of the result of
+    locale.localeconv() for the 'n' type.
+
+    Raises ValueError if the specifier does not match the expected
+    syntax or combines conflicting options.
+    """
+    m = _parse_format_specifier_regex.match(format_spec)
+    if m is None:
+        raise ValueError("Invalid format specifier: " + format_spec)
+
+    # get the dictionary
+    format_dict = m.groupdict()
+
+    # zeropad; defaults for fill and alignment.  If zero padding
+    # is requested, the fill and align fields should be absent.
+    fill = format_dict['fill']
+    align = format_dict['align']
+    format_dict['zeropad'] = (format_dict['zeropad'] is not None)
+    if format_dict['zeropad']:
+        if fill is not None:
+            raise ValueError("Fill character conflicts with '0'"
+                             " in format specifier: " + format_spec)
+        if align is not None:
+            raise ValueError("Alignment conflicts with '0' in "
+                             "format specifier: " + format_spec)
+    format_dict['fill'] = fill or ' '
+    # PEP 3101 originally specified that the default alignment should
+    # be left;  it was later agreed that right-aligned makes more sense
+    # for numeric types.  See http://bugs.python.org/issue6857.
+    format_dict['align'] = align or '>'
+
+    # default sign handling: '-' for negative, '' for positive
+    if format_dict['sign'] is None:
+        format_dict['sign'] = '-'
+
+    # minimumwidth defaults to 0; precision remains None if not given
+    format_dict['minimumwidth'] = int(format_dict['minimumwidth'] or '0')
+    if format_dict['precision'] is not None:
+        format_dict['precision'] = int(format_dict['precision'])
+
+    # if format type is 'g' or 'G' then a precision of 0 makes little
+    # sense; convert it to 1.  Same if format type is unspecified.
+    if format_dict['precision'] == 0:
+        if format_dict['type'] is None or format_dict['type'] in 'gG':
+            format_dict['precision'] = 1
+
+    # determine thousands separator, grouping, and decimal separator, and
+    # add appropriate entries to format_dict
+    if format_dict['type'] == 'n':
+        # apart from separators, 'n' behaves just like 'g'
+        format_dict['type'] = 'g'
+        if _localeconv is None:
+            # NOTE(review): _locale is only bound when the module-level
+            # 'import locale' succeeded; otherwise this raises NameError.
+            _localeconv = _locale.localeconv()
+        if format_dict['thousands_sep'] is not None:
+            raise ValueError("Explicit thousands separator conflicts with "
+                             "'n' type in format specifier: " + format_spec)
+        format_dict['thousands_sep'] = _localeconv['thousands_sep']
+        format_dict['grouping'] = _localeconv['grouping']
+        format_dict['decimal_point'] = _localeconv['decimal_point']
+    else:
+        if format_dict['thousands_sep'] is None:
+            format_dict['thousands_sep'] = ''
+        format_dict['grouping'] = [3, 0]
+        format_dict['decimal_point'] = '.'
+
+    # record whether return type should be str or unicode
+    try:
+        format_dict['unicode'] = isinstance(format_spec, unicode)
+    except NameError:
+        # no 'unicode' builtin is available
+        format_dict['unicode'] = False
+
+    return format_dict
+
+def _format_align(sign, body, spec):
+    """Pad and align an already formatted number.
+
+    'sign' is the sign string and 'body' the unpadded, non-aligned
+    numeric string; 'spec' is a format specifier dictionary (as
+    produced by parse_format_specifier) supplying 'minimumwidth',
+    'fill', 'align' and 'unicode'.
+
+    The result is converted to unicode when spec['unicode'] is true.
+    Raises ValueError for an unknown alignment.
+
+    """
+    # how much extra space do we have to play with?  (a negative
+    # count simply yields an empty padding string)
+    spare = spec['minimumwidth'] - len(sign) - len(body)
+    padding = spec['fill'] * spare
+
+    how = spec['align']
+    if how == '<':
+        result = sign + body + padding
+    elif how == '>':
+        result = padding + sign + body
+    elif how == '=':
+        result = sign + padding + body
+    elif how == '^':
+        # on an odd amount of padding the extra character goes on the right
+        left = len(padding)//2
+        result = padding[:left] + sign + body + padding[left:]
+    else:
+        raise ValueError('Unrecognised alignment field')
+
+    # make sure that result is unicode if necessary
+    return unicode(result) if spec['unicode'] else result
+
+def _group_lengths(grouping):
+    """Turn a localeconv-style grouping into an iterable of group lengths.
+
+    The iterable returned may be infinite.  Raises ValueError when the
+    grouping has none of the recognised forms.
+
+    """
+    # The result from localeconv()['grouping'], and the input to this
+    # function, should be a list of integers in one of the
+    # following three forms:
+    #
+    #   (1) an empty list, or
+    #   (2) nonempty list of positive integers + [0], or
+    #   (3) list of positive integers + [locale.CHAR_MAX]
+
+    from itertools import chain, repeat
+    if not grouping:
+        return []
+
+    terminator = grouping[-1]
+    if terminator == 0 and len(grouping) >= 2:
+        # form (2): the last length before the 0 repeats indefinitely
+        return chain(grouping[:-1], repeat(grouping[-2]))
+    if terminator == _locale.CHAR_MAX:
+        # form (3): the listed lengths only
+        return grouping[:-1]
+    raise ValueError('unrecognised format for grouping')
+
+def _insert_thousands_sep(digits, spec, min_width=1):
+    """Insert thousands separators into a digit string.
+
+    spec is a dictionary whose keys should include 'thousands_sep' and
+    'grouping'; typically it's the result of parsing the format
+    specifier using _parse_format_specifier.
+
+    The min_width keyword argument gives the minimum length of the
+    result, which will be padded on the left with zeros if necessary.
+
+    If necessary, the zero padding adds an extra '0' on the left to
+    avoid a leading thousands separator.  For example, inserting
+    commas every three digits in '123456', with min_width=8, gives
+    '0,123,456', even though that has length 9.
+
+    Raises ValueError if a group length is not positive.
+
+    """
+
+    sep = spec['thousands_sep']
+    grouping = spec['grouping']
+
+    # groups are collected right to left and reversed when joined
+    groups = []
+    for l in _group_lengths(grouping):
+        if l <= 0:
+            raise ValueError("group length should be positive")
+        # max(..., 1) forces at least 1 digit to the left of a separator
+        l = min(max(len(digits), min_width, 1), l)
+        groups.append('0'*(l - len(digits)) + digits[-l:])
+        digits = digits[:-l]
+        min_width -= l
+        if not digits and min_width <= 0:
+            break
+        # a separator will follow, and it counts towards the width
+        min_width -= len(sep)
+    else:
+        # the group lengths ran out without a break: whatever is left
+        # (zero-padded if necessary) forms one final group
+        l = max(len(digits), min_width, 1)
+        groups.append('0'*(l - len(digits)) + digits[-l:])
+    return sep.join(reversed(groups))
+
+def _format_sign(is_negative, spec):
+    """Pick the sign string for a number.
+
+    Negative numbers always get '-'.  Otherwise spec['sign'] is
+    returned when it is ' ' or '+', and the empty string if not.
+    """
+    if is_negative:
+        return '-'
+    requested = spec['sign']
+    return requested if requested in ' +' else ''
+
+def _format_number(is_negative, intpart, fracpart, exp, spec):
+    """Assemble the final formatted string for a number.
+
+    Arguments:
+
+      is_negative: true if the number is negative, else false
+      intpart: string of digits that must appear before the decimal point
+      fracpart: string of digits that must come after the point
+      exp: exponent, as an integer
+      spec: dictionary resulting from parsing the format specifier
+
+    The information in spec is used to:
+      insert separators (decimal separator and thousands separators)
+      format the sign
+      format the exponent
+      add trailing '%' for the '%' type
+      zero-pad if necessary
+      fill and align if necessary
+    """
+
+    sign = _format_sign(is_negative, spec)
+
+    # everything that follows the integer digits is gathered in 'tail'
+    tail = spec['decimal_point'] + fracpart if fracpart else fracpart
+
+    if exp != 0 or spec['type'] in 'eE':
+        exp_char = {'E': 'E', 'e': 'e', 'G': 'E', 'g': 'e'}[spec['type']]
+        tail += "{0}{1:+}".format(exp_char, exp)
+    if spec['type'] == '%':
+        tail += '%'
+
+    # with zero padding, the integer part must fill whatever width the
+    # sign and the tail leave over
+    min_width = 0
+    if spec['zeropad']:
+        min_width = spec['minimumwidth'] - len(tail) - len(sign)
+    grouped = _insert_thousands_sep(intpart, spec, min_width)
+
+    return _format_align(sign, grouped + tail, spec)
+
+
+##### Useful Constants (internal use only) ################################
+
+# Reusable defaults: Decimal constants built once at import time
+_Infinity = Decimal('Inf')
+_NegativeInfinity = Decimal('-Inf')
+_NaN = Decimal('NaN')
+_Zero = Decimal(0)
+_One = Decimal(1)
+_NegativeOne = Decimal(-1)
+
+# _SignedInfinity[sign] is infinity w/ that sign
+# (index 0 -> _Infinity, index 1 -> _NegativeInfinity)
+_SignedInfinity = (_Infinity, _NegativeInfinity)
+
+
+
+# When executed as a script, run the doctests embedded in this module.
+if __name__ == '__main__':
+    import doctest, sys
+    doctest.testmod(sys.modules[__name__])
diff --git a/cashew/Lib/difflib.py b/cashew/Lib/difflib.py
new file mode 100644
index 0000000..788a92d
--- /dev/null
+++ b/cashew/Lib/difflib.py
@@ -0,0 +1,2057 @@
+"""
+Module difflib -- helpers for computing deltas between objects.
+
+Function get_close_matches(word, possibilities, n=3, cutoff=0.6):
+ Use SequenceMatcher to return list of the best "good enough" matches.
+
+Function context_diff(a, b):
+ For two lists of strings, return a delta in context diff format.
+
+Function ndiff(a, b):
+ Return a delta: the difference between `a` and `b` (lists of strings).
+
+Function restore(delta, which):
+ Return one of the two sequences that generated an ndiff delta.
+
+Function unified_diff(a, b):
+ For two lists of strings, return a delta in unified diff format.
+
+Class SequenceMatcher:
+ A flexible class for comparing pairs of sequences of any type.
+
+Class Differ:
+ For producing human-readable deltas from sequences of lines of text.
+
+Class HtmlDiff:
+ For producing HTML side by side comparison with change highlights.
+"""
+
+__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
+ 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
+ 'unified_diff', 'HtmlDiff', 'Match']
+
+import heapq
+from collections import namedtuple as _namedtuple
+from functools import reduce
+
+Match = _namedtuple('Match', 'a b size')
+
+def _calculate_ratio(matches, length):
+ if length:
+ return 2.0 * matches / length
+ return 1.0
+
+class SequenceMatcher:
+
+ """
+ SequenceMatcher is a flexible class for comparing pairs of sequences of
+ any type, so long as the sequence elements are hashable. The basic
+ algorithm predates, and is a little fancier than, an algorithm
+ published in the late 1980's by Ratcliff and Obershelp under the
+ hyperbolic name "gestalt pattern matching". The basic idea is to find
+ the longest contiguous matching subsequence that contains no "junk"
+ elements (R-O doesn't address junk). The same idea is then applied
+ recursively to the pieces of the sequences to the left and to the right
+ of the matching subsequence. This does not yield minimal edit
+ sequences, but does tend to yield matches that "look right" to people.
+
+ SequenceMatcher tries to compute a "human-friendly diff" between two
+ sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the
+ longest *contiguous* & junk-free matching subsequence. That's what
+ catches people's eyes. The Windows(tm) windiff has another interesting
+ notion, pairing up elements that appear uniquely in each sequence.
+ That, and the method here, appear to yield more intuitive difference
+ reports than does diff. This method appears to be the least vulnerable
+ to synching up on blocks of "junk lines", though (like blank lines in
+ ordinary text files, or maybe "<P>" lines in HTML files). That may be
+ because this is the only method of the 3 that has a *concept* of
+ "junk" <wink>.
+
+ Example, comparing two strings, and considering blanks to be "junk":
+
+ >>> s = SequenceMatcher(lambda x: x == " ",
+ ... "private Thread currentThread;",
+ ... "private volatile Thread currentThread;")
+ >>>
+
+ .ratio() returns a float in [0, 1], measuring the "similarity" of the
+ sequences. As a rule of thumb, a .ratio() value over 0.6 means the
+ sequences are close matches:
+
+ >>> print round(s.ratio(), 3)
+ 0.866
+ >>>
+
+ If you're only interested in where the sequences match,
+ .get_matching_blocks() is handy:
+
+ >>> for block in s.get_matching_blocks():
+ ... print "a[%d] and b[%d] match for %d elements" % block
+ a[0] and b[0] match for 8 elements
+ a[8] and b[17] match for 21 elements
+ a[29] and b[38] match for 0 elements
+
+ Note that the last tuple returned by .get_matching_blocks() is always a
+ dummy, (len(a), len(b), 0), and this is the only case in which the last
+ tuple element (number of elements matched) is 0.
+
+ If you want to know how to change the first sequence into the second,
+ use .get_opcodes():
+
+ >>> for opcode in s.get_opcodes():
+ ... print "%6s a[%d:%d] b[%d:%d]" % opcode
+ equal a[0:8] b[0:8]
+ insert a[8:8] b[8:17]
+ equal a[8:29] b[17:38]
+
+ See the Differ class for a fancy human-friendly file differencer, which
+ uses SequenceMatcher both to compare sequences of lines, and to compare
+ sequences of characters within similar (near-matching) lines.
+
+ See also function get_close_matches() in this module, which shows how
+ simple code building on SequenceMatcher can be used to do useful work.
+
+ Timing: Basic R-O is cubic time worst case and quadratic time expected
+ case. SequenceMatcher is quadratic time for the worst case and has
+ expected-case behavior dependent in a complicated way on how many
+ elements the sequences have in common; best case time is linear.
+
+ Methods:
+
+ __init__(isjunk=None, a='', b='')
+ Construct a SequenceMatcher.
+
+ set_seqs(a, b)
+ Set the two sequences to be compared.
+
+ set_seq1(a)
+ Set the first sequence to be compared.
+
+ set_seq2(b)
+ Set the second sequence to be compared.
+
+ find_longest_match(alo, ahi, blo, bhi)
+ Find longest matching block in a[alo:ahi] and b[blo:bhi].
+
+ get_matching_blocks()
+ Return list of triples describing matching subsequences.
+
+ get_opcodes()
+ Return list of 5-tuples describing how to turn a into b.
+
+ ratio()
+ Return a measure of the sequences' similarity (float in [0,1]).
+
+ quick_ratio()
+ Return an upper bound on .ratio() relatively quickly.
+
+ real_quick_ratio()
+ Return an upper bound on ratio() very quickly.
+ """
+
+ def __init__(self, isjunk=None, a='', b='', autojunk=True):
+ """Construct a SequenceMatcher.
+
+ Optional arg isjunk is None (the default), or a one-argument
+ function that takes a sequence element and returns true iff the
+ element is junk. None is equivalent to passing "lambda x: 0", i.e.
+ no elements are considered to be junk. For example, pass
+ lambda x: x in " \\t"
+ if you're comparing lines as sequences of characters, and don't
+ want to synch up on blanks or hard tabs.
+
+ Optional arg a is the first of two sequences to be compared. By
+ default, an empty string. The elements of a must be hashable. See
+ also .set_seqs() and .set_seq1().
+
+ Optional arg b is the second of two sequences to be compared. By
+ default, an empty string. The elements of b must be hashable. See
+ also .set_seqs() and .set_seq2().
+
+ Optional arg autojunk should be set to False to disable the
+ "automatic junk heuristic" that treats popular elements as junk
+ (see module documentation for more information).
+ """
+
+ # Members:
+ # a
+ # first sequence
+ # b
+ # second sequence; differences are computed as "what do
+ # we need to do to 'a' to change it into 'b'?"
+ # b2j
+ # for x in b, b2j[x] is a list of the indices (into b)
+ # at which x appears; junk elements do not appear
+ # fullbcount
+ # for x in b, fullbcount[x] == the number of times x
+ # appears in b; only materialized if really needed (used
+ # only for computing quick_ratio())
+ # matching_blocks
+ # a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
+ # ascending & non-overlapping in i and in j; terminated by
+ # a dummy (len(a), len(b), 0) sentinel
+ # opcodes
+ # a list of (tag, i1, i2, j1, j2) tuples, where tag is
+ # one of
+ # 'replace' a[i1:i2] should be replaced by b[j1:j2]
+ # 'delete' a[i1:i2] should be deleted
+ # 'insert' b[j1:j2] should be inserted
+ # 'equal' a[i1:i2] == b[j1:j2]
+ # isjunk
+ # a user-supplied function taking a sequence element and
+ # returning true iff the element is "junk" -- this has
+ # subtle but helpful effects on the algorithm, which I'll
+ # get around to writing up someday <0.9 wink>.
+ # DON'T USE! Only __chain_b uses this. Use isbjunk.
+ # isbjunk
+ # for x in b, isbjunk(x) == isjunk(x) but much faster;
+ # it's really the __contains__ method of a hidden dict.
+ # DOES NOT WORK for x in a!
+ # isbpopular
+ # for x in b, isbpopular(x) is true iff b is reasonably long
+ # (at least 200 elements) and x accounts for more than 1 + 1% of
+ # its elements (when autojunk is enabled).
+ # DOES NOT WORK for x in a!
+
+ self.isjunk = isjunk
+ self.a = self.b = None
+ self.autojunk = autojunk
+ self.set_seqs(a, b)
+
+ def set_seqs(self, a, b):
+ """Set the two sequences to be compared.
+
+ >>> s = SequenceMatcher()
+ >>> s.set_seqs("abcd", "bcde")
+ >>> s.ratio()
+ 0.75
+ """
+
+ self.set_seq1(a)
+ self.set_seq2(b)
+
+ def set_seq1(self, a):
+ """Set the first sequence to be compared.
+
+ The second sequence to be compared is not changed.
+
+ >>> s = SequenceMatcher(None, "abcd", "bcde")
+ >>> s.ratio()
+ 0.75
+ >>> s.set_seq1("bcde")
+ >>> s.ratio()
+ 1.0
+ >>>
+
+ SequenceMatcher computes and caches detailed information about the
+ second sequence, so if you want to compare one sequence S against
+ many sequences, use .set_seq2(S) once and call .set_seq1(x)
+ repeatedly for each of the other sequences.
+
+ See also set_seqs() and set_seq2().
+ """
+
+ if a is self.a:
+ return
+ self.a = a
+ self.matching_blocks = self.opcodes = None
+
+ def set_seq2(self, b):
+ """Set the second sequence to be compared.
+
+ The first sequence to be compared is not changed.
+
+ >>> s = SequenceMatcher(None, "abcd", "bcde")
+ >>> s.ratio()
+ 0.75
+ >>> s.set_seq2("abcd")
+ >>> s.ratio()
+ 1.0
+ >>>
+
+ SequenceMatcher computes and caches detailed information about the
+ second sequence, so if you want to compare one sequence S against
+ many sequences, use .set_seq2(S) once and call .set_seq1(x)
+ repeatedly for each of the other sequences.
+
+ See also set_seqs() and set_seq1().
+ """
+
+ if b is self.b:
+ return
+ self.b = b
+ self.matching_blocks = self.opcodes = None
+ self.fullbcount = None
+ self.__chain_b()
+
+ # For each element x in b, set b2j[x] to a list of the indices in
+ # b where x appears; the indices are in increasing order; note that
+ # the number of times x appears in b is len(b2j[x]) ...
+ # when self.isjunk is defined, junk elements don't show up in this
+ # map at all, which stops the central find_longest_match method
+ # from starting any matching block at a junk element ...
+ # also creates the fast isbjunk function ...
+ # b2j also does not contain entries for "popular" elements, meaning
+ # elements that account for more than 1 + 1% of the total elements, and
+ # when the sequence is reasonably large (>= 200 elements); this can
+ # be viewed as an adaptive notion of semi-junk, and yields an enormous
+ # speedup when, e.g., comparing program files with hundreds of
+ # instances of "return NULL;" ...
+ # note that this is only called when b changes; so for cross-product
+ # kinds of matches, it's best to call set_seq2 once, then set_seq1
+ # repeatedly
+
+ def __chain_b(self):
+ # Because isjunk is a user-defined (not C) function, and we test
+ # for junk a LOT, it's important to minimize the number of calls.
+ # Before the tricks described here, __chain_b was by far the most
+ # time-consuming routine in the whole module! If anyone sees
+ # Jim Roskind, thank him again for profile.py -- I never would
+ # have guessed that.
+ # The first trick is to build b2j ignoring the possibility
+ # of junk. I.e., we don't call isjunk at all yet. Throwing
+ # out the junk later is much cheaper than building b2j "right"
+ # from the start.
+ b = self.b
+ self.b2j = b2j = {}
+
+ for i, elt in enumerate(b):
+ indices = b2j.setdefault(elt, [])
+ indices.append(i)
+
+ # Purge junk elements
+ junk = set()
+ isjunk = self.isjunk
+ if isjunk:
+ for elt in list(b2j.keys()): # using list() since b2j is modified
+ if isjunk(elt):
+ junk.add(elt)
+ del b2j[elt]
+
+ # Purge popular elements that are not junk
+ popular = set()
+ n = len(b)
+ if self.autojunk and n >= 200:
+ ntest = n // 100 + 1
+ for elt, idxs in list(b2j.items()):
+ if len(idxs) > ntest:
+ popular.add(elt)
+ del b2j[elt]
+
+ # Now for x in b, isjunk(x) == x in junk, but the latter is much faster.
+ # Since the number of *unique* junk elements is probably small, the
+ # memory burden of keeping this set alive is likely trivial compared to
+ # the size of b2j.
+ self.isbjunk = junk.__contains__
+ self.isbpopular = popular.__contains__
+
+ def find_longest_match(self, alo, ahi, blo, bhi):
+ """Find longest matching block in a[alo:ahi] and b[blo:bhi].
+
+ If isjunk is not defined:
+
+ Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
+ alo <= i <= i+k <= ahi
+ blo <= j <= j+k <= bhi
+ and for all (i',j',k') meeting those conditions,
+ k >= k'
+ i <= i'
+ and if i == i', j <= j'
+
+ In other words, of all maximal matching blocks, return one that
+ starts earliest in a, and of all those maximal matching blocks that
+ start earliest in a, return the one that starts earliest in b.
+
+ >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
+ >>> s.find_longest_match(0, 5, 0, 9)
+ Match(a=0, b=4, size=5)
+
+ If isjunk is defined, first the longest matching block is
+ determined as above, but with the additional restriction that no
+ junk element appears in the block. Then that block is extended as
+ far as possible by matching (only) junk elements on both sides. So
+ the resulting block never matches on junk except as identical junk
+ happens to be adjacent to an "interesting" match.
+
+ Here's the same example as before, but considering blanks to be
+ junk. That prevents " abcd" from matching the " abcd" at the tail
+ end of the second sequence directly. Instead only the "abcd" can
+ match, and matches the leftmost "abcd" in the second sequence:
+
+ >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
+ >>> s.find_longest_match(0, 5, 0, 9)
+ Match(a=1, b=0, size=4)
+
+ If no blocks match, return (alo, blo, 0).
+
+ >>> s = SequenceMatcher(None, "ab", "c")
+ >>> s.find_longest_match(0, 2, 0, 1)
+ Match(a=0, b=0, size=0)
+ """
+
+ # CAUTION: stripping common prefix or suffix would be incorrect.
+ # E.g.,
+ # ab
+ # acab
+ # Longest matching block is "ab", but if common prefix is
+ # stripped, it's "a" (tied with "b"). UNIX(tm) diff does so
+ # strip, so ends up claiming that ab is changed to acab by
+ # inserting "ca" in the middle. That's minimal but unintuitive:
+ # "it's obvious" that someone inserted "ac" at the front.
+ # Windiff ends up at the same place as diff, but by pairing up
+ # the unique 'b's and then matching the first two 'a's.
+
+ a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
+ besti, bestj, bestsize = alo, blo, 0
+ # find longest junk-free match
+ # during an iteration of the loop, j2len[j] = length of longest
+ # junk-free match ending with a[i-1] and b[j]
+ j2len = {}
+ nothing = []
+ for i in xrange(alo, ahi):
+ # look at all instances of a[i] in b; note that because
+ # b2j has no junk keys, the loop is skipped if a[i] is junk
+ j2lenget = j2len.get
+ newj2len = {}
+ for j in b2j.get(a[i], nothing):
+ # a[i] matches b[j]
+ if j < blo:
+ continue
+ if j >= bhi:
+ break
+ k = newj2len[j] = j2lenget(j-1, 0) + 1
+ if k > bestsize:
+ besti, bestj, bestsize = i-k+1, j-k+1, k
+ j2len = newj2len
+
+ # Extend the best by non-junk elements on each end. In particular,
+ # "popular" non-junk elements aren't in b2j, which greatly speeds
+ # the inner loop above, but also means "the best" match so far
+ # doesn't contain any junk *or* popular non-junk elements.
+ while besti > alo and bestj > blo and \
+ not isbjunk(b[bestj-1]) and \
+ a[besti-1] == b[bestj-1]:
+ besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
+ while besti+bestsize < ahi and bestj+bestsize < bhi and \
+ not isbjunk(b[bestj+bestsize]) and \
+ a[besti+bestsize] == b[bestj+bestsize]:
+ bestsize += 1
+
+ # Now that we have a wholly interesting match (albeit possibly
+ # empty!), we may as well suck up the matching junk on each
+ # side of it too. Can't think of a good reason not to, and it
+ # saves post-processing the (possibly considerable) expense of
+ # figuring out what to do with it. In the case of an empty
+ # interesting match, this is clearly the right thing to do,
+ # because no other kind of match is possible in the regions.
+ while besti > alo and bestj > blo and \
+ isbjunk(b[bestj-1]) and \
+ a[besti-1] == b[bestj-1]:
+ besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
+ while besti+bestsize < ahi and bestj+bestsize < bhi and \
+ isbjunk(b[bestj+bestsize]) and \
+ a[besti+bestsize] == b[bestj+bestsize]:
+ bestsize = bestsize + 1
+
+ return Match(besti, bestj, bestsize)
+
+ def get_matching_blocks(self):
+ """Return list of triples describing matching subsequences.
+
+ Each triple is of the form (i, j, n), and means that
+ a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in
+ i and in j. New in Python 2.5, it's also guaranteed that if
+ (i, j, n) and (i', j', n') are adjacent triples in the list, and
+ the second is not the last triple in the list, then i+n != i' or
+ j+n != j'. IOW, adjacent triples never describe adjacent equal
+ blocks.
+
+ The last triple is a dummy, (len(a), len(b), 0), and is the only
+ triple with n==0.
+
+ >>> s = SequenceMatcher(None, "abxcd", "abcd")
+ >>> s.get_matching_blocks()
+ [Match(a=0, b=0, size=2), Match(a=3, b=2, size=2), Match(a=5, b=4, size=0)]
+ """
+
+ if self.matching_blocks is not None:
+ return self.matching_blocks
+ la, lb = len(self.a), len(self.b)
+
+ # This is most naturally expressed as a recursive algorithm, but
+ # at least one user bumped into extreme use cases that exceeded
+ # the recursion limit on their box. So, now we maintain a list
+ # (`queue`) of blocks we still need to look at, and append partial
+ # results to `matching_blocks` in a loop; the matches are sorted
+ # at the end.
+ queue = [(0, la, 0, lb)]
+ matching_blocks = []
+ while queue:
+ alo, ahi, blo, bhi = queue.pop()
+ i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
+ # a[alo:i] vs b[blo:j] unknown
+ # a[i:i+k] same as b[j:j+k]
+ # a[i+k:ahi] vs b[j+k:bhi] unknown
+ if k: # if k is 0, there was no matching block
+ matching_blocks.append(x)
+ if alo < i and blo < j:
+ queue.append((alo, i, blo, j))
+ if i+k < ahi and j+k < bhi:
+ queue.append((i+k, ahi, j+k, bhi))
+ matching_blocks.sort()
+
+ # It's possible that we have adjacent equal blocks in the
+ # matching_blocks list now. Starting with 2.5, this code was added
+ # to collapse them.
+ i1 = j1 = k1 = 0
+ non_adjacent = []
+ for i2, j2, k2 in matching_blocks:
+ # Is this block adjacent to i1, j1, k1?
+ if i1 + k1 == i2 and j1 + k1 == j2:
+ # Yes, so collapse them -- this just increases the length of
+ # the first block by the length of the second, and the first
+ # block so lengthened remains the block to compare against.
+ k1 += k2
+ else:
+ # Not adjacent. Remember the first block (k1==0 means it's
+ # the dummy we started with), and make the second block the
+ # new block to compare against.
+ if k1:
+ non_adjacent.append((i1, j1, k1))
+ i1, j1, k1 = i2, j2, k2
+ if k1:
+ non_adjacent.append((i1, j1, k1))
+
+ non_adjacent.append( (la, lb, 0) )
+ self.matching_blocks = map(Match._make, non_adjacent)
+ return self.matching_blocks
+
+ def get_opcodes(self):
+ """Return list of 5-tuples describing how to turn a into b.
+
+ Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple
+ has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
+ tuple preceding it, and likewise for j1 == the previous j2.
+
+ The tags are strings, with these meanings:
+
+ 'replace': a[i1:i2] should be replaced by b[j1:j2]
+ 'delete': a[i1:i2] should be deleted.
+ Note that j1==j2 in this case.
+ 'insert': b[j1:j2] should be inserted at a[i1:i1].
+ Note that i1==i2 in this case.
+ 'equal': a[i1:i2] == b[j1:j2]
+
+ >>> a = "qabxcd"
+ >>> b = "abycdf"
+ >>> s = SequenceMatcher(None, a, b)
+ >>> for tag, i1, i2, j1, j2 in s.get_opcodes():
+ ... print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
+ ... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))
+ delete a[0:1] (q) b[0:0] ()
+ equal a[1:3] (ab) b[0:2] (ab)
+ replace a[3:4] (x) b[2:3] (y)
+ equal a[4:6] (cd) b[3:5] (cd)
+ insert a[6:6] () b[5:6] (f)
+ """
+
+ if self.opcodes is not None:
+ return self.opcodes
+ i = j = 0
+ self.opcodes = answer = []
+ for ai, bj, size in self.get_matching_blocks():
+ # invariant: we've pumped out correct diffs to change
+ # a[:i] into b[:j], and the next matching block is
+ # a[ai:ai+size] == b[bj:bj+size]. So we need to pump
+ # out a diff to change a[i:ai] into b[j:bj], pump out
+ # the matching block, and move (i,j) beyond the match
+ tag = ''
+ if i < ai and j < bj:
+ tag = 'replace'
+ elif i < ai:
+ tag = 'delete'
+ elif j < bj:
+ tag = 'insert'
+ if tag:
+ answer.append( (tag, i, ai, j, bj) )
+ i, j = ai+size, bj+size
+ # the list of matching blocks is terminated by a
+ # sentinel with size 0
+ if size:
+ answer.append( ('equal', ai, i, bj, j) )
+ return answer
+
+ def get_grouped_opcodes(self, n=3):
+ """ Isolate change clusters by eliminating ranges with no changes.
+
+ Return a generator of groups with up to n lines of context.
+ Each group is in the same format as returned by get_opcodes().
+
+ >>> from pprint import pprint
+ >>> a = map(str, range(1,40))
+ >>> b = a[:]
+ >>> b[8:8] = ['i'] # Make an insertion
+ >>> b[20] += 'x' # Make a replacement
+ >>> b[23:28] = [] # Make a deletion
+ >>> b[30] += 'y' # Make another replacement
+ >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
+ [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
+ [('equal', 16, 19, 17, 20),
+ ('replace', 19, 20, 20, 21),
+ ('equal', 20, 22, 21, 23),
+ ('delete', 22, 27, 23, 23),
+ ('equal', 27, 30, 23, 26)],
+ [('equal', 31, 34, 27, 30),
+ ('replace', 34, 35, 30, 31),
+ ('equal', 35, 38, 31, 34)]]
+ """
+
+ codes = self.get_opcodes()
+ if not codes:
+ codes = [("equal", 0, 1, 0, 1)]
+ # Fixup leading and trailing groups if they show no changes.
+ if codes[0][0] == 'equal':
+ tag, i1, i2, j1, j2 = codes[0]
+ codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
+ if codes[-1][0] == 'equal':
+ tag, i1, i2, j1, j2 = codes[-1]
+ codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)
+
+ nn = n + n
+ group = []
+ for tag, i1, i2, j1, j2 in codes:
+ # End the current group and start a new one whenever
+ # there is a large range with no changes.
+ if tag == 'equal' and i2-i1 > nn:
+ group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
+ yield group
+ group = []
+ i1, j1 = max(i1, i2-n), max(j1, j2-n)
+ group.append((tag, i1, i2, j1 ,j2))
+ if group and not (len(group)==1 and group[0][0] == 'equal'):
+ yield group
+
+ def ratio(self):
+ """Return a measure of the sequences' similarity (float in [0,1]).
+
+ Where T is the total number of elements in both sequences, and
+ M is the number of matches, this is 2.0*M / T.
+ Note that this is 1 if the sequences are identical, and 0 if
+ they have nothing in common.
+
+ .ratio() is expensive to compute if you haven't already computed
+ .get_matching_blocks() or .get_opcodes(), in which case you may
+ want to try .quick_ratio() or .real_quick_ratio() first to get an
+ upper bound.
+
+ >>> s = SequenceMatcher(None, "abcd", "bcde")
+ >>> s.ratio()
+ 0.75
+ >>> s.quick_ratio()
+ 0.75
+ >>> s.real_quick_ratio()
+ 1.0
+ """
+
+ matches = reduce(lambda sum, triple: sum + triple[-1],
+ self.get_matching_blocks(), 0)
+ return _calculate_ratio(matches, len(self.a) + len(self.b))
+
+ def quick_ratio(self):
+ """Return an upper bound on ratio() relatively quickly.
+
+ This isn't defined beyond that it is an upper bound on .ratio(), and
+ is faster to compute.
+ """
+
+ # viewing a and b as multisets, set matches to the cardinality
+ # of their intersection; this counts the number of matches
+ # without regard to order, so is clearly an upper bound
+ if self.fullbcount is None:
+ self.fullbcount = fullbcount = {}
+ for elt in self.b:
+ fullbcount[elt] = fullbcount.get(elt, 0) + 1
+ fullbcount = self.fullbcount
+ # avail[x] is the number of times x appears in 'b' less the
+ # number of times we've seen it in 'a' so far ... kinda
+ avail = {}
+ availhas, matches = avail.__contains__, 0
+ for elt in self.a:
+ if availhas(elt):
+ numb = avail[elt]
+ else:
+ numb = fullbcount.get(elt, 0)
+ avail[elt] = numb - 1
+ if numb > 0:
+ matches = matches + 1
+ return _calculate_ratio(matches, len(self.a) + len(self.b))
+
+ def real_quick_ratio(self):
+ """Return an upper bound on ratio() very quickly.
+
+ This isn't defined beyond that it is an upper bound on .ratio(), and
+ is faster to compute than either .ratio() or .quick_ratio().
+ """
+
+ la, lb = len(self.a), len(self.b)
+ # can't have more matches than the number of elements in the
+ # shorter sequence
+ return _calculate_ratio(min(la, lb), la + lb)
+
+def get_close_matches(word, possibilities, n=3, cutoff=0.6):
+ """Use SequenceMatcher to return list of the best "good enough" matches.
+
+ word is a sequence for which close matches are desired (typically a
+ string).
+
+ possibilities is a list of sequences against which to match word
+ (typically a list of strings).
+
+ Optional arg n (default 3) is the maximum number of close matches to
+ return. n must be > 0.
+
+ Optional arg cutoff (default 0.6) is a float in [0, 1]. Possibilities
+ that don't score at least that similar to word are ignored.
+
+ The best (no more than n) matches among the possibilities are returned
+ in a list, sorted by similarity score, most similar first.
+
+ >>> get_close_matches("appel", ["ape", "apple", "peach", "puppy"])
+ ['apple', 'ape']
+ >>> import keyword as _keyword
+ >>> get_close_matches("wheel", _keyword.kwlist)
+ ['while']
+ >>> get_close_matches("apple", _keyword.kwlist)
+ []
+ >>> get_close_matches("accept", _keyword.kwlist)
+ ['except']
+ """
+
+ if not n > 0:
+ raise ValueError("n must be > 0: %r" % (n,))
+ if not 0.0 <= cutoff <= 1.0:
+ raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
+ result = []
+ s = SequenceMatcher()
+ s.set_seq2(word)
+ for x in possibilities:
+ s.set_seq1(x)
+ if s.real_quick_ratio() >= cutoff and \
+ s.quick_ratio() >= cutoff and \
+ s.ratio() >= cutoff:
+ result.append((s.ratio(), x))
+
+ # Move the best scorers to head of list
+ result = heapq.nlargest(n, result)
+ # Strip scores for the best n matches
+ return [x for score, x in result]
+
+def _count_leading(line, ch):
+ """
+ Return number of `ch` characters at the start of `line`.
+
+ Example:
+
+ >>> _count_leading(' abc', ' ')
+ 3
+ """
+
+ i, n = 0, len(line)
+ while i < n and line[i] == ch:
+ i += 1
+ return i
+
+class Differ:
+ r"""
+ Differ is a class for comparing sequences of lines of text, and
+ producing human-readable differences or deltas. Differ uses
+ SequenceMatcher both to compare sequences of lines, and to compare
+ sequences of characters within similar (near-matching) lines.
+
+ Each line of a Differ delta begins with a two-letter code:
+
+ '- ' line unique to sequence 1
+ '+ ' line unique to sequence 2
+ ' ' line common to both sequences
+ '? ' line not present in either input sequence
+
+ Lines beginning with '? ' attempt to guide the eye to intraline
+ differences, and were not present in either input sequence. These lines
+ can be confusing if the sequences contain tab characters.
+
+ Note that Differ makes no claim to produce a *minimal* diff. To the
+ contrary, minimal diffs are often counter-intuitive, because they synch
+ up anywhere possible, sometimes accidental matches 100 pages apart.
+ Restricting synch points to contiguous matches preserves some notion of
+ locality, at the occasional cost of producing a longer diff.
+
+ Example: Comparing two texts.
+
+ First we set up the texts, sequences of individual single-line strings
+ ending with newlines (such sequences can also be obtained from the
+ `readlines()` method of file-like objects):
+
+ >>> text1 = ''' 1. Beautiful is better than ugly.
+ ... 2. Explicit is better than implicit.
+ ... 3. Simple is better than complex.
+ ... 4. Complex is better than complicated.
+ ... '''.splitlines(1)
+ >>> len(text1)
+ 4
+ >>> text1[0][-1]
+ '\n'
+ >>> text2 = ''' 1. Beautiful is better than ugly.
+ ... 3. Simple is better than complex.
+ ... 4. Complicated is better than complex.
+ ... 5. Flat is better than nested.
+ ... '''.splitlines(1)
+
+ Next we instantiate a Differ object:
+
+ >>> d = Differ()
+
+ Note that when instantiating a Differ object we may pass functions to
+ filter out line and character 'junk'. See Differ.__init__ for details.
+
+ Finally, we compare the two:
+
+ >>> result = list(d.compare(text1, text2))
+
+ 'result' is a list of strings, so let's pretty-print it:
+
+ >>> from pprint import pprint as _pprint
+ >>> _pprint(result)
+ [' 1. Beautiful is better than ugly.\n',
+ '- 2. Explicit is better than implicit.\n',
+ '- 3. Simple is better than complex.\n',
+ '+ 3. Simple is better than complex.\n',
+ '? ++\n',
+ '- 4. Complex is better than complicated.\n',
+ '? ^ ---- ^\n',
+ '+ 4. Complicated is better than complex.\n',
+ '? ++++ ^ ^\n',
+ '+ 5. Flat is better than nested.\n']
+
+ As a single multi-line string it looks like this:
+
+ >>> print ''.join(result),
+ 1. Beautiful is better than ugly.
+ - 2. Explicit is better than implicit.
+ - 3. Simple is better than complex.
+ + 3. Simple is better than complex.
+ ? ++
+ - 4. Complex is better than complicated.
+ ? ^ ---- ^
+ + 4. Complicated is better than complex.
+ ? ++++ ^ ^
+ + 5. Flat is better than nested.
+
+ Methods:
+
+ __init__(linejunk=None, charjunk=None)
+ Construct a text differencer, with optional filters.
+
+ compare(a, b)
+ Compare two sequences of lines; generate the resulting delta.
+ """
+
+ def __init__(self, linejunk=None, charjunk=None):
+ """
+ Construct a text differencer, with optional filters.
+
+ The two optional keyword parameters are for filter functions:
+
+ - `linejunk`: A function that should accept a single string argument,
+ and return true iff the string is junk. The module-level function
+ `IS_LINE_JUNK` may be used to filter out lines without visible
+ characters, except for at most one splat ('#'). It is recommended
+ to leave linejunk None; as of Python 2.3, the underlying
+ SequenceMatcher class has grown an adaptive notion of "noise" lines
+ that's better than any static definition the author has ever been
+ able to craft.
+
+ - `charjunk`: A function that should accept a string of length 1. The
+ module-level function `IS_CHARACTER_JUNK` may be used to filter out
+ whitespace characters (a blank or tab; **note**: bad idea to include
+ newline in this!). Use of IS_CHARACTER_JUNK is recommended.
+ """
+
+ self.linejunk = linejunk
+ self.charjunk = charjunk
+
+ def compare(self, a, b):
+ r"""
+ Compare two sequences of lines; generate the resulting delta.
+
+ Each sequence must contain individual single-line strings ending with
+ newlines. Such sequences can be obtained from the `readlines()` method
+ of file-like objects. The delta generated also consists of newline-
+ terminated strings, ready to be printed as-is via the writelines()
+ method of a file-like object.
+
+ Example:
+
+ >>> print ''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(1),
+ ... 'ore\ntree\nemu\n'.splitlines(1))),
+ - one
+ ? ^
+ + ore
+ ? ^
+ - two
+ - three
+ ? -
+ + tree
+ + emu
+ """
+
+ cruncher = SequenceMatcher(self.linejunk, a, b)
+ for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
+ if tag == 'replace':
+ g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
+ elif tag == 'delete':
+ g = self._dump('-', a, alo, ahi)
+ elif tag == 'insert':
+ g = self._dump('+', b, blo, bhi)
+ elif tag == 'equal':
+ g = self._dump(' ', a, alo, ahi)
+ else:
+ raise ValueError, 'unknown tag %r' % (tag,)
+
+ for line in g:
+ yield line
+
+ def _dump(self, tag, x, lo, hi):
+ """Generate comparison results for a same-tagged range."""
+ for i in xrange(lo, hi):
+ yield '%s %s' % (tag, x[i])
+
+ def _plain_replace(self, a, alo, ahi, b, blo, bhi):
+ assert alo < ahi and blo < bhi
+ # dump the shorter block first -- reduces the burden on short-term
+ # memory if the blocks are of very different sizes
+ if bhi - blo < ahi - alo:
+ first = self._dump('+', b, blo, bhi)
+ second = self._dump('-', a, alo, ahi)
+ else:
+ first = self._dump('-', a, alo, ahi)
+ second = self._dump('+', b, blo, bhi)
+
+ for g in first, second:
+ for line in g:
+ yield line
+
+ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
+ r"""
+ When replacing one block of lines with another, search the blocks
+ for *similar* lines; the best-matching pair (if any) is used as a
+ synch point, and intraline difference marking is done on the
+ similar pair. Lots of work, but often worth it.
+
+ The lines before and after the synch point are handled by
+ `_fancy_helper`, which may call back into this method. When no pair
+ is similar enough and no pair is identical, the block is emitted by
+ `_plain_replace` instead.
+
+ Example:
+
+ >>> d = Differ()
+ >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
+ ... ['abcdefGhijkl\n'], 0, 1)
+ >>> print ''.join(results),
+ - abcDefghiJkl
+ ? ^ ^ ^
+ + abcdefGhijkl
+ ? ^ ^ ^
+ """
+
+ # don't synch up unless the lines have a similarity score of at
+ # least cutoff; best_ratio tracks the best score seen so far
+ # (best_ratio starts just below cutoff, so any pair that beats it
+ # scores above 0.74)
+ best_ratio, cutoff = 0.74, 0.75
+ cruncher = SequenceMatcher(self.charjunk)
+ eqi, eqj = None, None # 1st indices of equal lines (if any)
+
+ # search for the pair that matches best without being identical
+ # (identical lines must be junk lines, & we don't want to synch up
+ # on junk -- unless we have to)
+ for j in xrange(blo, bhi):
+ bj = b[j]
+ cruncher.set_seq2(bj)
+ for i in xrange(alo, ahi):
+ ai = a[i]
+ if ai == bj:
+ if eqi is None:
+ eqi, eqj = i, j
+ continue
+ cruncher.set_seq1(ai)
+ # computing similarity is expensive, so use the quick
+ # upper bounds first -- have seen this speed up messy
+ # compares by a factor of 3.
+ # note that ratio() is only expensive to compute the first
+ # time it's called on a sequence pair; the expensive part
+ # of the computation is cached by cruncher
+ if cruncher.real_quick_ratio() > best_ratio and \
+ cruncher.quick_ratio() > best_ratio and \
+ cruncher.ratio() > best_ratio:
+ best_ratio, best_i, best_j = cruncher.ratio(), i, j
+ if best_ratio < cutoff:
+ # no non-identical "pretty close" pair
+ if eqi is None:
+ # no identical pair either -- treat it as a straight replace
+ for line in self._plain_replace(a, alo, ahi, b, blo, bhi):
+ yield line
+ return
+ # no close pair, but an identical pair -- synch up on that
+ best_i, best_j, best_ratio = eqi, eqj, 1.0
+ else:
+ # there's a close pair, so forget the identical pair (if any)
+ eqi = None
+
+ # a[best_i] very similar to b[best_j]; eqi is None iff they're not
+ # identical
+
+ # pump out diffs from before the synch point
+ for line in self._fancy_helper(a, alo, best_i, b, blo, best_j):
+ yield line
+
+ # do intraline marking on the synch pair
+ aelt, belt = a[best_i], b[best_j]
+ if eqi is None:
+ # pump out a '-', '?', '+', '?' quad for the synched lines
+ # atags/btags get one marker character per character of
+ # aelt/belt respectively
+ atags = btags = ""
+ cruncher.set_seqs(aelt, belt)
+ for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
+ la, lb = ai2 - ai1, bj2 - bj1
+ if tag == 'replace':
+ atags += '^' * la
+ btags += '^' * lb
+ elif tag == 'delete':
+ atags += '-' * la
+ elif tag == 'insert':
+ btags += '+' * lb
+ elif tag == 'equal':
+ atags += ' ' * la
+ btags += ' ' * lb
+ else:
+ raise ValueError, 'unknown tag %r' % (tag,)
+ for line in self._qformat(aelt, belt, atags, btags):
+ yield line
+ else:
+ # the synch pair is identical
+ # NOTE(review): _dump(' ', ...) emits a two-character prefix for
+ # unchanged lines; confirm this literal is also meant to be two
+ # blanks (runs of spaces look collapsed in this copy).
+ yield ' ' + aelt
+
+ # pump out diffs from after the synch point
+ for line in self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi):
+ yield line
+
+ def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
+ g = []
+ if alo < ahi:
+ if blo < bhi:
+ g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
+ else:
+ g = self._dump('-', a, alo, ahi)
+ elif blo < bhi:
+ g = self._dump('+', b, blo, bhi)
+
+ for line in g:
+ yield line
+
+ def _qformat(self, aline, bline, atags, btags):
+ r"""
+ Format "?" output and deal with leading tabs.
+
+ Yields the "- " line, its "? " guide line, the "+ " line and its
+ "? " guide line, in that order. A guide line is left out when its
+ tag string is empty after trailing whitespace has been stripped.
+
+ Example:
+
+ >>> d = Differ()
+ >>> results = d._qformat('\tabcDefghiJkl\n', '\tabcdefGhijkl\n',
+ ... ' ^ ^ ^ ', ' ^ ^ ^ ')
+ >>> for line in results: print repr(line)
+ ...
+ '- \tabcDefghiJkl\n'
+ '? \t ^ ^ ^\n'
+ '+ \tabcdefGhijkl\n'
+ '? \t ^ ^ ^\n'
+ """
+
+ # Can hurt, but will probably help most of the time.
+ # common = number of leading tabs shared by both lines, reduced to the
+ # number of leading blanks in each tag string, so that only unmarked
+ # tab columns are copied into the guide lines
+ common = min(_count_leading(aline, "\t"),
+ _count_leading(bline, "\t"))
+ common = min(common, _count_leading(atags[:common], " "))
+ common = min(common, _count_leading(btags[:common], " "))
+ atags = atags[common:].rstrip()
+ btags = btags[common:].rstrip()
+
+ yield "- " + aline
+ if atags:
+ yield "? %s%s\n" % ("\t" * common, atags)
+
+ yield "+ " + bline
+ if btags:
+ yield "? %s%s\n" % ("\t" * common, btags)
+
+# With respect to junk, an earlier version of ndiff simply refused to
+# *start* a match with a junk element. The result was cases like this:
+# before: private Thread currentThread;
+# after: private volatile Thread currentThread;
+# If you consider whitespace to be junk, the longest contiguous match
+# not starting with junk is "e Thread currentThread". So ndiff reported
+# that "e volatil" was inserted between the 't' and the 'e' in "private".
+# While an accurate view, to people that's absurd. The current version
+# looks for matching blocks that are entirely junk-free, then extends the
+# longest one of those as far as possible but only with matching junk.
+# So now "currentThread" is matched, then extended to suck up the
+# preceding blank; then "private" is matched, and extended to suck up the
+# following blank; then "Thread" is matched; and finally ndiff reports
+# that "volatile " was inserted before "Thread". The only quibble
+# remaining is that perhaps it was really the case that " volatile"
+# was inserted after "private". I can live with that.
+
+import re
+
+def IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match):
+ r"""
+ Return True for ignorable line: iff `line` is blank or contains a single '#'.
+
+ `pat` is the bound `match` method of a pattern compiled once, when the
+ function is defined; it is not meant to be passed by callers.
+
+ Examples:
+
+ >>> IS_LINE_JUNK('\n')
+ True
+ >>> IS_LINE_JUNK(' # \n')
+ True
+ >>> IS_LINE_JUNK('hello\n')
+ False
+ """
+
+ return pat(line) is not None
+
+def IS_CHARACTER_JUNK(ch, ws=" \t"):
+ r"""
+ Return True for ignorable character: iff `ch` is a space or tab.
+
+ `ws` holds the characters treated as junk; the test is `ch in ws`.
+
+ Examples:
+
+ >>> IS_CHARACTER_JUNK(' ')
+ True
+ >>> IS_CHARACTER_JUNK('\t')
+ True
+ >>> IS_CHARACTER_JUNK('\n')
+ False
+ >>> IS_CHARACTER_JUNK('x')
+ False
+ """
+
+ return ch in ws
+
+
+########################################################################
+### Unified Diff
+########################################################################
+
+def _format_range_unified(start, stop):
+ 'Convert range to the "ed" format'
+ # Per the diff spec at http://www.unix.org/single_unix_specification/
+ beginning = start + 1 # lines start numbering with one
+ length = stop - start
+ if length == 1:
+ return '{}'.format(beginning)
+ if not length:
+ beginning -= 1 # empty ranges begin at line just before the range
+ return '{},{}'.format(beginning, length)
+
+def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
+ tofiledate='', n=3, lineterm='\n'):
+ r"""
+ Compare two sequences of lines; generate the delta as a unified diff.
+
+ Unified diffs are a compact way of showing line changes and a few
+ lines of context. The number of context lines is set by 'n' which
+ defaults to three.
+
+ By default, the diff control lines (those with ---, +++, or @@) are
+ created with a trailing newline. This is helpful so that inputs
+ created from file.readlines() result in diffs that are suitable for
+ file.writelines() since both the inputs and outputs have trailing
+ newlines.
+
+ For inputs that do not have trailing newlines, set the lineterm
+ argument to "" so that the output will be uniformly newline free.
+
+ The unidiff format normally has a header for filenames and modification
+ times. Any or all of these may be specified using strings for
+ 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
+ The modification times are normally expressed in the ISO 8601 format.
+
+ This is a generator. The two header lines are emitted only together
+ with the first hunk.
+
+ Example:
+
+ >>> for line in unified_diff('one two three four'.split(),
+ ... 'zero one tree four'.split(), 'Original', 'Current',
+ ... '2005-01-26 23:30:50', '2010-04-02 10:20:52',
+ ... lineterm=''):
+ ... print line # doctest: +NORMALIZE_WHITESPACE
+ --- Original 2005-01-26 23:30:50
+ +++ Current 2010-04-02 10:20:52
+ @@ -1,4 +1,4 @@
+ +zero
+ one
+ -two
+ -three
+ +tree
+ four
+ """
+
+ # `started` records whether the file header has been written yet
+ started = False
+ for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+ if not started:
+ started = True
+ # a date, when given, is separated from the file name by a tab
+ fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
+ todate = '\t{}'.format(tofiledate) if tofiledate else ''
+ yield '--- {}{}{}'.format(fromfile, fromdate, lineterm)
+ yield '+++ {}{}{}'.format(tofile, todate, lineterm)
+
+ # the hunk spans from the first opcode's start to the last opcode's end
+ first, last = group[0], group[-1]
+ file1_range = _format_range_unified(first[1], last[2])
+ file2_range = _format_range_unified(first[3], last[4])
+ yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm)
+
+ for tag, i1, i2, j1, j2 in group:
+ if tag == 'equal':
+ for line in a[i1:i2]:
+ yield ' ' + line
+ continue
+ # a 'replace' contributes to both of the branches below
+ if tag in ('replace', 'delete'):
+ for line in a[i1:i2]:
+ yield '-' + line
+ if tag in ('replace', 'insert'):
+ for line in b[j1:j2]:
+ yield '+' + line
+
+
+########################################################################
+### Context Diff
+########################################################################
+
+def _format_range_context(start, stop):
+ 'Convert range to the "ed" format'
+ # Per the diff spec at http://www.unix.org/single_unix_specification/
+ beginning = start + 1 # lines start numbering with one
+ length = stop - start
+ if not length:
+ beginning -= 1 # empty ranges begin at line just before the range
+ if length <= 1:
+ return '{}'.format(beginning)
+ return '{},{}'.format(beginning, beginning + length - 1)
+
+# See http://www.unix.org/single_unix_specification/
+def context_diff(a, b, fromfile='', tofile='',
+ fromfiledate='', tofiledate='', n=3, lineterm='\n'):
+ r"""
+ Compare two sequences of lines; generate the delta as a context diff.
+
+ Context diffs are a compact way of showing line changes and a few
+ lines of context. The number of context lines is set by 'n' which
+ defaults to three.
+
+ By default, the diff control lines (those with *** or ---) are
+ created with a trailing newline. This is helpful so that inputs
+ created from file.readlines() result in diffs that are suitable for
+ file.writelines() since both the inputs and outputs have trailing
+ newlines.
+
+ For inputs that do not have trailing newlines, set the lineterm
+ argument to "" so that the output will be uniformly newline free.
+
+ The context diff format normally has a header for filenames and
+ modification times. Any or all of these may be specified using
+ strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
+ The modification times are normally expressed in the ISO 8601 format.
+ If not specified, the strings default to blanks.
+
+ This is a generator. The two header lines are emitted only together
+ with the first hunk.
+
+ Example:
+
+ >>> print ''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1),
+ ... 'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current')),
+ *** Original
+ --- Current
+ ***************
+ *** 1,4 ****
+ one
+ ! two
+ ! three
+ four
+ --- 1,4 ----
+ + zero
+ one
+ ! tree
+ four
+ """
+
+ # NOTE(review): the other three prefixes are two characters wide; confirm
+ # that 'equal' is not meant to be two blanks (runs of spaces look
+ # collapsed in this copy).
+ prefix = dict(insert='+ ', delete='- ', replace='! ', equal=' ')
+ started = False
+ for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+ if not started:
+ started = True
+ # a date, when given, is separated from the file name by a tab
+ fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
+ todate = '\t{}'.format(tofiledate) if tofiledate else ''
+ yield '*** {}{}{}'.format(fromfile, fromdate, lineterm)
+ yield '--- {}{}{}'.format(tofile, todate, lineterm)
+
+ first, last = group[0], group[-1]
+ yield '***************' + lineterm
+
+ file1_range = _format_range_context(first[1], last[2])
+ yield '*** {} ****{}'.format(file1_range, lineterm)
+
+ # the "from" half is listed only if the group removes or changes lines
+ if any(tag in ('replace', 'delete') for tag, _, _, _, _ in group):
+ for tag, i1, i2, _, _ in group:
+ if tag != 'insert':
+ for line in a[i1:i2]:
+ yield prefix[tag] + line
+
+ file2_range = _format_range_context(first[3], last[4])
+ yield '--- {} ----{}'.format(file2_range, lineterm)
+
+ # the "to" half is listed only if the group adds or changes lines
+ if any(tag in ('replace', 'insert') for tag, _, _, _, _ in group):
+ for tag, _, _, j1, j2 in group:
+ if tag != 'delete':
+ for line in b[j1:j2]:
+ yield prefix[tag] + line
+
+def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
+ r"""
+ Compare `a` and `b` (lists of strings); return a `Differ`-style delta.
+
+ The return value is whatever `Differ.compare` returns, i.e. a generator
+ that produces the delta lines on demand.
+
+ Optional keyword parameters `linejunk` and `charjunk` are for filter
+ functions (or None):
+
+ - linejunk: A function that should accept a single string argument, and
+ return true iff the string is junk. The default is None, and is
+ recommended; as of Python 2.3, an adaptive notion of "noise" lines is
+ used that does a good job on its own.
+
+ - charjunk: A function that should accept a string of length 1. The
+ default is module-level function IS_CHARACTER_JUNK, which filters out
+ whitespace characters (a blank or tab; note: bad idea to include newline
+ in this!).
+
+ Tools/scripts/ndiff.py is a command-line front-end to this function.
+
+ Example:
+
+ >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
+ ... 'ore\ntree\nemu\n'.splitlines(1))
+ >>> print ''.join(diff),
+ - one
+ ? ^
+ + ore
+ ? ^
+ - two
+ - three
+ ? -
+ + tree
+ + emu
+ """
+ return Differ(linejunk, charjunk).compare(a, b)
+
+def _mdiff(fromlines, tolines, context=None, linejunk=None,
+ charjunk=IS_CHARACTER_JUNK):
+ r"""Returns generator yielding marked up from/to side by side differences.
+
+ Arguments:
+ fromlines -- list of text lines to be compared to tolines
+ tolines -- list of text lines to be compared to fromlines
+ context -- number of context lines to display on each side of difference,
+ if None, all from/to text lines will be generated.
+ linejunk -- passed on to ndiff (see ndiff documentation)
+ charjunk -- passed on to ndiff (see ndiff documentation)
+
+ This function returns an iterator which returns a tuple:
+ (from line tuple, to line tuple, boolean flag)
+
+ from/to line tuple -- (line num, line text)
+ line num -- integer or None (to indicate a context separation)
+ line text -- original line text with following markers inserted:
+ '\0+' -- marks start of added text
+ '\0-' -- marks start of deleted text
+ '\0^' -- marks start of changed text
+ '\1' -- marks end of added/deleted/changed text
+
+ boolean flag -- None indicates context separation, True indicates
+ either "from" or "to" line contains a change, otherwise False.
+
+ This function/iterator was originally developed to generate side by side
+ file difference for making HTML pages (see HtmlDiff class for example
+ usage).
+
+ Note, this function utilizes the ndiff function to generate the side by
+ side difference markup. Optional ndiff arguments may be passed to this
+ function and they in turn will be passed to ndiff.
+ """
+ import re
+
+ # regular expression for finding intraline change indices
+ change_re = re.compile('(\++|\-+|\^+)')
+
+ # create the difference iterator to generate the differences
+ diff_lines_iterator = ndiff(fromlines,tolines,linejunk,charjunk)
+
+ def _make_line(lines, format_key, side, num_lines=[0,0]):
+ """Returns line of text with user's change markup and line formatting.
+
+ lines -- list of lines from the ndiff generator to produce a line of
+ text from. When producing the line of text to return, the
+ lines used are removed from this list.
+ format_key -- '+' return first line in list with "add" markup around
+ the entire line.
+ '-' return first line in list with "delete" markup around
+ the entire line.
+ '?' return first line in list with add/delete/change
+ intraline markup (indices obtained from second line)
+ None return first line in list with no markup
+ side -- index into the num_lines list (0=from,1=to)
+ num_lines -- from/to current line number. This is NOT intended to be a
+ passed parameter. It is present as a keyword argument to
+ maintain memory of the current line numbers between calls
+ of this function.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ num_lines[side] += 1
+ # Handle case where no user markup is to be added, just return line of
+ # text with user's line format to allow for usage of the line number.
+ if format_key is None:
+ return (num_lines[side],lines.pop(0)[2:])
+ # Handle case of intraline changes
+ if format_key == '?':
+ text, markers = lines.pop(0), lines.pop(0)
+ # find intraline changes (store change type and indices in tuples)
+ sub_info = []
+ def record_sub_info(match_object,sub_info=sub_info):
+ sub_info.append([match_object.group(1)[0],match_object.span()])
+ return match_object.group(1)
+ # sub() is called only for its side effect of filling sub_info
+ change_re.sub(record_sub_info,markers)
+ # process each tuple inserting our special marks that won't be
+ # noticed by an xml/html escaper.
+ # (walk right to left so earlier indices stay valid)
+ for key,(begin,end) in sub_info[::-1]:
+ text = text[0:begin]+'\0'+key+text[begin:end]+'\1'+text[end:]
+ text = text[2:]
+ # Handle case of add/delete entire line
+ else:
+ text = lines.pop(0)[2:]
+ # if line of text is just a newline, insert a space so there is
+ # something for the user to highlight and see.
+ if not text:
+ text = ' '
+ # insert marks that won't be noticed by an xml/html escaper.
+ text = '\0' + format_key + text + '\1'
+ # Return line of text, first allow user's line formatter to do its
+ # thing (such as adding the line number) then replace the special
+ # marks with the user's change markup.
+ return (num_lines[side],text)
+
+ def _line_iterator():
+ """Yields from/to lines of text with a change indication.
+
+ This function is an iterator. It itself pulls lines from a
+ differencing iterator, processes them and yields them. When it can
+ it yields both a "from" and a "to" line, otherwise it will yield one
+ or the other. In addition to yielding the lines of from/to text, a
+ boolean flag is yielded to indicate if the text line(s) have
+ differences in them.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ lines = []
+ num_blanks_pending, num_blanks_to_yield = 0, 0
+ while True:
+ # Load up next 4 lines so we can look ahead, create strings which
+ # are a concatenation of the first character of each of the 4 lines
+ # so we can do some very readable comparisons.
+ while len(lines) < 4:
+ try:
+ lines.append(diff_lines_iterator.next())
+ except StopIteration:
+ # 'X' pads the look-ahead once the diff is exhausted
+ lines.append('X')
+ s = ''.join([line[0] for line in lines])
+ if s.startswith('X'):
+ # When no more lines, pump out any remaining blank lines so the
+ # corresponding add/delete lines get a matching blank line so
+ # all line pairs get yielded at the next level.
+ num_blanks_to_yield = num_blanks_pending
+ elif s.startswith('-?+?'):
+ # simple intraline change
+ yield _make_line(lines,'?',0), _make_line(lines,'?',1), True
+ continue
+ elif s.startswith('--++'):
+ # in delete block, add block coming: we do NOT want to get
+ # caught up on blank lines yet, just process the delete line
+ num_blanks_pending -= 1
+ yield _make_line(lines,'-',0), None, True
+ continue
+ elif s.startswith(('--?+', '--+', '- ')):
+ # in delete block and see an intraline change or unchanged line
+ # coming: yield the delete line and then blanks
+ from_line,to_line = _make_line(lines,'-',0), None
+ num_blanks_to_yield,num_blanks_pending = num_blanks_pending-1,0
+ elif s.startswith('-+?'):
+ # intraline change
+ yield _make_line(lines,None,0), _make_line(lines,'?',1), True
+ continue
+ elif s.startswith('-?+'):
+ # intraline change
+ yield _make_line(lines,'?',0), _make_line(lines,None,1), True
+ continue
+ elif s.startswith('-'):
+ # delete FROM line
+ num_blanks_pending -= 1
+ yield _make_line(lines,'-',0), None, True
+ continue
+ elif s.startswith('+--'):
+ # in add block, delete block coming: we do NOT want to get
+ # caught up on blank lines yet, just process the add line
+ num_blanks_pending += 1
+ yield None, _make_line(lines,'+',1), True
+ continue
+ elif s.startswith(('+ ', '+-')):
+ # will be leaving an add block: yield blanks then add line
+ from_line, to_line = None, _make_line(lines,'+',1)
+ num_blanks_to_yield,num_blanks_pending = num_blanks_pending+1,0
+ elif s.startswith('+'):
+ # inside an add block, yield the add line
+ num_blanks_pending += 1
+ yield None, _make_line(lines,'+',1), True
+ continue
+ elif s.startswith(' '):
+ # unchanged text, yield it to both sides
+ # (the first call works on a copy, so only the second call
+ # removes the line from the look-ahead list)
+ yield _make_line(lines[:],None,0),_make_line(lines,None,1),False
+ continue
+ # Catch up on the blank lines so when we yield the next from/to
+ # pair, they are lined up.
+ while(num_blanks_to_yield < 0):
+ num_blanks_to_yield += 1
+ yield None,('','\n'),True
+ while(num_blanks_to_yield > 0):
+ num_blanks_to_yield -= 1
+ yield ('','\n'),None,True
+ if s.startswith('X'):
+ raise StopIteration
+ else:
+ yield from_line,to_line,True
+
+ def _line_pair_iterator():
+ """Yields from/to lines of text with a change indication.
+
+ This function is an iterator. It itself pulls lines from the line
+ iterator. Its difference from that iterator is that this function
+ always yields a pair of from/to text lines (with the change
+ indication). If necessary it will collect single from/to lines
+ until it has a matching from/to pair to yield.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ line_iterator = _line_iterator()
+ fromlines,tolines=[],[]
+ while True:
+ # Collecting lines of text until we have a from/to pair
+ while (len(fromlines)==0 or len(tolines)==0):
+ from_line, to_line, found_diff =line_iterator.next()
+ if from_line is not None:
+ fromlines.append((from_line,found_diff))
+ if to_line is not None:
+ tolines.append((to_line,found_diff))
+ # Once we have a pair, remove them from the collection and yield it
+ from_line, fromDiff = fromlines.pop(0)
+ to_line, to_diff = tolines.pop(0)
+ yield (from_line,to_line,fromDiff or to_diff)
+
+ # Handle case where user does not want context differencing, just yield
+ # them up without doing anything else with them.
+ line_pair_iterator = _line_pair_iterator()
+ if context is None:
+ while True:
+ yield line_pair_iterator.next()
+ # Handle case where user wants context differencing. We must do some
+ # storage of lines until we know for sure that they are to be yielded.
+ else:
+ # one extra slot so the changed line itself fits in the queue
+ context += 1
+ lines_to_write = 0
+ while True:
+ # Store lines up until we find a difference, note use of a
+ # circular queue because we only need to keep around what
+ # we need for context.
+ index, contextLines = 0, [None]*(context)
+ found_diff = False
+ while(found_diff is False):
+ from_line, to_line, found_diff = line_pair_iterator.next()
+ i = index % context
+ contextLines[i] = (from_line, to_line, found_diff)
+ index += 1
+ # Yield lines that we have collected so far, but first yield
+ # the user's separator.
+ if index > context:
+ yield None, None, None
+ lines_to_write = context
+ else:
+ lines_to_write = index
+ index = 0
+ while(lines_to_write):
+ i = index % context
+ index += 1
+ yield contextLines[i]
+ lines_to_write -= 1
+ # Now yield the context lines after the change
+ lines_to_write = context-1
+ while(lines_to_write):
+ from_line, to_line, found_diff = line_pair_iterator.next()
+ # If another change within the context, extend the context
+ if found_diff:
+ lines_to_write = context-1
+ else:
+ lines_to_write -= 1
+ yield from_line, to_line, found_diff
+
+
+_file_template = """
+
+
+
+
+
+
+
+
+
+
+
+ %(table)s%(legend)s
+
+
+"""
+
+# CSS rules for the diff table; HtmlDiff.make_file() passes this text as the
+# "styles" value when it fills in the file template.
+_styles = """
+ table.diff {font-family:Courier; border:medium;}
+ .diff_header {background-color:#e0e0e0}
+ td.diff_header {text-align:right}
+ .diff_next {background-color:#c0c0c0}
+ .diff_add {background-color:#aaffaa}
+ .diff_chg {background-color:#ffff77}
+ .diff_sub {background-color:#ffaaaa}"""
+
+_table_template = """
+
+
+
+ %(header_row)s
+
+%(data_rows)s
+
"""
+
+_legend = """
+
+ Legends
+
+ Colors
+ Added
+ Changed
+ Deleted
+
+
+ Links
+ (f)irst change
+ (n)ext change
+ (t)op
+
+
"""
+
+class HtmlDiff(object):
+ """For producing HTML side by side comparison with change highlights.
+
+ This class can be used to create an HTML table (or a complete HTML file
+ containing the table) showing a side by side, line by line comparison
+ of text with inter-line and intra-line change highlights. The table can
+ be generated in either full or contextual difference mode.
+
+ The following methods are provided for HTML generation:
+
+ make_table -- generates HTML for a single side by side table
+ make_file -- generates complete HTML file with a single side by side table
+
+ See tools/scripts/diff.py for an example usage of this class.
+ """
+
+ _file_template = _file_template
+ _styles = _styles
+ _table_template = _table_template
+ _legend = _legend
+ _default_prefix = 0
+
+ def __init__(self,tabsize=8,wrapcolumn=None,linejunk=None,
+ charjunk=IS_CHARACTER_JUNK):
+ """HtmlDiff instance initializer
+
+ Arguments:
+ tabsize -- tab stop spacing, defaults to 8.
+ wrapcolumn -- column number where lines are broken and wrapped,
+ defaults to None where lines are not wrapped.
+ linejunk,charjunk -- keyword arguments passed into ndiff() (used by
+ HtmlDiff() to generate the side by side HTML differences). See
+ ndiff() documentation for argument default values and descriptions.
+ """
+ # the settings are only stored here
+ self._tabsize = tabsize
+ self._wrapcolumn = wrapcolumn
+ self._linejunk = linejunk
+ self._charjunk = charjunk
+
+ def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False,
+ numlines=5):
+ """Returns HTML file of side by side comparison with change highlights
+
+ Arguments:
+ fromlines -- list of "from" lines
+ tolines -- list of "to" lines
+ fromdesc -- "from" file column header string
+ todesc -- "to" file column header string
+ context -- set to True for contextual differences (defaults to False
+ which shows full differences).
+ numlines -- number of context lines. When context is set True,
+ controls number of lines displayed before and after the change.
+ When context is False, controls the number of lines to place
+ the "next" link anchors before the next change (so click of
+ "next" link jumps to just before the change).
+ """
+
+ return self._file_template % dict(
+ styles = self._styles,
+ legend = self._legend,
+ table = self.make_table(fromlines,tolines,fromdesc,todesc,
+ context=context,numlines=numlines))
+
+ def _tab_newline_replace(self,fromlines,tolines):
+ """Returns from/to line lists with tabs expanded and newlines removed.
+
+ Instead of tab characters being replaced by the number of spaces
+ needed to fill in to the next tab stop, this function will fill
+ the space with tab characters. This is done so that the difference
+ algorithms can identify changes in a file when tabs are replaced by
+ spaces and vice versa. At the end of the HTML generation, the tab
+ characters will be replaced with a nonbreakable space.
+ """
+ def expand_tabs(line):
+ # hide real spaces
+ line = line.replace(' ','\0')
+ # expand tabs into spaces
+ line = line.expandtabs(self._tabsize)
+ # replace spaces from expanded tabs back into tab characters
+ # (we'll replace them with markup after we do differencing)
+ line = line.replace(' ','\t')
+ return line.replace('\0',' ').rstrip('\n')
+ fromlines = [expand_tabs(line) for line in fromlines]
+ tolines = [expand_tabs(line) for line in tolines]
+ return fromlines,tolines
+
+ def _split_line(self,data_list,line_num,text):
+ """Builds list of text lines by splitting text lines at wrap point
+
+ This function will determine if the input text line needs to be
+ wrapped (split) into separate lines. If so, the first wrap point
+ will be determined and the first line appended to the output
+ text line list. This function is used recursively to handle
+ the second part of the split line to further split it.
+
+ data_list -- output list; (line_num, text) tuples are appended to it
+ line_num -- line number of the first piece; continuation pieces are
+ appended with '>' in its place
+ text -- line text, possibly containing '\\0x' / '\\1' change markers
+ """
+ # if blank line or context separator, just add it to the output list
+ if not line_num:
+ data_list.append((line_num,text))
+ return
+
+ # if line text doesn't need wrapping, just add it to the output list
+ # (each marked span adds three marker characters -- '\0', the key and
+ # '\1' -- which do not count towards the width)
+ size = len(text)
+ # NOTE: `max` shadows the builtin for the rest of this method
+ max = self._wrapcolumn
+ if (size <= max) or ((size -(text.count('\0')*3)) <= max):
+ data_list.append((line_num,text))
+ return
+
+ # scan text looking for the wrap point, keeping track if the wrap
+ # point is inside markers
+ # i indexes into text, n counts only the visible characters
+ i = 0
+ n = 0
+ mark = ''
+ while n < max and i < size:
+ if text[i] == '\0':
+ i += 1
+ mark = text[i]
+ i += 1
+ elif text[i] == '\1':
+ i += 1
+ mark = ''
+ else:
+ i += 1
+ n += 1
+
+ # wrap point is inside text, break it up into separate lines
+ line1 = text[:i]
+ line2 = text[i:]
+
+ # if wrap point is inside markers, place end marker at end of first
+ # line and start marker at beginning of second line because each
+ # line will have its own table tag markup around it.
+ if mark:
+ line1 = line1 + '\1'
+ line2 = '\0' + mark + line2
+
+ # tack on first line onto the output list
+ data_list.append((line_num,line1))
+
+ # use this routine again to wrap the remaining text
+ self._split_line(data_list,'>',line2)
+
+ def _line_wrapper(self,diffs):
+ """Returns iterator that splits (wraps) mdiff text lines
+
+ diffs -- iterable of (fromdata, todata, flag) tuples. Tuples whose
+ flag is None are passed through unchanged; every other tuple is
+ turned into one or more tuples, one per wrapped piece.
+ """
+
+ # pull from/to data and flags from mdiff iterator
+ for fromdata,todata,flag in diffs:
+ # check for context separators and pass them through
+ if flag is None:
+ yield fromdata,todata,flag
+ continue
+ (fromline,fromtext),(toline,totext) = fromdata,todata
+ # for each from/to line split it at the wrap column to form
+ # list of text lines.
+ fromlist,tolist = [],[]
+ self._split_line(fromlist,fromline,fromtext)
+ self._split_line(tolist,toline,totext)
+ # yield from/to line in pairs inserting blank lines as
+ # necessary when one side has more wrapped lines
+ while fromlist or tolist:
+ if fromlist:
+ fromdata = fromlist.pop(0)
+ else:
+ fromdata = ('',' ')
+ if tolist:
+ todata = tolist.pop(0)
+ else:
+ todata = ('',' ')
+ yield fromdata,todata,flag
+
+ def _collect_lines(self,diffs):
+ """Collects mdiff output into separate lists
+
+ Before storing the mdiff from/to data into a list, it is converted
+ into a single line of text with HTML markup.
+
+ Returns the tuple (fromlist, tolist, flaglist); a context separator
+ is stored as None in both fromlist and tolist.
+ """
+
+ fromlist,tolist,flaglist = [],[],[]
+ # pull from/to data and flags from mdiff style iterator
+ for fromdata,todata,flag in diffs:
+ try:
+ # store HTML markup of the lines into the lists
+ fromlist.append(self._format_line(0,flag,*fromdata))
+ tolist.append(self._format_line(1,flag,*todata))
+ except TypeError:
+ # exceptions occur for lines where context separators go
+ # (unpacking *fromdata fails there, so nothing was appended yet)
+ fromlist.append(None)
+ tolist.append(None)
+ flaglist.append(flag)
+ return fromlist,tolist,flaglist
+
+ def _format_line(self,side,flag,linenum,text):
+ """Returns HTML markup of "from" / "to" text lines
+
+ side -- 0 or 1 indicating "from" or "to" text
+ flag -- indicates if difference on line
+ linenum -- line number (used for line number column)
+ text -- line text to be marked up
+ """
+ try:
+ linenum = '%d' % linenum
+ id = ' id="%s%s"' % (self._prefix[side],linenum)
+ except TypeError:
+ # handle blank lines where linenum is '>' or ''
+ id = ''
+ # replace those things that would get confused with HTML symbols
+ text=text.replace("&","&").replace(">",">").replace("<","<")
+
+ # make space non-breakable so they don't get compressed or line wrapped
+ text = text.replace(' ',' ').rstrip()
+
+ return '%s ' \
+ % (id,linenum,text)
+
+ def _make_prefix(self):
+ """Create unique anchor prefixes"""
+
+ # Generate a unique anchor prefix so multiple tables
+ # can exist on the same HTML page without conflicts.
+ fromprefix = "from%d_" % HtmlDiff._default_prefix
+ toprefix = "to%d_" % HtmlDiff._default_prefix
+ HtmlDiff._default_prefix += 1
+ # store prefixes so line format method has access
+ self._prefix = [fromprefix,toprefix]
+
+ def _convert_flags(self,fromlist,tolist,flaglist,context,numlines):
+ """Makes list of "next" links"""
+
+ # all anchor names will be generated using the unique "to" prefix
+ toprefix = self._prefix[1]
+
+ # process change flags, generating middle column of next anchors/links
+ next_id = ['']*len(flaglist)
+ next_href = ['']*len(flaglist)
+ num_chg, in_change = 0, False
+ last = 0
+ for i,flag in enumerate(flaglist):
+ if flag:
+ if not in_change:
+ in_change = True
+ last = i
+ # at the beginning of a change, drop an anchor a few lines
+ # (the context lines) before the change for the previous
+ # link
+ i = max([0,i-numlines])
+ next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix,num_chg)
+ # at the beginning of a change, drop a link to the next
+ # change
+ num_chg += 1
+ next_href[last] = 'n ' % (
+ toprefix,num_chg)
+ else:
+ in_change = False
+ # check for cases where there is no content to avoid exceptions
+ if not flaglist:
+ flaglist = [False]
+ next_id = ['']
+ next_href = ['']
+ last = 0
+ if context:
+ fromlist = [' No Differences Found ']
+ tolist = fromlist
+ else:
+ fromlist = tolist = [' Empty File ']
+ # if not a change on first line, drop a link
+ if not flaglist[0]:
+ next_href[0] = 'f ' % toprefix
+ # redo the last link to link to the top
+ next_href[last] = 't ' % (toprefix)
+
+ return fromlist,tolist,flaglist,next_href,next_id
+
+ def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False,
+ numlines=5):
+ """Returns HTML table of side by side comparison with change highlights
+
+ Arguments:
+ fromlines -- list of "from" lines
+ tolines -- list of "to" lines
+ fromdesc -- "from" file column header string
+ todesc -- "to" file column header string
+ context -- set to True for contextual differences (defaults to False
+ which shows full differences).
+ numlines -- number of context lines. When context is set True,
+ controls number of lines displayed before and after the change.
+ When context is False, controls the number of lines to place
+ the "next" link anchors before the next change (so click of
+ "next" link jumps to just before the change).
+ """
+
+ # make unique anchor prefixes so that multiple tables may exist
+ # on the same page without conflict.
+ self._make_prefix()
+
+ # change tabs to spaces before it gets more difficult after we insert
+ # markup
+ fromlines,tolines = self._tab_newline_replace(fromlines,tolines)
+
+ # create diffs iterator which generates side by side from/to data
+ if context:
+ context_lines = numlines
+ else:
+ context_lines = None
+ diffs = _mdiff(fromlines,tolines,context_lines,linejunk=self._linejunk,
+ charjunk=self._charjunk)
+
+ # set up iterator to wrap lines that exceed desired width
+ if self._wrapcolumn:
+ diffs = self._line_wrapper(diffs)
+
+ # collect up from/to lines and flags into lists (also format the lines)
+ fromlist,tolist,flaglist = self._collect_lines(diffs)
+
+ # process change flags, generating middle column of next anchors/links
+ fromlist,tolist,flaglist,next_href,next_id = self._convert_flags(
+ fromlist,tolist,flaglist,context,numlines)
+
+ s = []
+ fmt = ' %s %s' + \
+ '%s %s \n'
+ for i in range(len(flaglist)):
+ if flaglist[i] is None:
+ # mdiff yields None on separator lines skip the bogus ones
+ # generated for the first line
+ if i > 0:
+ s.append(' \n \n')
+ else:
+ s.append( fmt % (next_id[i],next_href[i],fromlist[i],
+ next_href[i],tolist[i]))
+ if fromdesc or todesc:
+ header_row = '%s%s%s%s ' % (
+ ' ',
+ '' % fromdesc,
+ ' ',
+ '' % todesc)
+ else:
+ header_row = ''
+
+ table = self._table_template % dict(
+ data_rows=''.join(s),
+ header_row=header_row,
+ prefix=self._prefix[1])
+
+ return table.replace('\0+',''). \
+ replace('\0-',''). \
+ replace('\0^',''). \
+ replace('\1',' '). \
+ replace('\t',' ')
+
+del re
+
+def restore(delta, which):
+ r"""
+ Generate one of the two sequences that generated a delta.
+
+ Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract
+ lines originating from file 1 or 2 (parameter `which`), stripping off line
+ prefixes.
+
+ Examples:
+
+ >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
+ ... 'ore\ntree\nemu\n'.splitlines(1))
+ >>> diff = list(diff)
+ >>> print ''.join(restore(diff, 1)),
+ one
+ two
+ three
+ >>> print ''.join(restore(diff, 2)),
+ ore
+ tree
+ emu
+ """
+ try:
+ tag = {1: "- ", 2: "+ "}[int(which)]
+ except KeyError:
+ raise ValueError, ('unknown delta choice (must be 1 or 2): %r'
+ % which)
+ prefixes = (" ", tag)
+ for line in delta:
+ if line[:2] in prefixes:
+ yield line[2:]
+
+def _test():
+ import doctest, difflib
+ return doctest.testmod(difflib)
+
+if __name__ == "__main__":
+ _test()
diff --git a/cashew/Lib/dircache.py b/cashew/Lib/dircache.py
new file mode 100644
index 0000000..7e4f0b5
--- /dev/null
+++ b/cashew/Lib/dircache.py
@@ -0,0 +1,41 @@
+"""Read and cache directory listings.
+
+The listdir() routine returns a sorted list of the files in a directory,
+using a cache to avoid reading the directory more often than necessary.
+The annotate() routine appends slashes to directories."""
+from warnings import warnpy3k
+warnpy3k("the dircache module has been removed in Python 3.0", stacklevel=2)
+del warnpy3k
+
+import os
+
+__all__ = ["listdir", "opendir", "annotate", "reset"]
+
+cache = {}
+
+def reset():
+    """Reset the cache completely."""
+    # Rebinds the module-level name to a new dict; the old dict object is
+    # left unmodified.
+    global cache
+    cache = {}
+
+def listdir(path):
+ """List directory contents, using cache."""
+ try:
+ cached_mtime, list = cache[path]
+ del cache[path]
+ except KeyError:
+ cached_mtime, list = -1, []
+ mtime = os.stat(path).st_mtime
+ if mtime != cached_mtime:
+ list = os.listdir(path)
+ list.sort()
+ cache[path] = mtime, list
+ return list
+
+opendir = listdir # XXX backward compatibility
+
+def annotate(head, list):
+ """Add '/' suffixes to directories."""
+ for i in range(len(list)):
+ if os.path.isdir(os.path.join(head, list[i])):
+ list[i] = list[i] + '/'
diff --git a/cashew/Lib/dis.py b/cashew/Lib/dis.py
new file mode 100644
index 0000000..5aa09c9
--- /dev/null
+++ b/cashew/Lib/dis.py
@@ -0,0 +1,224 @@
+"""Disassembler of Python byte code into mnemonics."""
+
+import sys
+import types
+
+from opcode import *
+from opcode import __all__ as _opcodes_all
+
+__all__ = ["dis", "disassemble", "distb", "disco",
+ "findlinestarts", "findlabels"] + _opcodes_all
+del _opcodes_all
+
+_have_code = (types.MethodType, types.FunctionType, types.CodeType,
+ types.ClassType, type)
+
+def dis(x=None):
+ """Disassemble classes, methods, functions, or code.
+
+ With no argument, disassemble the last traceback.
+
+ """
+ if x is None:
+ distb()
+ return
+ if isinstance(x, types.InstanceType):
+ x = x.__class__
+ if hasattr(x, 'im_func'):
+ x = x.im_func
+ if hasattr(x, 'func_code'):
+ x = x.func_code
+ if hasattr(x, '__dict__'):
+ items = x.__dict__.items()
+ items.sort()
+ for name, x1 in items:
+ if isinstance(x1, _have_code):
+ print "Disassembly of %s:" % name
+ try:
+ dis(x1)
+ except TypeError, msg:
+ print "Sorry:", msg
+ print
+ elif hasattr(x, 'co_code'):
+ disassemble(x)
+ elif isinstance(x, str):
+ disassemble_string(x)
+ else:
+ raise TypeError, \
+ "don't know how to disassemble %s objects" % \
+ type(x).__name__
+
+def distb(tb=None):
+ """Disassemble a traceback (default: last traceback)."""
+ if tb is None:
+ try:
+ tb = sys.last_traceback
+ except AttributeError:
+ raise RuntimeError, "no last traceback to disassemble"
+ while tb.tb_next: tb = tb.tb_next
+ disassemble(tb.tb_frame.f_code, tb.tb_lasti)
+
+def disassemble(co, lasti=-1):
+ """Disassemble a code object."""
+ code = co.co_code
+ labels = findlabels(code)
+ linestarts = dict(findlinestarts(co))
+ n = len(code)
+ i = 0
+ extended_arg = 0
+ free = None
+ while i < n:
+ c = code[i]
+ op = ord(c)
+ if i in linestarts:
+ if i > 0:
+ print
+ print "%3d" % linestarts[i],
+ else:
+ print ' ',
+
+ if i == lasti: print '-->',
+ else: print ' ',
+ if i in labels: print '>>',
+ else: print ' ',
+ print repr(i).rjust(4),
+ print opname[op].ljust(20),
+ i = i+1
+ if op >= HAVE_ARGUMENT:
+ oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
+ extended_arg = 0
+ i = i+2
+ if op == EXTENDED_ARG:
+ extended_arg = oparg*65536L
+ print repr(oparg).rjust(5),
+ if op in hasconst:
+ print '(' + repr(co.co_consts[oparg]) + ')',
+ elif op in hasname:
+ print '(' + co.co_names[oparg] + ')',
+ elif op in hasjrel:
+ print '(to ' + repr(i + oparg) + ')',
+ elif op in haslocal:
+ print '(' + co.co_varnames[oparg] + ')',
+ elif op in hascompare:
+ print '(' + cmp_op[oparg] + ')',
+ elif op in hasfree:
+ if free is None:
+ free = co.co_cellvars + co.co_freevars
+ print '(' + free[oparg] + ')',
+ print
+
+def disassemble_string(code, lasti=-1, varnames=None, names=None,
+ constants=None):
+ labels = findlabels(code)
+ n = len(code)
+ i = 0
+ while i < n:
+ c = code[i]
+ op = ord(c)
+ if i == lasti: print '-->',
+ else: print ' ',
+ if i in labels: print '>>',
+ else: print ' ',
+ print repr(i).rjust(4),
+ print opname[op].ljust(15),
+ i = i+1
+ if op >= HAVE_ARGUMENT:
+ oparg = ord(code[i]) + ord(code[i+1])*256
+ i = i+2
+ print repr(oparg).rjust(5),
+ if op in hasconst:
+ if constants:
+ print '(' + repr(constants[oparg]) + ')',
+ else:
+ print '(%d)'%oparg,
+ elif op in hasname:
+ if names is not None:
+ print '(' + names[oparg] + ')',
+ else:
+ print '(%d)'%oparg,
+ elif op in hasjrel:
+ print '(to ' + repr(i + oparg) + ')',
+ elif op in haslocal:
+ if varnames:
+ print '(' + varnames[oparg] + ')',
+ else:
+ print '(%d)' % oparg,
+ elif op in hascompare:
+ print '(' + cmp_op[oparg] + ')',
+ print
+
+disco = disassemble # XXX For backwards compatibility
+
+def findlabels(code):
+ """Detect all offsets in a byte code which are jump targets.
+
+ Return the list of offsets.
+
+ """
+ labels = []
+ n = len(code)
+ i = 0
+ while i < n:
+ c = code[i]
+ op = ord(c)
+ i = i+1
+ if op >= HAVE_ARGUMENT:
+ oparg = ord(code[i]) + ord(code[i+1])*256
+ i = i+2
+ label = -1
+ if op in hasjrel:
+ label = i+oparg
+ elif op in hasjabs:
+ label = oparg
+ if label >= 0:
+ if label not in labels:
+ labels.append(label)
+ return labels
+
+def findlinestarts(code):
+ """Find the offsets in a byte code which are start of lines in the source.
+
+ Generate pairs (offset, lineno) as described in Python/compile.c.
+
+ """
+ byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
+ line_increments = [ord(c) for c in code.co_lnotab[1::2]]
+
+ lastlineno = None
+ lineno = code.co_firstlineno
+ addr = 0
+ for byte_incr, line_incr in zip(byte_increments, line_increments):
+ if byte_incr:
+ if lineno != lastlineno:
+ yield (addr, lineno)
+ lastlineno = lineno
+ addr += byte_incr
+ lineno += line_incr
+ if lineno != lastlineno:
+ yield (addr, lineno)
+
+def _test():
+ """Simple test program to disassemble a file."""
+ if sys.argv[1:]:
+ if sys.argv[2:]:
+ sys.stderr.write("usage: python dis.py [-|file]\n")
+ sys.exit(2)
+ fn = sys.argv[1]
+ if not fn or fn == "-":
+ fn = None
+ else:
+ fn = None
+ if fn is None:
+ f = sys.stdin
+ else:
+ f = open(fn)
+ source = f.read()
+ if fn is not None:
+ f.close()
+ else:
+ fn = ""
+ code = compile(source, fn, "exec")
+ dis(code)
+
+if __name__ == "__main__":
+ _test()
diff --git a/cashew/Lib/distutils/__init__.py b/cashew/Lib/distutils/__init__.py
new file mode 100644
index 0000000..d823d04
--- /dev/null
+++ b/cashew/Lib/distutils/__init__.py
@@ -0,0 +1,13 @@
+"""distutils
+
+The main package for the Python Module Distribution Utilities. Normally
+used from a setup script as
+
+ from distutils.core import setup
+
+ setup (...)
+"""
+
+import sys
+
+__version__ = sys.version[:sys.version.index(' ')]
diff --git a/cashew/Lib/distutils/archive_util.py b/cashew/Lib/distutils/archive_util.py
new file mode 100644
index 0000000..834b722
--- /dev/null
+++ b/cashew/Lib/distutils/archive_util.py
@@ -0,0 +1,243 @@
+"""distutils.archive_util
+
+Utility functions for creating archive files (tarballs, zip files,
+that sort of thing)."""
+
+__revision__ = "$Id$"
+
+import os
+from warnings import warn
+import sys
+
+from distutils.errors import DistutilsExecError
+from distutils.spawn import spawn
+from distutils.dir_util import mkpath
+from distutils import log
+
+try:
+ from pwd import getpwnam
+except ImportError:
+ getpwnam = None
+
+try:
+ from grp import getgrnam
+except ImportError:
+ getgrnam = None
+
+def _get_gid(name):
+ """Returns a gid, given a group name."""
+ if getgrnam is None or name is None:
+ return None
+ try:
+ result = getgrnam(name)
+ except KeyError:
+ result = None
+ if result is not None:
+ return result[2]
+ return None
+
+def _get_uid(name):
+ """Returns an uid, given a user name."""
+ if getpwnam is None or name is None:
+ return None
+ try:
+ result = getpwnam(name)
+ except KeyError:
+ result = None
+ if result is not None:
+ return result[2]
+ return None
+
+def make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
+ owner=None, group=None):
+ """Create a (possibly compressed) tar file from all the files under
+ 'base_dir'.
+
+ 'compress' must be "gzip" (the default), "compress", "bzip2", or None.
+ (compress will be deprecated in Python 3.2)
+
+ 'owner' and 'group' can be used to define an owner and a group for the
+ archive that is being built. If not provided, the current owner and group
+ will be used.
+
+ The output tar file will be named 'base_dir' + ".tar", possibly plus
+ the appropriate compression extension (".gz", ".bz2" or ".Z").
+
+ Returns the output filename.
+ """
+ tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: '', 'compress': ''}
+ compress_ext = {'gzip': '.gz', 'bzip2': '.bz2', 'compress': '.Z'}
+
+ # flags for compression program, each element of list will be an argument
+ if compress is not None and compress not in compress_ext.keys():
+ raise ValueError, \
+ ("bad value for 'compress': must be None, 'gzip', 'bzip2' "
+ "or 'compress'")
+
+ archive_name = base_name + '.tar'
+ if compress != 'compress':
+ archive_name += compress_ext.get(compress, '')
+
+ mkpath(os.path.dirname(archive_name), dry_run=dry_run)
+
+ # creating the tarball
+ import tarfile # late import so Python build itself doesn't break
+
+ log.info('Creating tar archive')
+
+ uid = _get_uid(owner)
+ gid = _get_gid(group)
+
+ def _set_uid_gid(tarinfo):
+ if gid is not None:
+ tarinfo.gid = gid
+ tarinfo.gname = group
+ if uid is not None:
+ tarinfo.uid = uid
+ tarinfo.uname = owner
+ return tarinfo
+
+ if not dry_run:
+ tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
+ try:
+ tar.add(base_dir, filter=_set_uid_gid)
+ finally:
+ tar.close()
+
+ # compression using `compress`
+ if compress == 'compress':
+ warn("'compress' will be deprecated.", PendingDeprecationWarning)
+ # the option varies depending on the platform
+ compressed_name = archive_name + compress_ext[compress]
+ if sys.platform == 'win32':
+ cmd = [compress, archive_name, compressed_name]
+ else:
+ cmd = [compress, '-f', archive_name]
+ spawn(cmd, dry_run=dry_run)
+ return compressed_name
+
+ return archive_name
+
+def make_zipfile(base_name, base_dir, verbose=0, dry_run=0):
+ """Create a zip file from all the files under 'base_dir'.
+
+ The output zip file will be named 'base_name' + ".zip". Uses either the
+ "zipfile" Python module (if available) or the InfoZIP "zip" utility
+ (if installed and found on the default search path). If neither tool is
+ available, raises DistutilsExecError. Returns the name of the output zip
+ file.
+ """
+ try:
+ import zipfile
+ except ImportError:
+ zipfile = None
+
+ zip_filename = base_name + ".zip"
+ mkpath(os.path.dirname(zip_filename), dry_run=dry_run)
+
+ # If zipfile module is not available, try spawning an external
+ # 'zip' command.
+ if zipfile is None:
+ if verbose:
+ zipoptions = "-r"
+ else:
+ zipoptions = "-rq"
+
+ try:
+ spawn(["zip", zipoptions, zip_filename, base_dir],
+ dry_run=dry_run)
+ except DistutilsExecError:
+ # XXX really should distinguish between "couldn't find
+ # external 'zip' command" and "zip failed".
+ raise DistutilsExecError, \
+ ("unable to create zip file '%s': "
+ "could neither import the 'zipfile' module nor "
+ "find a standalone zip utility") % zip_filename
+
+ else:
+ log.info("creating '%s' and adding '%s' to it",
+ zip_filename, base_dir)
+
+ if not dry_run:
+ zip = zipfile.ZipFile(zip_filename, "w",
+ compression=zipfile.ZIP_DEFLATED)
+
+ for dirpath, dirnames, filenames in os.walk(base_dir):
+ for name in filenames:
+ path = os.path.normpath(os.path.join(dirpath, name))
+ if os.path.isfile(path):
+ zip.write(path, path)
+ log.info("adding '%s'" % path)
+ zip.close()
+
+ return zip_filename
+
+# Maps an archive format name to a 3-tuple:
+#   (function that builds the archive,
+#    list of (keyword, value) pairs passed to that function,
+#    short description of the format)
+ARCHIVE_FORMATS = {
+    'gztar': (make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
+    'bztar': (make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
+    'ztar':  (make_tarball, [('compress', 'compress')], "compressed tar file"),
+    'tar':   (make_tarball, [('compress', None)], "uncompressed tar file"),
+    'zip':   (make_zipfile, [],"ZIP file")
+    }
+
+def check_archive_formats(formats):
+ """Returns the first format from the 'format' list that is unknown.
+
+ If all formats are known, returns None
+ """
+ for format in formats:
+ if format not in ARCHIVE_FORMATS:
+ return format
+ return None
+
+def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
+ dry_run=0, owner=None, group=None):
+ """Create an archive file (eg. zip or tar).
+
+ 'base_name' is the name of the file to create, minus any format-specific
+ extension; 'format' is the archive format: one of "zip", "tar", "ztar",
+ or "gztar".
+
+ 'root_dir' is a directory that will be the root directory of the
+ archive; ie. we typically chdir into 'root_dir' before creating the
+ archive. 'base_dir' is the directory where we start archiving from;
+ ie. 'base_dir' will be the common prefix of all files and
+ directories in the archive. 'root_dir' and 'base_dir' both default
+ to the current directory. Returns the name of the archive file.
+
+ 'owner' and 'group' are used when creating a tar archive. By default,
+ uses the current owner and group.
+ """
+ save_cwd = os.getcwd()
+ if root_dir is not None:
+ log.debug("changing into '%s'", root_dir)
+ base_name = os.path.abspath(base_name)
+ if not dry_run:
+ os.chdir(root_dir)
+
+ if base_dir is None:
+ base_dir = os.curdir
+
+ kwargs = {'dry_run': dry_run}
+
+ try:
+ format_info = ARCHIVE_FORMATS[format]
+ except KeyError:
+ raise ValueError, "unknown archive format '%s'" % format
+
+ func = format_info[0]
+ for arg, val in format_info[1]:
+ kwargs[arg] = val
+
+ if format != 'zip':
+ kwargs['owner'] = owner
+ kwargs['group'] = group
+
+ try:
+ filename = func(base_name, base_dir, **kwargs)
+ finally:
+ if root_dir is not None:
+ log.debug("changing back to '%s'", save_cwd)
+ os.chdir(save_cwd)
+
+ return filename
diff --git a/cashew/Lib/distutils/bcppcompiler.py b/cashew/Lib/distutils/bcppcompiler.py
new file mode 100644
index 0000000..f26e7ae
--- /dev/null
+++ b/cashew/Lib/distutils/bcppcompiler.py
@@ -0,0 +1,394 @@
+"""distutils.bcppcompiler
+
+Contains BorlandCCompiler, an implementation of the abstract CCompiler class
+for the Borland C++ compiler.
+"""
+
+# This implementation by Lyle Johnson, based on the original msvccompiler.py
+# module and using the directions originally published by Gordon Williams.
+
+# XXX looks like there's a LOT of overlap between these two classes:
+# someone should sit down and factor out the common code as
+# WindowsCCompiler! --GPW
+
+__revision__ = "$Id$"
+
+import os
+
+from distutils.errors import (DistutilsExecError, CompileError, LibError,
+ LinkError, UnknownFileError)
+from distutils.ccompiler import CCompiler, gen_preprocess_options
+from distutils.file_util import write_file
+from distutils.dep_util import newer
+from distutils import log
+
+class BCPPCompiler(CCompiler) :
+ """Concrete class that implements an interface to the Borland C/C++
+ compiler, as defined by the CCompiler abstract class.
+ """
+
+ compiler_type = 'bcpp'
+
+ # Just set this so CCompiler's constructor doesn't barf. We currently
+ # don't use the 'set_executables()' bureaucracy provided by CCompiler,
+ # as it really isn't necessary for this sort of single-compiler class.
+ # Would be nice to have a consistent interface with UnixCCompiler,
+ # though, so it's worth thinking about.
+ executables = {}
+
+ # Private class data (need to distinguish C from C++ source for compiler)
+ _c_extensions = ['.c']
+ _cpp_extensions = ['.cc', '.cpp', '.cxx']
+
+ # Needed for the filename generation methods provided by the
+ # base class, CCompiler.
+ src_extensions = _c_extensions + _cpp_extensions
+ obj_extension = '.obj'
+ static_lib_extension = '.lib'
+ shared_lib_extension = '.dll'
+ static_lib_format = shared_lib_format = '%s%s'
+ exe_extension = '.exe'
+
+
+    def __init__ (self,
+                  verbose=0,
+                  dry_run=0,
+                  force=0):
+        """Initialise the base class and set the Borland tool names and flags.
+
+        verbose, dry_run, force -- passed unchanged to CCompiler.__init__
+        """
+
+        CCompiler.__init__ (self, verbose, dry_run, force)
+
+        # These executables are assumed to all be in the path.
+        # Borland doesn't seem to use any special registry settings to
+        # indicate their installation locations.
+
+        self.cc = "bcc32.exe"
+        self.linker = "ilink32.exe"
+        self.lib = "tlib.exe"
+
+        # Option lists; the *_debug variants are the ones chosen when a
+        # method is called with debug set.
+        self.preprocess_options = None
+        self.compile_options = ['/tWM', '/O2', '/q', '/g0']
+        self.compile_options_debug = ['/tWM', '/Od', '/q', '/g0']
+
+        self.ldflags_shared = ['/Tpd', '/Gn', '/q', '/x']
+        self.ldflags_shared_debug = ['/Tpd', '/Gn', '/q', '/x']
+        self.ldflags_static = []
+        self.ldflags_exe = ['/Gn', '/q', '/x']
+        self.ldflags_exe_debug = ['/Gn', '/q', '/x','/r']
+
+
+ # -- Worker methods ------------------------------------------------
+
+ def compile(self, sources,
+ output_dir=None, macros=None, include_dirs=None, debug=0,
+ extra_preargs=None, extra_postargs=None, depends=None):
+
+ macros, objects, extra_postargs, pp_opts, build = \
+ self._setup_compile(output_dir, macros, include_dirs, sources,
+ depends, extra_postargs)
+ compile_opts = extra_preargs or []
+ compile_opts.append ('-c')
+ if debug:
+ compile_opts.extend (self.compile_options_debug)
+ else:
+ compile_opts.extend (self.compile_options)
+
+ for obj in objects:
+ try:
+ src, ext = build[obj]
+ except KeyError:
+ continue
+ # XXX why do the normpath here?
+ src = os.path.normpath(src)
+ obj = os.path.normpath(obj)
+ # XXX _setup_compile() did a mkpath() too but before the normpath.
+ # Is it possible to skip the normpath?
+ self.mkpath(os.path.dirname(obj))
+
+ if ext == '.res':
+ # This is already a binary file -- skip it.
+ continue # the 'for' loop
+ if ext == '.rc':
+ # This needs to be compiled to a .res file -- do it now.
+ try:
+ self.spawn (["brcc32", "-fo", obj, src])
+ except DistutilsExecError, msg:
+ raise CompileError, msg
+ continue # the 'for' loop
+
+ # The next two are both for the real compiler.
+ if ext in self._c_extensions:
+ input_opt = ""
+ elif ext in self._cpp_extensions:
+ input_opt = "-P"
+ else:
+ # Unknown file type -- no extra options. The compiler
+ # will probably fail, but let it just in case this is a
+ # file the compiler recognizes even if we don't.
+ input_opt = ""
+
+ output_opt = "-o" + obj
+
+ # Compiler command line syntax is: "bcc32 [options] file(s)".
+ # Note that the source file names must appear at the end of
+ # the command line.
+ try:
+ self.spawn ([self.cc] + compile_opts + pp_opts +
+ [input_opt, output_opt] +
+ extra_postargs + [src])
+ except DistutilsExecError, msg:
+ raise CompileError, msg
+
+ return objects
+
+ # compile ()
+
+
+ def create_static_lib (self,
+ objects,
+ output_libname,
+ output_dir=None,
+ debug=0,
+ target_lang=None):
+
+ (objects, output_dir) = self._fix_object_args (objects, output_dir)
+ output_filename = \
+ self.library_filename (output_libname, output_dir=output_dir)
+
+ if self._need_link (objects, output_filename):
+ lib_args = [output_filename, '/u'] + objects
+ if debug:
+ pass # XXX what goes here?
+ try:
+ self.spawn ([self.lib] + lib_args)
+ except DistutilsExecError, msg:
+ raise LibError, msg
+ else:
+ log.debug("skipping %s (up-to-date)", output_filename)
+
+ # create_static_lib ()
+
+
+    def link (self,
+              target_desc,
+              objects,
+              output_filename,
+              output_dir=None,
+              libraries=None,
+              library_dirs=None,
+              runtime_library_dirs=None,
+              export_symbols=None,
+              debug=0,
+              extra_preargs=None,
+              extra_postargs=None,
+              build_temp=None,
+              target_lang=None):
+        """Link 'objects' into an executable or shared library.
+
+        The linker named by self.linker is run with arguments in this
+        order: flags, library search dirs, startup object and object
+        files, ',' output file, ',,' libraries, ',' def file, ','
+        resource files.  Nothing is run when self._need_link() reports the
+        target as up to date.  A failing command raises LinkError.
+        'runtime_library_dirs' only triggers a warning; 'build_temp' and
+        'target_lang' are not used.
+        """
+
+        # XXX this ignores 'build_temp'! should follow the lead of
+        # msvccompiler.py
+
+        (objects, output_dir) = self._fix_object_args (objects, output_dir)
+        (libraries, library_dirs, runtime_library_dirs) = \
+            self._fix_lib_args (libraries, library_dirs, runtime_library_dirs)
+
+        if runtime_library_dirs:
+            log.warn("I don't know what to do with 'runtime_library_dirs': %s",
+                     str(runtime_library_dirs))
+
+        if output_dir is not None:
+            output_filename = os.path.join (output_dir, output_filename)
+
+        if self._need_link (objects, output_filename):
+
+            # Figure out linker args based on type of target.
+            if target_desc == CCompiler.EXECUTABLE:
+                startup_obj = 'c0w32'
+                if debug:
+                    ld_args = self.ldflags_exe_debug[:]
+                else:
+                    ld_args = self.ldflags_exe[:]
+            else:
+                startup_obj = 'c0d32'
+                if debug:
+                    ld_args = self.ldflags_shared_debug[:]
+                else:
+                    ld_args = self.ldflags_shared[:]
+
+
+            # Create a temporary exports file for use by the linker
+            if export_symbols is None:
+                def_file = ''
+            else:
+                head, tail = os.path.split (output_filename)
+                modname, ext = os.path.splitext (tail)
+                # NOTE(review): objects[0] assumes at least one object file
+                temp_dir = os.path.dirname(objects[0]) # preserve tree structure
+                def_file = os.path.join (temp_dir, '%s.def' % modname)
+                contents = ['EXPORTS']
+                for sym in (export_symbols or []):
+                    contents.append(' %s=_%s' % (sym, sym))
+                self.execute(write_file, (def_file, contents),
+                             "writing %s" % def_file)
+
+            # Borland C++ has problems with '/' in paths
+            objects2 = map(os.path.normpath, objects)
+            # split objects in .obj and .res files
+            # Borland C++ needs them at different positions in the command line
+            objects = [startup_obj]
+            resources = []
+            for file in objects2:
+                (base, ext) = os.path.splitext(os.path.normcase(file))
+                if ext == '.res':
+                    resources.append(file)
+                else:
+                    objects.append(file)
+
+
+            for l in library_dirs:
+                ld_args.append("/L%s" % os.path.normpath(l))
+            ld_args.append("/L.") # we sometimes use relative paths
+
+            # list of object files
+            ld_args.extend(objects)
+
+            # XXX the command-line syntax for Borland C++ is a bit wonky;
+            # certain filenames are jammed together in one big string, but
+            # comma-delimited. This doesn't mesh too well with the
+            # Unix-centric attitude (with a DOS/Windows quoting hack) of
+            # 'spawn()', so constructing the argument list is a bit
+            # awkward. Note that doing the obvious thing and jamming all
+            # the filenames and commas into one argument would be wrong,
+            # because 'spawn()' would quote any filenames with spaces in
+            # them. Arghghh!. Apparently it works fine as coded...
+
+            # name of dll/exe file
+            ld_args.extend([',',output_filename])
+            # no map file and start libraries
+            ld_args.append(',,')
+
+            for lib in libraries:
+                # see if we find it and if there is a bcpp specific lib
+                # (xxx_bcpp.lib)
+                libfile = self.find_library_file(library_dirs, lib, debug)
+                if libfile is None:
+                    ld_args.append(lib)
+                    # probably a BCPP internal library -- don't warn
+                else:
+                    # full name which prefers bcpp_xxx.lib over xxx.lib
+                    ld_args.append(libfile)
+
+            # some default libraries
+            ld_args.append ('import32')
+            ld_args.append ('cw32mt')
+
+            # def file for export symbols
+            ld_args.extend([',',def_file])
+            # add resource files
+            ld_args.append(',')
+            ld_args.extend(resources)
+
+
+            # caller-supplied arguments go before and after everything else
+            if extra_preargs:
+                ld_args[:0] = extra_preargs
+            if extra_postargs:
+                ld_args.extend(extra_postargs)
+
+            self.mkpath (os.path.dirname (output_filename))
+            try:
+                self.spawn ([self.linker] + ld_args)
+            except DistutilsExecError, msg:
+                raise LinkError, msg
+
+        else:
+            log.debug("skipping %s (up-to-date)", output_filename)
+
+ # link ()
+
+ # -- Miscellaneous methods -----------------------------------------
+
+
+ def find_library_file (self, dirs, lib, debug=0):
+ # List of effective library names to try, in order of preference:
+ # xxx_bcpp.lib is better than xxx.lib
+ # and xxx_d.lib is better than xxx.lib if debug is set
+ #
+ # The "_bcpp" suffix is to handle a Python installation for people
+ # with multiple compilers (primarily Distutils hackers, I suspect
+ # ;-). The idea is they'd have one static library for each
+ # compiler they care about, since (almost?) every Windows compiler
+ # seems to have a different format for static libraries.
+ if debug:
+ dlib = (lib + "_d")
+ try_names = (dlib + "_bcpp", lib + "_bcpp", dlib, lib)
+ else:
+ try_names = (lib + "_bcpp", lib)
+
+ for dir in dirs:
+ for name in try_names:
+ libfile = os.path.join(dir, self.library_filename(name))
+ if os.path.exists(libfile):
+ return libfile
+ else:
+ # Oops, didn't find it in *any* of 'dirs'
+ return None
+
+ # overwrite the one from CCompiler to support rc and res-files
+ def object_filenames (self,
+ source_filenames,
+ strip_dir=0,
+ output_dir=''):
+ if output_dir is None: output_dir = ''
+ obj_names = []
+ for src_name in source_filenames:
+ # use normcase to make sure '.rc' is really '.rc' and not '.RC'
+ (base, ext) = os.path.splitext (os.path.normcase(src_name))
+ if ext not in (self.src_extensions + ['.rc','.res']):
+ raise UnknownFileError, \
+ "unknown file type '%s' (from '%s')" % \
+ (ext, src_name)
+ if strip_dir:
+ base = os.path.basename (base)
+ if ext == '.res':
+ # these can go unchanged
+ obj_names.append (os.path.join (output_dir, base + ext))
+ elif ext == '.rc':
+ # these need to be compiled to .res-files
+ obj_names.append (os.path.join (output_dir, base + '.res'))
+ else:
+ obj_names.append (os.path.join (output_dir,
+ base + self.obj_extension))
+ return obj_names
+
+ # object_filenames ()
+
+ def preprocess (self,
+ source,
+ output_file=None,
+ macros=None,
+ include_dirs=None,
+ extra_preargs=None,
+ extra_postargs=None):
+
+ (_, macros, include_dirs) = \
+ self._fix_compile_args(None, macros, include_dirs)
+ pp_opts = gen_preprocess_options(macros, include_dirs)
+ pp_args = ['cpp32.exe'] + pp_opts
+ if output_file is not None:
+ pp_args.append('-o' + output_file)
+ if extra_preargs:
+ pp_args[:0] = extra_preargs
+ if extra_postargs:
+ pp_args.extend(extra_postargs)
+ pp_args.append(source)
+
+ # We need to preprocess: either we're being forced to, or the
+ # source file is newer than the target (or the target doesn't
+ # exist).
+ if self.force or output_file is None or newer(source, output_file):
+ if output_file:
+ self.mkpath(os.path.dirname(output_file))
+ try:
+ self.spawn(pp_args)
+ except DistutilsExecError, msg:
+ print msg
+ raise CompileError, msg
+
+ # preprocess()
diff --git a/cashew/Lib/distutils/ccompiler.py b/cashew/Lib/distutils/ccompiler.py
new file mode 100644
index 0000000..88a910c
--- /dev/null
+++ b/cashew/Lib/distutils/ccompiler.py
@@ -0,0 +1,1096 @@
+"""distutils.ccompiler
+
+Contains CCompiler, an abstract base class that defines the interface
+for the Distutils compiler abstraction model."""
+
+__revision__ = "$Id$"
+
+import sys
+import os
+import re
+
+from distutils.errors import (CompileError, LinkError, UnknownFileError,
+ DistutilsPlatformError, DistutilsModuleError)
+from distutils.spawn import spawn
+from distutils.file_util import move_file
+from distutils.dir_util import mkpath
+from distutils.dep_util import newer_group
+from distutils.util import split_quoted, execute
+from distutils import log
+# following import is for backward compatibility
+from distutils.sysconfig import customize_compiler
+
+class CCompiler:
+ """Abstract base class to define the interface that must be implemented
+ by real compiler classes. Also has some utility methods used by
+ several compiler classes.
+
+ The basic idea behind a compiler abstraction class is that each
+ instance can be used for all the compile/link steps in building a
+ single project. Thus, attributes common to all of those compile and
+ link steps -- include directories, macros to define, libraries to link
+ against, etc. -- are attributes of the compiler instance. To allow for
+ variability in how individual files are treated, most of those
+ attributes may be varied on a per-compilation or per-link basis.
+ """
+
+ # 'compiler_type' is a class attribute that identifies this class. It
+ # keeps code that wants to know what kind of compiler it's dealing with
+ # from having to import all possible compiler classes just to do an
+ # 'isinstance'. In concrete CCompiler subclasses, 'compiler_type'
+ # should really, really be one of the keys of the 'compiler_class'
+ # dictionary (see below -- used by the 'new_compiler()' factory
+ # function) -- authors of new compiler interface classes are
+ # responsible for updating 'compiler_class'!
+ compiler_type = None
+
+ # XXX things not handled by this compiler abstraction model:
+ # * client can't provide additional options for a compiler,
+ # e.g. warning, optimization, debugging flags. Perhaps this
+ # should be the domain of concrete compiler abstraction classes
+ # (UnixCCompiler, MSVCCompiler, etc.) -- or perhaps the base
+ # class should have methods for the common ones.
+ # * can't completely override the include or library search
+ # path, ie. no "cc -I -Idir1 -Idir2" or "cc -L -Ldir1 -Ldir2".
+ # I'm not sure how widely supported this is even by Unix
+ # compilers, much less on other platforms. And I'm even less
+ # sure how useful it is; maybe for cross-compiling, but
+ # support for that is a ways off. (And anyways, cross
+ # compilers probably have a dedicated binary with the
+ # right paths compiled in. I hope.)
+ # * can't do really freaky things with the library list/library
+ # dirs, e.g. "-Ldir1 -lfoo -Ldir2 -lfoo" to link against
+ # different versions of libfoo.a in different locations. I
+ # think this is useless without the ability to null out the
+ # library search path anyways.
+
+
+ # Subclasses that rely on the standard filename generation methods
+ # implemented below should override these; see the comment near
+ # those methods ('object_filenames()' et al.) for details:
+ src_extensions = None # list of strings
+ obj_extension = None # string
+ static_lib_extension = None
+ shared_lib_extension = None # string
+ static_lib_format = None # format string
+ shared_lib_format = None # prob. same as static_lib_format
+ exe_extension = None # string
+
+ # Default language settings. language_map is used to detect a source
+ # file or Extension target language, checking source filenames.
+ # language_order is used to detect the language precedence, when deciding
+ # what language to use when mixing source types. For example, if some
+ # extension has two files with ".c" extension, and one with ".cpp", it
+ # is still linked as c++.
+ language_map = {".c" : "c",
+ ".cc" : "c++",
+ ".cpp" : "c++",
+ ".cxx" : "c++",
+ ".m" : "objc",
+ }
+ language_order = ["c++", "objc", "c"]
+
+ def __init__ (self, verbose=0, dry_run=0, force=0):
+ self.dry_run = dry_run
+ self.force = force
+ self.verbose = verbose
+
+ # 'output_dir': a common output directory for object, library,
+ # shared object, and shared library files
+ self.output_dir = None
+
+ # 'macros': a list of macro definitions (or undefinitions). A
+ # macro definition is a 2-tuple (name, value), where the value is
+ # either a string or None (no explicit value). A macro
+ # undefinition is a 1-tuple (name,).
+ self.macros = []
+
+ # 'include_dirs': a list of directories to search for include files
+ self.include_dirs = []
+
+ # 'libraries': a list of libraries to include in any link
+ # (library names, not filenames: eg. "foo" not "libfoo.a")
+ self.libraries = []
+
+ # 'library_dirs': a list of directories to search for libraries
+ self.library_dirs = []
+
+ # 'runtime_library_dirs': a list of directories to search for
+ # shared libraries/objects at runtime
+ self.runtime_library_dirs = []
+
+ # 'objects': a list of object files (or similar, such as explicitly
+ # named library files) to include on any link
+ self.objects = []
+
+ for key in self.executables.keys():
+ self.set_executable(key, self.executables[key])
+
+ def set_executables(self, **args):
+ """Define the executables (and options for them) that will be run
+ to perform the various stages of compilation. The exact set of
+ executables that may be specified here depends on the compiler
+ class (via the 'executables' class attribute), but most will have:
+ compiler the C/C++ compiler
+ linker_so linker used to create shared objects and libraries
+ linker_exe linker used to create binary executables
+ archiver static library creator
+
+ On platforms with a command-line (Unix, DOS/Windows), each of these
+ is a string that will be split into executable name and (optional)
+ list of arguments. (Splitting the string is done similarly to how
+ Unix shells operate: words are delimited by spaces, but quotes and
+ backslashes can override this. See
+ 'distutils.util.split_quoted()'.)
+ """
+
+ # Note that some CCompiler implementation classes will define class
+ # attributes 'cpp', 'cc', etc. with hard-coded executable names;
+ # this is appropriate when a compiler class is for exactly one
+ # compiler/OS combination (eg. MSVCCompiler). Other compiler
+ # classes (UnixCCompiler, in particular) are driven by information
+ # discovered at run-time, since there are many different ways to do
+ # basically the same things with Unix C compilers.
+
+ for key in args.keys():
+ if key not in self.executables:
+ raise ValueError, \
+ "unknown executable '%s' for class %s" % \
+ (key, self.__class__.__name__)
+ self.set_executable(key, args[key])
+
+ def set_executable(self, key, value):
+ if isinstance(value, basestring):
+ setattr(self, key, split_quoted(value))
+ else:
+ setattr(self, key, value)
+
+ def _find_macro(self, name):
+ i = 0
+ for defn in self.macros:
+ if defn[0] == name:
+ return i
+ i = i + 1
+ return None
+
+ def _check_macro_definitions(self, definitions):
+ """Ensures that every element of 'definitions' is a valid macro
+ definition, ie. either (name,value) 2-tuple or a (name,) tuple. Do
+ nothing if all definitions are OK, raise TypeError otherwise.
+ """
+ for defn in definitions:
+ if not (isinstance(defn, tuple) and
+ (len (defn) == 1 or
+ (len (defn) == 2 and
+ (isinstance(defn[1], str) or defn[1] is None))) and
+ isinstance(defn[0], str)):
+ raise TypeError, \
+ ("invalid macro definition '%s': " % defn) + \
+ "must be tuple (string,), (string, string), or " + \
+ "(string, None)"
+
+
+ # -- Bookkeeping methods -------------------------------------------
+
+ def define_macro(self, name, value=None):
+ """Define a preprocessor macro for all compilations driven by this
+ compiler object. The optional parameter 'value' should be a
+ string; if it is not supplied, then the macro will be defined
+ without an explicit value and the exact outcome depends on the
+ compiler used (XXX true? does ANSI say anything about this?)
+ """
+ # Delete from the list of macro definitions/undefinitions if
+ # already there (so that this one will take precedence).
+ i = self._find_macro (name)
+ if i is not None:
+ del self.macros[i]
+
+ defn = (name, value)
+ self.macros.append (defn)
+
+ def undefine_macro(self, name):
+ """Undefine a preprocessor macro for all compilations driven by
+ this compiler object. If the same macro is defined by
+ 'define_macro()' and undefined by 'undefine_macro()' the last call
+ takes precedence (including multiple redefinitions or
+ undefinitions). If the macro is redefined/undefined on a
+ per-compilation basis (ie. in the call to 'compile()'), then that
+ takes precedence.
+ """
+ # Delete from the list of macro definitions/undefinitions if
+ # already there (so that this one will take precedence).
+ i = self._find_macro (name)
+ if i is not None:
+ del self.macros[i]
+
+ undefn = (name,)
+ self.macros.append (undefn)
+
+ def add_include_dir(self, dir):
+ """Add 'dir' to the list of directories that will be searched for
+ header files. The compiler is instructed to search directories in
+ the order in which they are supplied by successive calls to
+ 'add_include_dir()'.
+ """
+ self.include_dirs.append (dir)
+
+ def set_include_dirs(self, dirs):
+ """Set the list of directories that will be searched to 'dirs' (a
+ list of strings). Overrides any preceding calls to
+ 'add_include_dir()'; subsequence calls to 'add_include_dir()' add
+ to the list passed to 'set_include_dirs()'. This does not affect
+ any list of standard include directories that the compiler may
+ search by default.
+ """
+ self.include_dirs = dirs[:]
+
+ def add_library(self, libname):
+ """Add 'libname' to the list of libraries that will be included in
+ all links driven by this compiler object. Note that 'libname'
+ should *not* be the name of a file containing a library, but the
+ name of the library itself: the actual filename will be inferred by
+ the linker, the compiler, or the compiler class (depending on the
+ platform).
+
+ The linker will be instructed to link against libraries in the
+ order they were supplied to 'add_library()' and/or
+ 'set_libraries()'. It is perfectly valid to duplicate library
+ names; the linker will be instructed to link against libraries as
+ many times as they are mentioned.
+ """
+ self.libraries.append (libname)
+
+ def set_libraries(self, libnames):
+ """Set the list of libraries to be included in all links driven by
+ this compiler object to 'libnames' (a list of strings). This does
+ not affect any standard system libraries that the linker may
+ include by default.
+ """
+ self.libraries = libnames[:]
+
+
+ def add_library_dir(self, dir):
+ """Add 'dir' to the list of directories that will be searched for
+ libraries specified to 'add_library()' and 'set_libraries()'. The
+ linker will be instructed to search for libraries in the order they
+ are supplied to 'add_library_dir()' and/or 'set_library_dirs()'.
+ """
+ self.library_dirs.append(dir)
+
+ def set_library_dirs(self, dirs):
+ """Set the list of library search directories to 'dirs' (a list of
+ strings). This does not affect any standard library search path
+ that the linker may search by default.
+ """
+ self.library_dirs = dirs[:]
+
+ def add_runtime_library_dir(self, dir):
+ """Add 'dir' to the list of directories that will be searched for
+ shared libraries at runtime.
+ """
+ self.runtime_library_dirs.append(dir)
+
+ def set_runtime_library_dirs(self, dirs):
+ """Set the list of directories to search for shared libraries at
+ runtime to 'dirs' (a list of strings). This does not affect any
+ standard search path that the runtime linker may search by
+ default.
+ """
+ self.runtime_library_dirs = dirs[:]
+
+ def add_link_object(self, object):
+ """Add 'object' to the list of object files (or analogues, such as
+ explicitly named library files or the output of "resource
+ compilers") to be included in every link driven by this compiler
+ object.
+ """
+ self.objects.append(object)
+
+ def set_link_objects(self, objects):
+ """Set the list of object files (or analogues) to be included in
+ every link to 'objects'. This does not affect any standard object
+ files that the linker may include by default (such as system
+ libraries).
+ """
+ self.objects = objects[:]
+
+
+ # -- Private utility methods --------------------------------------
+ # (here for the convenience of subclasses)
+
+ # Helper method to prep compiler in subclass compile() methods
+
+ def _setup_compile(self, outdir, macros, incdirs, sources, depends,
+ extra):
+ """Process arguments and decide which source files to compile."""
+ if outdir is None:
+ outdir = self.output_dir
+ elif not isinstance(outdir, str):
+ raise TypeError, "'output_dir' must be a string or None"
+
+ if macros is None:
+ macros = self.macros
+ elif isinstance(macros, list):
+ macros = macros + (self.macros or [])
+ else:
+ raise TypeError, "'macros' (if supplied) must be a list of tuples"
+
+ if incdirs is None:
+ incdirs = self.include_dirs
+ elif isinstance(incdirs, (list, tuple)):
+ incdirs = list(incdirs) + (self.include_dirs or [])
+ else:
+ raise TypeError, \
+ "'include_dirs' (if supplied) must be a list of strings"
+
+ if extra is None:
+ extra = []
+
+ # Get the list of expected output (object) files
+ objects = self.object_filenames(sources,
+ strip_dir=0,
+ output_dir=outdir)
+ assert len(objects) == len(sources)
+
+ pp_opts = gen_preprocess_options(macros, incdirs)
+
+ build = {}
+ for i in range(len(sources)):
+ src = sources[i]
+ obj = objects[i]
+ ext = os.path.splitext(src)[1]
+ self.mkpath(os.path.dirname(obj))
+ build[obj] = (src, ext)
+
+ return macros, objects, extra, pp_opts, build
+
+ def _get_cc_args(self, pp_opts, debug, before):
+ # works for unixccompiler, emxccompiler, cygwinccompiler
+ cc_args = pp_opts + ['-c']
+ if debug:
+ cc_args[:0] = ['-g']
+ if before:
+ cc_args[:0] = before
+ return cc_args
+
+ def _fix_compile_args(self, output_dir, macros, include_dirs):
+ """Typecheck and fix-up some of the arguments to the 'compile()'
+ method, and return fixed-up values. Specifically: if 'output_dir'
+ is None, replaces it with 'self.output_dir'; ensures that 'macros'
+ is a list, and augments it with 'self.macros'; ensures that
+ 'include_dirs' is a list, and augments it with 'self.include_dirs'.
+ Guarantees that the returned values are of the correct type,
+ i.e. for 'output_dir' either string or None, and for 'macros' and
+ 'include_dirs' either list or None.
+ """
+ if output_dir is None:
+ output_dir = self.output_dir
+ elif not isinstance(output_dir, str):
+ raise TypeError, "'output_dir' must be a string or None"
+
+ if macros is None:
+ macros = self.macros
+ elif isinstance(macros, list):
+ macros = macros + (self.macros or [])
+ else:
+ raise TypeError, "'macros' (if supplied) must be a list of tuples"
+
+ if include_dirs is None:
+ include_dirs = self.include_dirs
+ elif isinstance(include_dirs, (list, tuple)):
+ include_dirs = list (include_dirs) + (self.include_dirs or [])
+ else:
+ raise TypeError, \
+ "'include_dirs' (if supplied) must be a list of strings"
+
+ return output_dir, macros, include_dirs
+
+ def _fix_object_args(self, objects, output_dir):
+ """Typecheck and fix up some arguments supplied to various methods.
+ Specifically: ensure that 'objects' is a list; if output_dir is
+ None, replace with self.output_dir. Return fixed versions of
+ 'objects' and 'output_dir'.
+ """
+ if not isinstance(objects, (list, tuple)):
+ raise TypeError, \
+ "'objects' must be a list or tuple of strings"
+ objects = list (objects)
+
+ if output_dir is None:
+ output_dir = self.output_dir
+ elif not isinstance(output_dir, str):
+ raise TypeError, "'output_dir' must be a string or None"
+
+ return (objects, output_dir)
+
+ def _fix_lib_args(self, libraries, library_dirs, runtime_library_dirs):
+ """Typecheck and fix up some of the arguments supplied to the
+ 'link_*' methods. Specifically: ensure that all arguments are
+ lists, and augment them with their permanent versions
+ (eg. 'self.libraries' augments 'libraries'). Return a tuple with
+ fixed versions of all arguments.
+ """
+ if libraries is None:
+ libraries = self.libraries
+ elif isinstance(libraries, (list, tuple)):
+ libraries = list (libraries) + (self.libraries or [])
+ else:
+ raise TypeError, \
+ "'libraries' (if supplied) must be a list of strings"
+
+ if library_dirs is None:
+ library_dirs = self.library_dirs
+ elif isinstance(library_dirs, (list, tuple)):
+ library_dirs = list (library_dirs) + (self.library_dirs or [])
+ else:
+ raise TypeError, \
+ "'library_dirs' (if supplied) must be a list of strings"
+
+ if runtime_library_dirs is None:
+ runtime_library_dirs = self.runtime_library_dirs
+ elif isinstance(runtime_library_dirs, (list, tuple)):
+ runtime_library_dirs = (list (runtime_library_dirs) +
+ (self.runtime_library_dirs or []))
+ else:
+ raise TypeError, \
+ "'runtime_library_dirs' (if supplied) " + \
+ "must be a list of strings"
+
+ return (libraries, library_dirs, runtime_library_dirs)
+
+ def _need_link(self, objects, output_file):
+ """Return true if we need to relink the files listed in 'objects'
+ to recreate 'output_file'.
+ """
+ if self.force:
+ return 1
+ else:
+ if self.dry_run:
+ newer = newer_group (objects, output_file, missing='newer')
+ else:
+ newer = newer_group (objects, output_file)
+ return newer
+
+ def detect_language(self, sources):
+ """Detect the language of a given file, or list of files. Uses
+ language_map, and language_order to do the job.
+ """
+ if not isinstance(sources, list):
+ sources = [sources]
+ lang = None
+ index = len(self.language_order)
+ for source in sources:
+ base, ext = os.path.splitext(source)
+ extlang = self.language_map.get(ext)
+ try:
+ extindex = self.language_order.index(extlang)
+ if extindex < index:
+ lang = extlang
+ index = extindex
+ except ValueError:
+ pass
+ return lang
+
+ # -- Worker methods ------------------------------------------------
+ # (must be implemented by subclasses)
+
+ def preprocess(self, source, output_file=None, macros=None,
+ include_dirs=None, extra_preargs=None, extra_postargs=None):
+ """Preprocess a single C/C++ source file, named in 'source'.
+ Output will be written to file named 'output_file', or stdout if
+ 'output_file' not supplied. 'macros' is a list of macro
+ definitions as for 'compile()', which will augment the macros set
+ with 'define_macro()' and 'undefine_macro()'. 'include_dirs' is a
+ list of directory names that will be added to the default list.
+
+ This base implementation does nothing; concrete compiler classes
+ are expected to override it.
+
+ Raises PreprocessError on failure.
+ """
+ pass
+
+ def compile(self, sources, output_dir=None, macros=None,
+ include_dirs=None, debug=0, extra_preargs=None,
+ extra_postargs=None, depends=None):
+ """Compile one or more source files.
+
+ 'sources' must be a list of filenames, most likely C/C++
+ files, but in reality anything that can be handled by a
+ particular compiler and compiler class (eg. MSVCCompiler can
+ handle resource files in 'sources'). Return a list of object
+ filenames, one per source filename in 'sources'. Depending on
+ the implementation, not all source files will necessarily be
+ compiled, but all corresponding object filenames will be
+ returned.
+
+ If 'output_dir' is given, object files will be put under it, while
+ retaining their original path component. That is, "foo/bar.c"
+ normally compiles to "foo/bar.o" (for a Unix implementation); if
+ 'output_dir' is "build", then it would compile to
+ "build/foo/bar.o".
+
+ 'macros', if given, must be a list of macro definitions. A macro
+ definition is either a (name, value) 2-tuple or a (name,) 1-tuple.
+ The former defines a macro; if the value is None, the macro is
+ defined without an explicit value. The 1-tuple case undefines a
+ macro. Later definitions/redefinitions/ undefinitions take
+ precedence.
+
+ 'include_dirs', if given, must be a list of strings, the
+ directories to add to the default include file search path for this
+ compilation only.
+
+ 'debug' is a boolean; if true, the compiler will be instructed to
+ output debug symbols in (or alongside) the object file(s).
+
+ 'extra_preargs' and 'extra_postargs' are implementation- dependent.
+ On platforms that have the notion of a command-line (e.g. Unix,
+ DOS/Windows), they are most likely lists of strings: extra
+ command-line arguments to prepand/append to the compiler command
+ line. On other platforms, consult the implementation class
+ documentation. In any event, they are intended as an escape hatch
+ for those occasions when the abstract compiler framework doesn't
+ cut the mustard.
+
+ 'depends', if given, is a list of filenames that all targets
+ depend on. If a source file is older than any file in
+ depends, then the source file will be recompiled. This
+ supports dependency tracking, but only at a coarse
+ granularity.
+
+ Raises CompileError on failure.
+ """
+ # A concrete compiler class can either override this method
+ # entirely or implement _compile().
+
+ macros, objects, extra_postargs, pp_opts, build = \
+ self._setup_compile(output_dir, macros, include_dirs, sources,
+ depends, extra_postargs)
+ cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
+
+ for obj in objects:
+ try:
+ src, ext = build[obj]
+ except KeyError:
+ continue
+ self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
+
+ # Return *all* object filenames, not just the ones we just built.
+ return objects
+
+ def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
+ """Compile 'src' to produce 'obj'.
+
+ 'compile()' calls this once per object file with the source name,
+ its extension, the compiler arguments and the preprocessor options.
+ This base implementation does nothing.
+ """
+
+ # A concrete compiler class that does not override compile()
+ # should implement _compile().
+ pass
+
+ def create_static_lib(self, objects, output_libname, output_dir=None,
+ debug=0, target_lang=None):
+ """Link a bunch of stuff together to create a static library file.
+ The "bunch of stuff" consists of the list of object files supplied
+ as 'objects', the extra object files supplied to
+ 'add_link_object()' and/or 'set_link_objects()', the libraries
+ supplied to 'add_library()' and/or 'set_libraries()', and the
+ libraries supplied as 'libraries' (if any).
+
+ 'output_libname' should be a library name, not a filename; the
+ filename will be inferred from the library name. 'output_dir' is
+ the directory where the library file will be put.
+
+ 'debug' is a boolean; if true, debugging information will be
+ included in the library (note that on most platforms, it is the
+ compile step where this matters: the 'debug' flag is included here
+ just for consistency).
+
+ 'target_lang' is the target language for which the given objects
+ are being compiled. This allows specific linkage time treatment of
+ certain languages.
+
+ This base implementation does nothing; concrete compiler classes
+ are expected to override it.
+
+ Raises LibError on failure.
+ """
+ pass
+
+ # values for target_desc parameter in link()
+ SHARED_OBJECT = "shared_object"
+ SHARED_LIBRARY = "shared_library"
+ EXECUTABLE = "executable"
+
+ def link(self, target_desc, objects, output_filename, output_dir=None,
+ libraries=None, library_dirs=None, runtime_library_dirs=None,
+ export_symbols=None, debug=0, extra_preargs=None,
+ extra_postargs=None, build_temp=None, target_lang=None):
+ """Link a bunch of stuff together to create an executable or
+ shared library file.
+
+ 'target_desc' says what kind of file to produce; it is one of the
+ class constants SHARED_OBJECT, SHARED_LIBRARY or EXECUTABLE.
+
+ The "bunch of stuff" consists of the list of object files supplied
+ as 'objects'. 'output_filename' should be a filename. If
+ 'output_dir' is supplied, 'output_filename' is relative to it
+ (i.e. 'output_filename' can provide directory components if
+ needed).
+
+ 'libraries' is a list of libraries to link against. These are
+ library names, not filenames, since they're translated into
+ filenames in a platform-specific way (eg. "foo" becomes "libfoo.a"
+ on Unix and "foo.lib" on DOS/Windows). However, they can include a
+ directory component, which means the linker will look in that
+ specific directory rather than searching all the normal locations.
+
+ 'library_dirs', if supplied, should be a list of directories to
+ search for libraries that were specified as bare library names
+ (ie. no directory component). These are on top of the system
+ default and those supplied to 'add_library_dir()' and/or
+ 'set_library_dirs()'. 'runtime_library_dirs' is a list of
+ directories that will be embedded into the shared library and used
+ to search for other shared libraries that *it* depends on at
+ run-time. (This may only be relevant on Unix.)
+
+ 'export_symbols' is a list of symbols that the shared library will
+ export. (This appears to be relevant only on Windows.)
+
+ 'debug' is as for 'compile()' and 'create_static_lib()', with the
+ slight distinction that it actually matters on most platforms (as
+ opposed to 'create_static_lib()', which includes a 'debug' flag
+ mostly for form's sake).
+
+ 'extra_preargs' and 'extra_postargs' are as for 'compile()' (except
+ of course that they supply command-line arguments for the
+ particular linker being used).
+
+ 'build_temp' is not interpreted by this base class; presumably it
+ names a directory for temporary build files -- TODO confirm against
+ the concrete compiler classes.
+
+ 'target_lang' is the target language for which the given objects
+ are being compiled. This allows specific linkage time treatment of
+ certain languages.
+
+ This base implementation raises NotImplementedError; concrete
+ compiler classes must override it.
+
+ Raises LinkError on failure.
+ """
+ raise NotImplementedError
+
+
+ # Old 'link_*()' methods, rewritten to use the new 'link()' method.
+
+ def link_shared_lib(self, objects, output_libname, output_dir=None,
+ libraries=None, library_dirs=None,
+ runtime_library_dirs=None, export_symbols=None,
+ debug=0, extra_preargs=None, extra_postargs=None,
+ build_temp=None, target_lang=None):
+ self.link(CCompiler.SHARED_LIBRARY, objects,
+ self.library_filename(output_libname, lib_type='shared'),
+ output_dir,
+ libraries, library_dirs, runtime_library_dirs,
+ export_symbols, debug,
+ extra_preargs, extra_postargs, build_temp, target_lang)
+
+
+ def link_shared_object(self, objects, output_filename, output_dir=None,
+ libraries=None, library_dirs=None,
+ runtime_library_dirs=None, export_symbols=None,
+ debug=0, extra_preargs=None, extra_postargs=None,
+ build_temp=None, target_lang=None):
+ self.link(CCompiler.SHARED_OBJECT, objects,
+ output_filename, output_dir,
+ libraries, library_dirs, runtime_library_dirs,
+ export_symbols, debug,
+ extra_preargs, extra_postargs, build_temp, target_lang)
+
+ def link_executable(self, objects, output_progname, output_dir=None,
+ libraries=None, library_dirs=None,
+ runtime_library_dirs=None, debug=0, extra_preargs=None,
+ extra_postargs=None, target_lang=None):
+ self.link(CCompiler.EXECUTABLE, objects,
+ self.executable_filename(output_progname), output_dir,
+ libraries, library_dirs, runtime_library_dirs, None,
+ debug, extra_preargs, extra_postargs, None, target_lang)
+
+
+ # -- Miscellaneous methods -----------------------------------------
+ # These are all used by the 'gen_lib_options()' function; there is
+ # no appropriate default implementation so subclasses should
+ # implement all of these.
+
+ def library_dir_option(self, dir):
+ """Return the compiler option to add 'dir' to the list of
+ directories searched for libraries.
+
+ The base implementation raises NotImplementedError; subclasses
+ should implement it.
+ """
+ raise NotImplementedError
+
+ def runtime_library_dir_option(self, dir):
+ """Return the compiler option to add 'dir' to the list of
+ directories searched for runtime libraries.
+
+ The base implementation raises NotImplementedError; subclasses
+ should implement it.
+ """
+ raise NotImplementedError
+
+ def library_option(self, lib):
+ """Return the compiler option to add 'lib' to the list of libraries
+ linked into the shared library or executable.
+
+ The base implementation raises NotImplementedError; subclasses
+ should implement it.
+ """
+ raise NotImplementedError
+
+ def has_function(self, funcname, includes=None, include_dirs=None,
+ libraries=None, library_dirs=None):
+ """Return a boolean indicating whether funcname is supported on
+ the current platform. The optional arguments can be used to
+ augment the compilation environment.
+ """
+
+ # this can't be included at module scope because it tries to
+ # import math which might not be available at that point - maybe
+ # the necessary logic should just be inlined?
+ import tempfile
+ if includes is None:
+ includes = []
+ if include_dirs is None:
+ include_dirs = []
+ if libraries is None:
+ libraries = []
+ if library_dirs is None:
+ library_dirs = []
+ fd, fname = tempfile.mkstemp(".c", funcname, text=True)
+ f = os.fdopen(fd, "w")
+ try:
+ for incl in includes:
+ f.write("""#include "%s"\n""" % incl)
+ f.write("""\
+main (int argc, char **argv) {
+ %s();
+}
+""" % funcname)
+ finally:
+ f.close()
+ try:
+ objects = self.compile([fname], include_dirs=include_dirs)
+ except CompileError:
+ return False
+
+ try:
+ self.link_executable(objects, "a.out",
+ libraries=libraries,
+ library_dirs=library_dirs)
+ except (LinkError, TypeError):
+ return False
+ return True
+
+ def find_library_file (self, dirs, lib, debug=0):
+ """Search the specified list of directories for a static or shared
+ library file 'lib' and return the full path to that file. If
+ 'debug' true, look for a debugging version (if that makes sense on
+ the current platform). Return None if 'lib' wasn't found in any of
+ the specified directories.
+
+ The base implementation raises NotImplementedError; subclasses
+ should implement it.
+ """
+ raise NotImplementedError
+
+ # -- Filename generation methods -----------------------------------
+
+ # The default implementation of the filename generating methods are
+ # prejudiced towards the Unix/DOS/Windows view of the world:
+ # * object files are named by replacing the source file extension
+ # (eg. .c/.cpp -> .o/.obj)
+ # * library files (shared or static) are named by plugging the
+ # library name and extension into a format string, eg.
+ # "lib%s.%s" % (lib_name, ".a") for Unix static libraries
+ # * executables are named by appending an extension (possibly
+ # empty) to the program name: eg. progname + ".exe" for
+ # Windows
+ #
+ # To reduce redundant code, these methods expect to find
+ # several attributes in the current object (presumably defined
+ # as class attributes):
+ # * src_extensions -
+ # list of C/C++ source file extensions, eg. ['.c', '.cpp']
+ # * obj_extension -
+ # object file extension, eg. '.o' or '.obj'
+ # * static_lib_extension -
+ # extension for static library files, eg. '.a' or '.lib'
+ # * shared_lib_extension -
+ # extension for shared library/object files, eg. '.so', '.dll'
+ # * static_lib_format -
+ # format string for generating static library filenames,
+ # eg. 'lib%s.%s' or '%s.%s'
+ # * shared_lib_format
+ # format string for generating shared library filenames
+ # (probably same as static_lib_format, since the extension
+ # is one of the intended parameters to the format string)
+ # * exe_extension -
+ # extension for executable files, eg. '' or '.exe'
+
+ def object_filenames(self, source_filenames, strip_dir=0, output_dir=''):
+ if output_dir is None:
+ output_dir = ''
+ obj_names = []
+ for src_name in source_filenames:
+ base, ext = os.path.splitext(src_name)
+ base = os.path.splitdrive(base)[1] # Chop off the drive
+ base = base[os.path.isabs(base):] # If abs, chop off leading /
+ if ext not in self.src_extensions:
+ raise UnknownFileError, \
+ "unknown file type '%s' (from '%s')" % (ext, src_name)
+ if strip_dir:
+ base = os.path.basename(base)
+ obj_names.append(os.path.join(output_dir,
+ base + self.obj_extension))
+ return obj_names
+
+ def shared_object_filename(self, basename, strip_dir=0, output_dir=''):
+ assert output_dir is not None
+ if strip_dir:
+ basename = os.path.basename (basename)
+ return os.path.join(output_dir, basename + self.shared_lib_extension)
+
+ def executable_filename(self, basename, strip_dir=0, output_dir=''):
+ assert output_dir is not None
+ if strip_dir:
+ basename = os.path.basename (basename)
+ return os.path.join(output_dir, basename + (self.exe_extension or ''))
+
+ def library_filename(self, libname, lib_type='static', # or 'shared', 'dylib', 'xcode_stub'
+ strip_dir=0, output_dir=''):
+ assert output_dir is not None
+ if lib_type not in ("static", "shared", "dylib", "xcode_stub"):
+ raise ValueError, ("""'lib_type' must be "static", "shared", """
+ """"dylib", or "xcode_stub".""")
+ fmt = getattr(self, lib_type + "_lib_format")
+ ext = getattr(self, lib_type + "_lib_extension")
+
+ dir, base = os.path.split (libname)
+ filename = fmt % (base, ext)
+ if strip_dir:
+ dir = ''
+
+ return os.path.join(output_dir, dir, filename)
+
+
+ # -- Utility methods -----------------------------------------------
+
+ def announce(self, msg, level=1):
+ log.debug(msg)
+
+ def debug_print(self, msg):
+ from distutils.debug import DEBUG
+ if DEBUG:
+ print msg
+
+ def warn(self, msg):
+ sys.stderr.write("warning: %s\n" % msg)
+
+ def execute(self, func, args, msg=None, level=1):
+ execute(func, args, msg, self.dry_run)
+
+ def spawn(self, cmd):
+ spawn(cmd, dry_run=self.dry_run)
+
+ def move_file(self, src, dst):
+ return move_file(src, dst, dry_run=self.dry_run)
+
+ def mkpath(self, name, mode=0777):
+ mkpath(name, mode, dry_run=self.dry_run)
+
+
+# class CCompiler
+
+
+# Map a sys.platform/os.name ('posix', 'nt') to the default compiler
+# type for that platform. Keys are interpreted as re match
+# patterns. Order is important; platform mappings are preferred over
+# OS names.
+_default_compilers = (
+
+ # Platform string mappings
+
+ # on a cygwin built python we can use gcc like an ordinary UNIXish
+ # compiler
+ ('cygwin.*', 'unix'),
+ ('os2emx', 'emx'),
+
+ # OS name mappings
+ ('posix', 'unix'),
+ ('nt', 'msvc'),
+
+ )
+
+def get_default_compiler(osname=None, platform=None):
+ """ Determine the default compiler to use for the given platform.
+
+ osname should be one of the standard Python OS names (i.e. the
+ ones returned by os.name) and platform the common value
+ returned by sys.platform for the platform in question.
+
+ The default values are os.name and sys.platform in case the
+ parameters are not given.
+
+ """
+ if osname is None:
+ osname = os.name
+ if platform is None:
+ platform = sys.platform
+ for pattern, compiler in _default_compilers:
+ if re.match(pattern, platform) is not None or \
+ re.match(pattern, osname) is not None:
+ return compiler
+ # Default to Unix compiler
+ return 'unix'
+
+# Map compiler types to (module_name, class_name) pairs -- ie. where to
+# find the code that implements an interface to this compiler. (The module
+# is assumed to be in the 'distutils' package.)
+compiler_class = { 'unix': ('unixccompiler', 'UnixCCompiler',
+ "standard UNIX-style compiler"),
+ 'msvc': ('msvccompiler', 'MSVCCompiler',
+ "Microsoft Visual C++"),
+ 'cygwin': ('cygwinccompiler', 'CygwinCCompiler',
+ "Cygwin port of GNU C Compiler for Win32"),
+ 'mingw32': ('cygwinccompiler', 'Mingw32CCompiler',
+ "Mingw32 port of GNU C Compiler for Win32"),
+ 'bcpp': ('bcppcompiler', 'BCPPCompiler',
+ "Borland C++ Compiler"),
+ 'emx': ('emxccompiler', 'EMXCCompiler',
+ "EMX port of GNU C Compiler for OS/2"),
+ }
+
+def show_compilers():
+ """Print list of available compilers (used by the "--help-compiler"
+ options to "build", "build_ext", "build_clib").
+ """
+ # XXX this "knows" that the compiler option it's describing is
+ # "--compiler", which just happens to be the case for the three
+ # commands that use it.
+ from distutils.fancy_getopt import FancyGetopt
+ compilers = []
+ for compiler in compiler_class.keys():
+ compilers.append(("compiler="+compiler, None,
+ compiler_class[compiler][2]))
+ compilers.sort()
+ pretty_printer = FancyGetopt(compilers)
+ pretty_printer.print_help("List of available compilers:")
+
+
+def new_compiler(plat=None, compiler=None, verbose=0, dry_run=0, force=0):
+ """Generate an instance of some CCompiler subclass for the supplied
+ platform/compiler combination. 'plat' defaults to 'os.name'
+ (eg. 'posix', 'nt'), and 'compiler' defaults to the default compiler
+ for that platform. Currently only 'posix' and 'nt' are supported, and
+ the default compilers are "traditional Unix interface" (UnixCCompiler
+ class) and Visual C++ (MSVCCompiler class). Note that it's perfectly
+ possible to ask for a Unix compiler object under Windows, and a
+ Microsoft compiler object under Unix -- if you supply a value for
+ 'compiler', 'plat' is ignored.
+ """
+ if plat is None:
+ plat = os.name
+
+ try:
+ if compiler is None:
+ compiler = get_default_compiler(plat)
+
+ (module_name, class_name, long_description) = compiler_class[compiler]
+ except KeyError:
+ msg = "don't know how to compile C/C++ code on platform '%s'" % plat
+ if compiler is not None:
+ msg = msg + " with '%s' compiler" % compiler
+ raise DistutilsPlatformError, msg
+
+ try:
+ module_name = "distutils." + module_name
+ __import__ (module_name)
+ module = sys.modules[module_name]
+ klass = vars(module)[class_name]
+ except ImportError:
+ raise DistutilsModuleError, \
+ "can't compile C/C++ code: unable to load module '%s'" % \
+ module_name
+ except KeyError:
+ raise DistutilsModuleError, \
+ ("can't compile C/C++ code: unable to find class '%s' " +
+ "in module '%s'") % (class_name, module_name)
+
+ # XXX The None is necessary to preserve backwards compatibility
+ # with classes that expect verbose to be the first positional
+ # argument.
+ return klass(None, dry_run, force)
+
+
+def gen_preprocess_options(macros, include_dirs):
+    """Generate C pre-processor options (-D, -U, -I) as used by at least
+    two types of compilers: the typical Unix compiler and Visual C++.
+    'macros' is the usual thing, a list of 1- or 2-tuples, where (name,)
+    means undefine (-U) macro 'name', and (name,value) means define (-D)
+    macro 'name' to 'value' (a value of None gives a bare "-Dname").
+    'include_dirs' is just a list of directory names to be added to the
+    header file search path (-I).  Returns a list of command-line options
+    suitable for either Unix compilers or Visual C++.  Raises TypeError
+    if an element of 'macros' is not a 1- or 2-tuple.
+    """
+    # XXX it would be nice (mainly aesthetic, and so we don't generate
+    # stupid-looking command lines) to go over 'macros' and eliminate
+    # redundant definitions/undefinitions (ie. ensure that only the
+    # latest mention of a particular macro winds up on the command
+    # line).  I don't think it's essential, though, since most (all?)
+    # Unix C compilers only pay attention to the latest -D or -U
+    # mention of a macro on their command line.  Similar situation for
+    # 'include_dirs'.  I'm punting on both for now.  Anyways, weeding out
+    # redundancies like this should probably be the province of
+    # CCompiler, since the data structures used are inherited from it
+    # and therefore common to all CCompiler classes.
+
+    pp_opts = []
+    for macro in macros:
+        if not (isinstance(macro, tuple) and 1 <= len(macro) <= 2):
+            # 'macro' may be a tuple of any length; wrap it so '%' formats it whole.
+            raise TypeError(
+                ("bad macro definition '%s': "
+                 "each element of 'macros' list must be a 1- or 2-tuple")
+                % (macro,))
+
+        if len(macro) == 1:        # undefine this macro
+            pp_opts.append("-U%s" % macro[0])
+        elif len(macro) == 2:
+            if macro[1] is None:    # define with no explicit value
+                pp_opts.append("-D%s" % macro[0])
+            else:
+                # XXX *don't* need to be clever about quoting the
+                # macro value here, because we're going to avoid the
+                # shell at all costs when we spawn the command!
+                pp_opts.append("-D%s=%s" % macro)
+
+    for dir in include_dirs:
+        pp_opts.append("-I%s" % dir)
+
+    return pp_opts
+
+
+def gen_lib_options(compiler, library_dirs, runtime_library_dirs, libraries):
+ """Generate linker options for searching library directories and
+ linking with specific libraries.
+
+ 'libraries' and 'library_dirs' are, respectively, lists of library names
+ (not filenames!) and search directories. Returns a list of command-line
+ options built by calling the option-generating methods of 'compiler'
+ (library_dir_option(), runtime_library_dir_option(), library_option()).
+ """
+ lib_opts = []
+
+ for dir in library_dirs:
+ lib_opts.append(compiler.library_dir_option(dir))
+
+ for dir in runtime_library_dirs:
+ opt = compiler.runtime_library_dir_option(dir)
+ if isinstance(opt, list):
+ lib_opts.extend(opt)
+ else:
+ lib_opts.append(opt)
+
+ # XXX it's important that we *not* remove redundant library mentions!
+ # sometimes you really do have to say "-lfoo -lbar -lfoo" in order to
+ # resolve all symbols. I just hope we never have to say "-lfoo obj.o
+ # -lbar" to get things to work -- that's certainly a possibility, but a
+ # pretty nasty way to arrange your C code.
+
+ for lib in libraries:
+ lib_dir, lib_name = os.path.split(lib)
+ if lib_dir != '':
+ lib_file = compiler.find_library_file([lib_dir], lib_name)
+ if lib_file is not None:
+ lib_opts.append(lib_file)
+ else:
+ compiler.warn("no library file corresponding to "
+ "'%s' found (skipping)" % lib)
+ else:
+ lib_opts.append(compiler.library_option(lib))
+
+ return lib_opts
diff --git a/cashew/Lib/distutils/cmd.py b/cashew/Lib/distutils/cmd.py
new file mode 100644
index 0000000..9ad5657
--- /dev/null
+++ b/cashew/Lib/distutils/cmd.py
@@ -0,0 +1,457 @@
+"""distutils.cmd
+
+Provides the Command class, the base class for the command classes
+in the distutils.command package.
+"""
+
+__revision__ = "$Id$"
+
+import sys, os, re
+from distutils.errors import DistutilsOptionError
+from distutils import util, dir_util, file_util, archive_util, dep_util
+from distutils import log
+
+class Command:
+ """Abstract base class for defining command classes, the "worker bees"
+ of the Distutils. A useful analogy for command classes is to think of
+ them as subroutines with local variables called "options". The options
+ are "declared" in 'initialize_options()' and "defined" (given their
+ final values, aka "finalized") in 'finalize_options()', both of which
+ must be defined by every command class. The distinction between the
+ two is necessary because option values might come from the outside
+ world (command line, config file, ...), and any options dependent on
+ other options must be computed *after* these outside influences have
+ been processed -- hence 'finalize_options()'. The "body" of the
+ subroutine, where it does all its work based on the values of its
+ options, is the 'run()' method, which must also be implemented by every
+ command class.
+ """
+
+ # 'sub_commands' formalizes the notion of a "family" of commands,
+ # eg. "install" as the parent with sub-commands "install_lib",
+ # "install_headers", etc. The parent of a family of commands
+ # defines 'sub_commands' as a class attribute; it's a list of
+ # (command_name : string, predicate : unbound_method | string | None)
+ # tuples, where 'predicate' is a method of the parent command that
+ # determines whether the corresponding command is applicable in the
+ # current situation. (Eg. "install_headers" is only applicable if
+ # we have any C header files to install.) If 'predicate' is None,
+ # that command is always applicable.
+ #
+ # 'sub_commands' is usually defined at the *end* of a class, because
+ # predicates can be unbound methods, so they must already have been
+ # defined. The canonical example is the "install" command.
+ sub_commands = []
+
+
+ # -- Creation/initialization methods -------------------------------
+
+ def __init__(self, dist):
+ """Create and initialize a new Command object. Most importantly,
+ invokes the 'initialize_options()' method, which is the real
+ initializer and depends on the actual command being
+ instantiated.
+ """
+ # late import because of mutual dependence between these classes
+ from distutils.dist import Distribution
+
+ if not isinstance(dist, Distribution):
+ raise TypeError, "dist must be a Distribution instance"
+ if self.__class__ is Command:
+ raise RuntimeError, "Command is an abstract class"
+
+ self.distribution = dist
+ self.initialize_options()
+
+ # Per-command versions of the global flags, so that the user can
+ # customize Distutils' behaviour command-by-command and let some
+ # commands fall back on the Distribution's behaviour. None means
+ # "not defined, check self.distribution's copy", while 0 or 1 mean
+ # false and true (duh). Note that this means figuring out the real
+ # value of each flag is a touch complicated -- hence "self.dry_run"
+ # is resolved from "self._dry_run" by __getattr__, below.
+ # XXX This needs to be fixed.
+ self._dry_run = None
+
+ # verbose is largely ignored, but needs to be set for
+ # backwards compatibility (I think)?
+ self.verbose = dist.verbose
+
+ # Some commands define a 'self.force' option to ignore file
+ # timestamps, but methods defined *here* assume that
+ # 'self.force' exists for all commands. So define it here
+ # just to be safe.
+ self.force = None
+
+ # The 'help' flag is just used for command-line parsing, so
+ # none of that complicated bureaucracy is needed.
+ self.help = 0
+
+ # 'finalized' records whether or not 'finalize_options()' has been
+ # called. 'finalize_options()' itself should not pay attention to
+ # this flag: it is the business of 'ensure_finalized()', which
+ # always calls 'finalize_options()', to respect/update it.
+ self.finalized = 0
+
+ # XXX A more explicit way to customize dry_run would be better.
+ def __getattr__(self, attr):
+ if attr == 'dry_run':
+ myval = getattr(self, "_" + attr)
+ if myval is None:
+ return getattr(self.distribution, attr)
+ else:
+ return myval
+ else:
+ raise AttributeError, attr
+
+ def ensure_finalized(self):
+ if not self.finalized:
+ self.finalize_options()
+ self.finalized = 1
+
+ # Subclasses must define:
+ # initialize_options()
+ # provide default values for all options; may be customized by
+ # setup script, by options from config file(s), or by command-line
+ # options
+ # finalize_options()
+ # decide on the final values for all options; this is called
+ # after all possible intervention from the outside world
+ # (command-line, option file, etc.) has been processed
+ # run()
+ # run the command: do whatever it is we're here to do,
+ # controlled by the command's various option values
+
+ def initialize_options(self):
+ """Set default values for all the options that this command
+ supports. Note that these defaults may be overridden by other
+ commands, by the setup script, by config files, or by the
+ command-line. Thus, this is not the place to code dependencies
+ between options; generally, 'initialize_options()' implementations
+ are just a bunch of "self.foo = None" assignments.
+
+ This method must be implemented by all command classes.
+ """
+ raise RuntimeError, \
+ "abstract method -- subclass %s must override" % self.__class__
+
+ def finalize_options(self):
+ """Set final values for all the options that this command supports.
+ This is always called as late as possible, ie. after any option
+ assignments from the command-line or from other commands have been
+ done. Thus, this is the place to code option dependencies: if
+ 'foo' depends on 'bar', then it is safe to set 'foo' from 'bar' as
+ long as 'foo' still has the same value it was assigned in
+ 'initialize_options()'.
+
+ This method must be implemented by all command classes.
+ """
+ raise RuntimeError, \
+ "abstract method -- subclass %s must override" % self.__class__
+
+
+ def dump_options(self, header=None, indent=""):
+ from distutils.fancy_getopt import longopt_xlate
+ if header is None:
+ header = "command options for '%s':" % self.get_command_name()
+ self.announce(indent + header, level=log.INFO)
+ indent = indent + " "
+ for (option, _, _) in self.user_options:
+ option = option.translate(longopt_xlate)
+ if option[-1] == "=":
+ option = option[:-1]
+ value = getattr(self, option)
+ self.announce(indent + "%s = %s" % (option, value),
+ level=log.INFO)
+
+ def run(self):
+ """A command's raison d'etre: carry out the action it exists to
+ perform, controlled by the options initialized in
+ 'initialize_options()', customized by other commands, the setup
+ script, the command-line, and config files, and finalized in
+ 'finalize_options()'. All terminal output and filesystem
+ interaction should be done by 'run()'.
+
+ This method must be implemented by all command classes.
+ """
+ raise RuntimeError, \
+ "abstract method -- subclass %s must override" % self.__class__
+
+ def announce(self, msg, level=1):
+ """Log 'msg' at verbosity level 'level' by handing both to
+ 'log.log()'.
+ """
+ log.log(level, msg)
+
+ def debug_print(self, msg):
+ """Print 'msg' to stdout if the global DEBUG (taken from the
+ DISTUTILS_DEBUG environment variable) flag is true.
+ """
+ from distutils.debug import DEBUG
+ if DEBUG:
+ print msg
+ sys.stdout.flush()
+
+
+ # -- Option validation methods -------------------------------------
+ # (these are very handy in writing the 'finalize_options()' method)
+ #
+ # NB. the general philosophy here is to ensure that a particular option
+ # value meets certain type and value constraints. If not, we try to
+ # force it into conformance (eg. if we expect a list but have a string,
+ # split the string on comma and/or whitespace). If we can't force the
+ # option into conformance, raise DistutilsOptionError. Thus, command
+ # classes need do nothing more than (eg.)
+ # self.ensure_string_list('foo')
+ # and they can be guaranteed that thereafter, self.foo will be
+ # a list of strings.
+
+ def _ensure_stringlike(self, option, what, default=None):
+ val = getattr(self, option)
+ if val is None:
+ setattr(self, option, default)
+ return default
+ elif not isinstance(val, str):
+ raise DistutilsOptionError, \
+ "'%s' must be a %s (got `%s`)" % (option, what, val)
+ return val
+
+ def ensure_string(self, option, default=None):
+ """Ensure that 'option' is a string; if not defined, set it to
+ 'default'.
+ """
+ self._ensure_stringlike(option, "string", default)
+
+ def ensure_string_list(self, option):
+ """Ensure that 'option' is a list of strings. If 'option' is
+ currently a string, we split it either on /,\s*/ or /\s+/, so
+ "foo bar baz", "foo,bar,baz", and "foo, bar baz" all become
+ ["foo", "bar", "baz"].
+ """
+ val = getattr(self, option)
+ if val is None:
+ return
+ elif isinstance(val, str):
+ setattr(self, option, re.split(r',\s*|\s+', val))
+ else:
+ if isinstance(val, list):
+ # checks if all elements are str
+ ok = 1
+ for element in val:
+ if not isinstance(element, str):
+ ok = 0
+ break
+ else:
+ ok = 0
+
+ if not ok:
+ raise DistutilsOptionError, \
+ "'%s' must be a list of strings (got %r)" % \
+ (option, val)
+
+
+ def _ensure_tested_string(self, option, tester,
+ what, error_fmt, default=None):
+ val = self._ensure_stringlike(option, what, default)
+ if val is not None and not tester(val):
+ raise DistutilsOptionError, \
+ ("error in '%s' option: " + error_fmt) % (option, val)
+
+ def ensure_filename(self, option):
+ """Ensure that 'option' is the name of an existing file."""
+ self._ensure_tested_string(option, os.path.isfile,
+ "filename",
+ "'%s' does not exist or is not a file")
+
+ def ensure_dirname(self, option):
+ self._ensure_tested_string(option, os.path.isdir,
+ "directory name",
+ "'%s' does not exist or is not a directory")
+
+
+ # -- Convenience methods for commands ------------------------------
+
+ def get_command_name(self):
+ if hasattr(self, 'command_name'):
+ return self.command_name
+ else:
+ return self.__class__.__name__
+
+ def set_undefined_options(self, src_cmd, *option_pairs):
+ """Set the values of any "undefined" options from corresponding
+ option values in some other command object. "Undefined" here means
+ "is None", which is the convention used to indicate that an option
+ has not been changed between 'initialize_options()' and
+ 'finalize_options()'. Usually called from 'finalize_options()' for
+ options that depend on some other command rather than another
+ option of the same command. 'src_cmd' is the other command from
+ which option values will be taken (a command object will be created
+ for it if necessary); the remaining arguments are
+ '(src_option,dst_option)' tuples which mean "take the value of
+ 'src_option' in the 'src_cmd' command object, and copy it to
+ 'dst_option' in the current command object".
+ """
+
+ # Option_pairs: list of (src_option, dst_option) tuples
+
+ src_cmd_obj = self.distribution.get_command_obj(src_cmd)
+ src_cmd_obj.ensure_finalized()
+ for (src_option, dst_option) in option_pairs:
+ if getattr(self, dst_option) is None:
+ setattr(self, dst_option,
+ getattr(src_cmd_obj, src_option))
+
+
+ def get_finalized_command(self, command, create=1):
+ """Wrapper around Distribution's 'get_command_obj()' method: find
+ (create if necessary and 'create' is true) the command object for
+ 'command', call its 'ensure_finalized()' method, and return the
+ finalized command object.
+ """
+ cmd_obj = self.distribution.get_command_obj(command, create)
+ cmd_obj.ensure_finalized()
+ return cmd_obj
+
+ # XXX rename to 'get_reinitialized_command()'? (should do the
+ # same in dist.py, if so)
+ def reinitialize_command(self, command, reinit_subcommands=0):
+ return self.distribution.reinitialize_command(
+ command, reinit_subcommands)
+
+ def run_command(self, command):
+ """Run some other command: uses the 'run_command()' method of
+ Distribution, which creates and finalizes the command object if
+ necessary and then invokes its 'run()' method.
+ """
+ self.distribution.run_command(command)
+
+ def get_sub_commands(self):
+ """Determine the sub-commands that are relevant in the current
+ distribution (ie., that need to be run). This is based on the
+ 'sub_commands' class attribute: each tuple in that list may include
+ a method that we call to determine if the subcommand needs to be
+ run for the current distribution. Return a list of command names.
+ """
+ commands = []
+ for (cmd_name, method) in self.sub_commands:
+ if method is None or method(self):
+ commands.append(cmd_name)
+ return commands
+
+
+ # -- External world manipulation -----------------------------------
+
+ def warn(self, msg):
+ log.warn("warning: %s: %s\n" %
+ (self.get_command_name(), msg))
+
+ def execute(self, func, args, msg=None, level=1):
+ util.execute(func, args, msg, dry_run=self.dry_run)
+
+ def mkpath(self, name, mode=0777):
+ dir_util.mkpath(name, mode, dry_run=self.dry_run)
+
+ def copy_file(self, infile, outfile,
+ preserve_mode=1, preserve_times=1, link=None, level=1):
+ """Copy a file respecting the dry-run and force flags. (dry-run
+ falls back to the Distribution object's value, and force defaults to
+ false for commands that don't define it.) 'level' is not used."""
+
+ return file_util.copy_file(
+ infile, outfile,
+ preserve_mode, preserve_times,
+ not self.force,
+ link,
+ dry_run=self.dry_run)
+
+ def copy_tree(self, infile, outfile,
+ preserve_mode=1, preserve_times=1, preserve_symlinks=0,
+ level=1):
+ """Copy an entire directory tree respecting the dry-run and force
+ flags; 'level' is not used.
+ """
+ return dir_util.copy_tree(
+ infile, outfile,
+ preserve_mode,preserve_times,preserve_symlinks,
+ not self.force,
+ dry_run=self.dry_run)
+
+ def move_file (self, src, dst, level=1):
+ """Move a file respecting dry-run flag."""
+ return file_util.move_file(src, dst, dry_run = self.dry_run)
+
+ def spawn (self, cmd, search_path=1, level=1):
+ """Spawn an external command respecting dry-run flag."""
+ from distutils.spawn import spawn
+ spawn(cmd, search_path, dry_run= self.dry_run)
+
+ def make_archive(self, base_name, format, root_dir=None, base_dir=None,
+ owner=None, group=None):
+ return archive_util.make_archive(base_name, format, root_dir,
+ base_dir, dry_run=self.dry_run,
+ owner=owner, group=group)
+
+ def make_file(self, infiles, outfile, func, args,
+ exec_msg=None, skip_msg=None, level=1):
+ """Special case of 'execute()' for operations that process one or
+ more input files and generate one output file. Works just like
+ 'execute()', except the operation is skipped and a different
+ message printed if 'outfile' already exists and is newer than all
+ files listed in 'infiles'. If the command defined 'self.force',
+ and it is true, then the command is unconditionally run -- does no
+ timestamp checks.
+ """
+ if skip_msg is None:
+ skip_msg = "skipping %s (inputs unchanged)" % outfile
+
+ # Allow 'infiles' to be a single string
+ if isinstance(infiles, str):
+ infiles = (infiles,)
+ elif not isinstance(infiles, (list, tuple)):
+ raise TypeError, \
+ "'infiles' must be a string, or a list or tuple of strings"
+
+ if exec_msg is None:
+ exec_msg = "generating %s from %s" % \
+ (outfile, ', '.join(infiles))
+
+ # If 'outfile' must be regenerated (either because it doesn't
+ # exist, is out-of-date, or the 'force' flag is true) then
+ # perform the action that presumably regenerates it
+ if self.force or dep_util.newer_group(infiles, outfile):
+ self.execute(func, args, exec_msg, level)
+
+ # Otherwise, print the "skip" message
+ else:
+ log.debug(skip_msg)
+
+# XXX 'install_misc' class not currently used -- it was the base class for
+# both 'install_scripts' and 'install_data', but they outgrew it. It might
+# still be useful for 'install_headers', though, so I'm keeping it around
+# for the time being.
+
+class install_misc(Command):
+    """Common base class for installing some files in a subdirectory.
+    Formerly the base class of install_data and install_scripts.
+    """
+
+    user_options = [('install-dir=', 'd', "directory to install the files to")]
+
+    def initialize_options (self):
+        self.install_dir = None
+        self.outfiles = []
+
+    def _install_dir_from(self, dirname):
+        self.set_undefined_options('install', (dirname, 'install_dir'))
+
+    def _copy_files(self, filelist):
+        self.outfiles = []
+        if not filelist:
+            return
+        self.mkpath(self.install_dir)
+        for f in filelist:
+            # Record the destination copy_file() reports, not join(dir, f).
+            self.outfiles.append(self.copy_file(f, self.install_dir)[0])
+
+    def get_outputs(self):
+        return self.outfiles
diff --git a/cashew/Lib/distutils/command/__init__.py b/cashew/Lib/distutils/command/__init__.py
new file mode 100644
index 0000000..20b159f
--- /dev/null
+++ b/cashew/Lib/distutils/command/__init__.py
@@ -0,0 +1,33 @@
+"""distutils.command
+
+Package containing implementation of all the standard Distutils
+commands."""
+
+__revision__ = "$Id$"
+
+__all__ = ['build',
+ 'build_py',
+ 'build_ext',
+ 'build_clib',
+ 'build_scripts',
+ 'clean',
+ 'install',
+ 'install_lib',
+ 'install_headers',
+ 'install_scripts',
+ 'install_data',
+ 'sdist',
+ 'register',
+ 'bdist',
+ 'bdist_dumb',
+ 'bdist_rpm',
+ 'bdist_wininst',
+ 'upload',
+ 'check',
+ # These two are reserved for future use:
+ #'bdist_sdux',
+ #'bdist_pkgtool',
+ # Note:
+ # bdist_packager is not included because it only provides
+ # an abstract base class
+ ]
diff --git a/cashew/Lib/distutils/command/bdist.py b/cashew/Lib/distutils/command/bdist.py
new file mode 100644
index 0000000..d7910b1
--- /dev/null
+++ b/cashew/Lib/distutils/command/bdist.py
@@ -0,0 +1,146 @@
+"""distutils.command.bdist
+
+Implements the Distutils 'bdist' command (create a built [binary]
+distribution)."""
+
+__revision__ = "$Id$"
+
+import os
+
+from distutils.util import get_platform
+from distutils.core import Command
+from distutils.errors import DistutilsPlatformError, DistutilsOptionError
+
+
+def show_formats():
+ """Print list of available formats (arguments to "--format" option).
+ """
+ from distutils.fancy_getopt import FancyGetopt
+ formats = []
+ for format in bdist.format_commands:
+ formats.append(("formats=" + format, None,
+ bdist.format_command[format][1]))
+ pretty_printer = FancyGetopt(formats)
+ pretty_printer.print_help("List of available distribution formats:")
+
+
+class bdist(Command):
+
+ description = "create a built (binary) distribution"
+
+ user_options = [('bdist-base=', 'b',
+ "temporary directory for creating built distributions"),
+ ('plat-name=', 'p',
+ "platform name to embed in generated filenames "
+ "(default: %s)" % get_platform()),
+ ('formats=', None,
+ "formats for distribution (comma-separated list)"),
+ ('dist-dir=', 'd',
+ "directory to put final built distributions in "
+ "[default: dist]"),
+ ('skip-build', None,
+ "skip rebuilding everything (for testing/debugging)"),
+ ('owner=', 'u',
+ "Owner name used when creating a tar file"
+ " [default: current user]"),
+ ('group=', 'g',
+ "Group name used when creating a tar file"
+ " [default: current group]"),
+ ]
+
+ boolean_options = ['skip-build']
+
+ help_options = [
+ ('help-formats', None,
+ "lists available distribution formats", show_formats),
+ ]
+
+ # The following commands do not take a format option from bdist
+ no_format_option = ('bdist_rpm',)
+
+ # This won't do in reality: will need to distinguish RPM-ish Linux,
+ # Debian-ish Linux, Solaris, FreeBSD, ..., Windows, Mac OS.
+ default_format = {'posix': 'gztar',
+ 'nt': 'zip',
+ 'os2': 'zip'}
+
+ # Establish the preferred order (for the --help-formats option).
+ format_commands = ['rpm', 'gztar', 'bztar', 'ztar', 'tar',
+ 'wininst', 'zip', 'msi']
+
+ # And the real information.
+ format_command = {'rpm': ('bdist_rpm', "RPM distribution"),
+ 'gztar': ('bdist_dumb', "gzip'ed tar file"),
+ 'bztar': ('bdist_dumb', "bzip2'ed tar file"),
+ 'ztar': ('bdist_dumb', "compressed tar file"),
+ 'tar': ('bdist_dumb', "tar file"),
+ 'wininst': ('bdist_wininst',
+ "Windows executable installer"),
+ 'zip': ('bdist_dumb', "ZIP file"),
+ 'msi': ('bdist_msi', "Microsoft Installer")
+ }
+
+
+ def initialize_options(self):
+ self.bdist_base = None
+ self.plat_name = None
+ self.formats = None
+ self.dist_dir = None
+ self.skip_build = 0
+ self.group = None
+ self.owner = None
+
+ def finalize_options(self):
+ # have to finalize 'plat_name' before 'bdist_base'
+ if self.plat_name is None:
+ if self.skip_build:
+ self.plat_name = get_platform()
+ else:
+ self.plat_name = self.get_finalized_command('build').plat_name
+
+ # 'bdist_base' -- parent of per-built-distribution-format
+ # temporary directories (eg. we'll probably have
+ # "build/bdist.<plat>/dumb", "build/bdist.<plat>/rpm", etc.)
+ if self.bdist_base is None:
+ build_base = self.get_finalized_command('build').build_base
+ self.bdist_base = os.path.join(build_base,
+ 'bdist.' + self.plat_name)
+
+ self.ensure_string_list('formats')
+ if self.formats is None:
+ try:
+ self.formats = [self.default_format[os.name]]
+ except KeyError:
+ raise DistutilsPlatformError, \
+ "don't know how to create built distributions " + \
+ "on platform %s" % os.name
+
+ if self.dist_dir is None:
+ self.dist_dir = "dist"
+
+ def run(self):
+ # Figure out which sub-commands we need to run.
+ commands = []
+ for format in self.formats:
+ try:
+ commands.append(self.format_command[format][0])
+ except KeyError:
+ raise DistutilsOptionError, "invalid format '%s'" % format
+
+ # Reinitialize and run each command.
+ for i in range(len(self.formats)):
+ cmd_name = commands[i]
+ sub_cmd = self.reinitialize_command(cmd_name)
+ if cmd_name not in self.no_format_option:
+ sub_cmd.format = self.formats[i]
+
+ # passing the owner and group names for tar archiving
+ if cmd_name == 'bdist_dumb':
+ sub_cmd.owner = self.owner
+ sub_cmd.group = self.group
+
+ # If we're going to need to run this command again, tell it to
+ # keep its temporary files around so subsequent runs go faster.
+ if cmd_name in commands[i+1:]:
+ sub_cmd.keep_temp = 1
+ self.run_command(cmd_name)
diff --git a/cashew/Lib/distutils/command/bdist_dumb.py b/cashew/Lib/distutils/command/bdist_dumb.py
new file mode 100644
index 0000000..2f3c668
--- /dev/null
+++ b/cashew/Lib/distutils/command/bdist_dumb.py
@@ -0,0 +1,133 @@
+"""distutils.command.bdist_dumb
+
+Implements the Distutils 'bdist_dumb' command (create a "dumb" built
+distribution -- i.e., just an archive to be unpacked under $prefix or
+$exec_prefix)."""
+
+__revision__ = "$Id$"
+
+import os
+
+from sysconfig import get_python_version
+
+from distutils.util import get_platform
+from distutils.core import Command
+from distutils.dir_util import remove_tree, ensure_relative
+from distutils.errors import DistutilsPlatformError
+from distutils import log
+
+class bdist_dumb (Command):
+
+ description = 'create a "dumb" built distribution'
+
+ user_options = [('bdist-dir=', 'd',
+ "temporary directory for creating the distribution"),
+ ('plat-name=', 'p',
+ "platform name to embed in generated filenames "
+ "(default: %s)" % get_platform()),
+ ('format=', 'f',
+ "archive format to create (tar, ztar, gztar, zip)"),
+ ('keep-temp', 'k',
+ "keep the pseudo-installation tree around after " +
+ "creating the distribution archive"),
+ ('dist-dir=', 'd',
+ "directory to put final built distributions in"),
+ ('skip-build', None,
+ "skip rebuilding everything (for testing/debugging)"),
+ ('relative', None,
+ "build the archive using relative paths"
+ "(default: false)"),
+ ('owner=', 'u',
+ "Owner name used when creating a tar file"
+ " [default: current user]"),
+ ('group=', 'g',
+ "Group name used when creating a tar file"
+ " [default: current group]"),
+ ]
+
+ boolean_options = ['keep-temp', 'skip-build', 'relative']
+
+ default_format = { 'posix': 'gztar',
+ 'nt': 'zip',
+ 'os2': 'zip' }
+
+
+ def initialize_options (self):
+ self.bdist_dir = None
+ self.plat_name = None
+ self.format = None
+ self.keep_temp = 0
+ self.dist_dir = None
+ self.skip_build = None
+ self.relative = 0
+ self.owner = None
+ self.group = None
+
+ def finalize_options(self):  # Default bdist_dir and format, then inherit dist_dir, plat_name and skip_build from 'bdist'.
+ if self.bdist_dir is None:
+ bdist_base = self.get_finalized_command('bdist').bdist_base
+ self.bdist_dir = os.path.join(bdist_base, 'dumb')  # work area is the 'dumb' directory under bdist_base
+
+ if self.format is None:
+ try:
+ self.format = self.default_format[os.name]  # default archive format for this os.name
+ except KeyError:
+ raise DistutilsPlatformError, \
+ ("don't know how to create dumb built distributions " +
+ "on platform %s") % os.name  # raised when default_format has no entry for os.name
+
+ self.set_undefined_options('bdist',  # each pair is (attribute on 'bdist', attribute on self)
+ ('dist_dir', 'dist_dir'),
+ ('plat_name', 'plat_name'),
+ ('skip_build', 'skip_build'))
+
+ def run(self):  # Build (unless skipped), install into bdist_dir, archive that tree, record the archive, then clean up.
+ if not self.skip_build:
+ self.run_command('build')
+
+ install = self.reinitialize_command('install', reinit_subcommands=1)
+ install.root = self.bdist_dir  # install under the temporary tree instead of the real prefix
+ install.skip_build = self.skip_build
+ install.warn_dir = 0
+
+ log.info("installing to %s" % self.bdist_dir)
+ self.run_command('install')
+
+ # And make an archive relative to the root of the
+ # pseudo-installation tree.
+ archive_basename = "%s.%s" % (self.distribution.get_fullname(),
+ self.plat_name)
+
+ # OS/2 objects to any ":" characters in a filename (such as when
+ # a timestamp is used in a version) so change them to hyphens.
+ if os.name == "os2":
+ archive_basename = archive_basename.replace(":", "-")
+
+ pseudoinstall_root = os.path.join(self.dist_dir, archive_basename)  # despite the name, this is the archive's base path under dist_dir
+ if not self.relative:
+ archive_root = self.bdist_dir  # archive the whole temporary tree
+ else:
+ if (self.distribution.has_ext_modules() and
+ (install.install_base != install.install_platbase)):
+ raise DistutilsPlatformError, \
+ ("can't make a dumb built distribution where "
+ "base and platbase are different (%s, %s)"
+ % (repr(install.install_base),
+ repr(install.install_platbase)))  # a relative archive has a single root, so both bases must coincide
+ else:
+ archive_root = os.path.join(self.bdist_dir,
+ ensure_relative(install.install_base))  # root the archive at install_base inside the temporary tree
+
+ # Make the archive
+ filename = self.make_archive(pseudoinstall_root,
+ self.format, root_dir=archive_root,
+ owner=self.owner, group=self.group)
+ if self.distribution.has_ext_modules():
+ pyversion = get_python_version()  # extension modules tie the archive to this Python version
+ else:
+ pyversion = 'any'
+ self.distribution.dist_files.append(('bdist_dumb', pyversion,
+ filename))  # record as (command name, python version, file name)
+
+ if not self.keep_temp:
+ remove_tree(self.bdist_dir, dry_run=self.dry_run)
diff --git a/cashew/Lib/distutils/command/bdist_rpm.py b/cashew/Lib/distutils/command/bdist_rpm.py
new file mode 100644
index 0000000..477e0ee
--- /dev/null
+++ b/cashew/Lib/distutils/command/bdist_rpm.py
@@ -0,0 +1,588 @@
+"""distutils.command.bdist_rpm
+
+Implements the Distutils 'bdist_rpm' command (create RPM source and binary
+distributions)."""
+
+__revision__ = "$Id$"
+
+import sys
+import os
+import string
+
+from distutils.core import Command
+from distutils.debug import DEBUG
+from distutils.file_util import write_file
+from distutils.sysconfig import get_python_version
+from distutils.errors import (DistutilsOptionError, DistutilsPlatformError,
+ DistutilsFileError, DistutilsExecError)
+from distutils import log
+
+class bdist_rpm (Command):
+
+ description = "create an RPM distribution"
+
+ user_options = [
+ ('bdist-base=', None,
+ "base directory for creating built distributions"),
+ ('rpm-base=', None,
+ "base directory for creating RPMs (defaults to \"rpm\" under "
+ "--bdist-base; must be specified for RPM 2)"),
+ ('dist-dir=', 'd',
+ "directory to put final RPM files in "
+ "(and .spec files if --spec-only)"),
+ ('python=', None,
+ "path to Python interpreter to hard-code in the .spec file "
+ "(default: \"python\")"),
+ ('fix-python', None,
+ "hard-code the exact path to the current Python interpreter in "
+ "the .spec file"),
+ ('spec-only', None,
+ "only regenerate spec file"),
+ ('source-only', None,
+ "only generate source RPM"),
+ ('binary-only', None,
+ "only generate binary RPM"),
+ ('use-bzip2', None,
+ "use bzip2 instead of gzip to create source distribution"),
+
+ # More meta-data: too RPM-specific to put in the setup script,
+ # but needs to go in the .spec file -- so we make these options
+ # to "bdist_rpm". The idea is that packagers would put this
+ # info in setup.cfg, although they are of course free to
+ # supply it on the command line.
+ ('distribution-name=', None,
+ "name of the (Linux) distribution to which this "
+ "RPM applies (*not* the name of the module distribution!)"),
+ ('group=', None,
+ "package classification [default: \"Development/Libraries\"]"),
+ ('release=', None,
+ "RPM release number"),
+ ('serial=', None,
+ "RPM serial number"),
+ ('vendor=', None,
+ "RPM \"vendor\" (eg. \"Joe Blow \") "
+ "[default: maintainer or author from setup script]"),
+ ('packager=', None,
+ "RPM packager (eg. \"Jane Doe \")"
+ "[default: vendor]"),
+ ('doc-files=', None,
+ "list of documentation files (space or comma-separated)"),
+ ('changelog=', None,
+ "RPM changelog"),
+ ('icon=', None,
+ "name of icon file"),
+ ('provides=', None,
+ "capabilities provided by this package"),
+ ('requires=', None,
+ "capabilities required by this package"),
+ ('conflicts=', None,
+ "capabilities which conflict with this package"),
+ ('build-requires=', None,
+ "capabilities required to build this package"),
+ ('obsoletes=', None,
+ "capabilities made obsolete by this package"),
+ ('no-autoreq', None,
+ "do not automatically calculate dependencies"),
+
+ # Actions to take when building RPM
+ ('keep-temp', 'k',
+ "don't clean up RPM build directory"),
+ ('no-keep-temp', None,
+ "clean up RPM build directory [default]"),
+ ('use-rpm-opt-flags', None,
+ "compile with RPM_OPT_FLAGS when building from source RPM"),
+ ('no-rpm-opt-flags', None,
+ "do not pass any RPM CFLAGS to compiler"),
+ ('rpm3-mode', None,
+ "RPM 3 compatibility mode (default)"),
+ ('rpm2-mode', None,
+ "RPM 2 compatibility mode"),
+
+ # Add the hooks necessary for specifying custom scripts
+ ('prep-script=', None,
+ "Specify a script for the PREP phase of RPM building"),
+ ('build-script=', None,
+ "Specify a script for the BUILD phase of RPM building"),
+
+ ('pre-install=', None,
+ "Specify a script for the pre-INSTALL phase of RPM building"),
+ ('install-script=', None,
+ "Specify a script for the INSTALL phase of RPM building"),
+ ('post-install=', None,
+ "Specify a script for the post-INSTALL phase of RPM building"),
+
+ ('pre-uninstall=', None,
+ "Specify a script for the pre-UNINSTALL phase of RPM building"),
+ ('post-uninstall=', None,
+ "Specify a script for the post-UNINSTALL phase of RPM building"),
+
+ ('clean-script=', None,
+ "Specify a script for the CLEAN phase of RPM building"),
+
+ ('verify-script=', None,
+ "Specify a script for the VERIFY phase of the RPM build"),
+
+ # Allow a packager to explicitly force an architecture
+ ('force-arch=', None,
+ "Force an architecture onto the RPM build process"),
+
+ ('quiet', 'q',
+ "Run the INSTALL phase of RPM building in quiet mode"),
+ ]
+
+ boolean_options = ['keep-temp', 'use-rpm-opt-flags', 'rpm3-mode',
+ 'no-autoreq', 'quiet']
+
+ negative_opt = {'no-keep-temp': 'keep-temp',
+ 'no-rpm-opt-flags': 'use-rpm-opt-flags',
+ 'rpm2-mode': 'rpm3-mode'}
+
+
+ def initialize_options (self):
+ self.bdist_base = None
+ self.rpm_base = None
+ self.dist_dir = None
+ self.python = None
+ self.fix_python = None
+ self.spec_only = None
+ self.binary_only = None
+ self.source_only = None
+ self.use_bzip2 = None
+
+ self.distribution_name = None
+ self.group = None
+ self.release = None
+ self.serial = None
+ self.vendor = None
+ self.packager = None
+ self.doc_files = None
+ self.changelog = None
+ self.icon = None
+
+ self.prep_script = None
+ self.build_script = None
+ self.install_script = None
+ self.clean_script = None
+ self.verify_script = None
+ self.pre_install = None
+ self.post_install = None
+ self.pre_uninstall = None
+ self.post_uninstall = None
+ self.prep = None
+ self.provides = None
+ self.requires = None
+ self.conflicts = None
+ self.build_requires = None
+ self.obsoletes = None
+
+ self.keep_temp = 0
+ self.use_rpm_opt_flags = 1
+ self.rpm3_mode = 1
+ self.no_autoreq = 0
+
+ self.force_arch = None
+ self.quiet = 0
+
+ # initialize_options()
+
+
+ def finalize_options (self):  # Resolve rpm_base and python, reject unsupported option/platform combinations, then finalize the package data.
+ self.set_undefined_options('bdist', ('bdist_base', 'bdist_base'))
+ if self.rpm_base is None:
+ if not self.rpm3_mode:
+ raise DistutilsOptionError, \
+ "you must specify --rpm-base in RPM 2 mode"
+ self.rpm_base = os.path.join(self.bdist_base, "rpm")  # default: "rpm" under bdist_base
+
+ if self.python is None:
+ if self.fix_python:
+ self.python = sys.executable  # --fix-python: hard-code the running interpreter's path
+ else:
+ self.python = "python"
+ elif self.fix_python:
+ raise DistutilsOptionError, \
+ "--python and --fix-python are mutually exclusive options"
+
+ if os.name != 'posix':
+ raise DistutilsPlatformError, \
+ ("don't know how to create RPM "
+ "distributions on platform %s" % os.name)
+ if self.binary_only and self.source_only:
+ raise DistutilsOptionError, \
+ "cannot supply both '--source-only' and '--binary-only'"
+
+ # don't pass CFLAGS to pure python distributions
+ if not self.distribution.has_ext_modules():
+ self.use_rpm_opt_flags = 0
+
+ self.set_undefined_options('bdist', ('dist_dir', 'dist_dir'))
+ self.finalize_package_data()  # validates and defaults the spec-file related options
+
+ # finalize_options()
+
+ def finalize_package_data (self):  # Validate the spec-file related options and give several of them defaults.
+ self.ensure_string('group', "Development/Libraries")
+ self.ensure_string('vendor',
+ "%s <%s>" % (self.distribution.get_contact(),
+ self.distribution.get_contact_email()))  # default vendor is "contact <contact email>" from the distribution
+ self.ensure_string('packager')
+ self.ensure_string_list('doc_files')
+ if isinstance(self.doc_files, list):  # only extend a list that was actually given; None is left alone
+ for readme in ('README', 'README.txt'):
+ if os.path.exists(readme) and readme not in self.doc_files:
+ self.doc_files.append(readme)  # add an existing README that is not listed yet
+
+ self.ensure_string('release', "1")
+ self.ensure_string('serial') # should it be an int?
+
+ self.ensure_string('distribution_name')
+
+ self.ensure_string('changelog')
+ # Format changelog correctly
+ self.changelog = self._format_changelog(self.changelog)  # from here on the changelog is a list of lines (or still empty/None)
+
+ self.ensure_filename('icon')
+
+ self.ensure_filename('prep_script')
+ self.ensure_filename('build_script')
+ self.ensure_filename('install_script')
+ self.ensure_filename('clean_script')
+ self.ensure_filename('verify_script')
+ self.ensure_filename('pre_install')
+ self.ensure_filename('post_install')
+ self.ensure_filename('pre_uninstall')
+ self.ensure_filename('post_uninstall')
+
+ # XXX don't forget we punted on summaries and descriptions -- they
+ # should be handled here eventually!
+
+ # Now *this* is some meta-data that belongs in the setup script...
+ self.ensure_string_list('provides')
+ self.ensure_string_list('requires')
+ self.ensure_string_list('conflicts')
+ self.ensure_string_list('build_requires')
+ self.ensure_string_list('obsoletes')
+
+ self.ensure_string('force_arch')
+ # finalize_package_data ()
+
+
+ def run (self):  # Write the spec file, then (unless --spec-only) build an sdist, run rpm/rpmbuild on the spec and move the results into dist_dir.
+
+ if DEBUG:
+ print "before _get_package_data():"
+ print "vendor =", self.vendor
+ print "packager =", self.packager
+ print "doc_files =", self.doc_files
+ print "changelog =", self.changelog
+
+ # make directories
+ if self.spec_only:
+ spec_dir = self.dist_dir
+ self.mkpath(spec_dir)
+ else:
+ rpm_dir = {}  # maps each RPM tree subdirectory name to its path under rpm_base
+ for d in ('SOURCES', 'SPECS', 'BUILD', 'RPMS', 'SRPMS'):
+ rpm_dir[d] = os.path.join(self.rpm_base, d)
+ self.mkpath(rpm_dir[d])
+ spec_dir = rpm_dir['SPECS']
+
+ # Spec file goes into 'dist_dir' if '--spec-only specified',
+ # build/rpm.<plat> otherwise.
+ spec_path = os.path.join(spec_dir,
+ "%s.spec" % self.distribution.get_name())
+ self.execute(write_file,
+ (spec_path,
+ self._make_spec_file()),
+ "writing '%s'" % spec_path)
+
+ if self.spec_only: # stop if requested
+ return
+
+ # Make a source distribution and copy to SOURCES directory with
+ # optional icon.
+ saved_dist_files = self.distribution.dist_files[:]  # copy, so the sdist's own entry can be discarded below
+ sdist = self.reinitialize_command('sdist')
+ if self.use_bzip2:
+ sdist.formats = ['bztar']
+ else:
+ sdist.formats = ['gztar']
+ self.run_command('sdist')
+ self.distribution.dist_files = saved_dist_files  # restore the list saved before running sdist
+
+ source = sdist.get_archive_files()[0]
+ source_dir = rpm_dir['SOURCES']
+ self.copy_file(source, source_dir)
+
+ if self.icon:
+ if os.path.exists(self.icon):
+ self.copy_file(self.icon, source_dir)
+ else:
+ raise DistutilsFileError, \
+ "icon file '%s' does not exist" % self.icon
+
+
+ # build package
+ log.info("building RPMs")
+ rpm_cmd = ['rpm']
+ if os.path.exists('/usr/bin/rpmbuild') or \
+ os.path.exists('/bin/rpmbuild'):
+ rpm_cmd = ['rpmbuild']  # use rpmbuild when it exists at one of the two probed paths
+
+ if self.source_only: # what kind of RPMs?
+ rpm_cmd.append('-bs')
+ elif self.binary_only:
+ rpm_cmd.append('-bb')
+ else:
+ rpm_cmd.append('-ba')
+ if self.rpm3_mode:
+ rpm_cmd.extend(['--define',
+ '_topdir %s' % os.path.abspath(self.rpm_base)])  # point rpm's _topdir at our private tree
+ if not self.keep_temp:
+ rpm_cmd.append('--clean')
+
+ if self.quiet:
+ rpm_cmd.append('--quiet')
+
+ rpm_cmd.append(spec_path)
+ # Determine the binary rpm names that should be built out of this spec
+ # file
+ # Note that some of these may not be really built (if the file
+ # list is empty)
+ nvr_string = "%{name}-%{version}-%{release}"
+ src_rpm = nvr_string + ".src.rpm"
+ non_src_rpm = "%{arch}/" + nvr_string + ".%{arch}.rpm"
+ q_cmd = r"rpm -q --qf '%s %s\n' --specfile '%s'" % (  # NOTE(review): spec_path goes into a shell command line unescaped; a quote in the path would break it
+ src_rpm, non_src_rpm, spec_path)
+
+ out = os.popen(q_cmd)
+ try:
+ binary_rpms = []
+ source_rpm = None
+ while 1:
+ line = out.readline()
+ if not line:
+ break
+ l = string.split(string.strip(line))  # each output line is expected to hold two names: source rpm, binary rpm
+ assert(len(l) == 2)  # NOTE(review): assert used to validate external output; it disappears under -O
+ binary_rpms.append(l[1])
+ # The source rpm is named after the first entry in the spec file
+ if source_rpm is None:
+ source_rpm = l[0]
+
+ status = out.close()
+ if status:
+ raise DistutilsExecError("Failed to execute: %s" % repr(q_cmd))
+
+ finally:
+ out.close()  # also reached after the close() above, so the pipe is closed a second time on the normal path
+
+ self.spawn(rpm_cmd)
+
+ if not self.dry_run:
+ if self.distribution.has_ext_modules():
+ pyversion = get_python_version()
+ else:
+ pyversion = 'any'
+
+ if not self.binary_only:
+ srpm = os.path.join(rpm_dir['SRPMS'], source_rpm)
+ assert(os.path.exists(srpm))  # NOTE(review): assert used as a runtime check; it disappears under -O
+ self.move_file(srpm, self.dist_dir)
+ filename = os.path.join(self.dist_dir, source_rpm)
+ self.distribution.dist_files.append(
+ ('bdist_rpm', pyversion, filename))
+
+ if not self.source_only:
+ for rpm in binary_rpms:
+ rpm = os.path.join(rpm_dir['RPMS'], rpm)
+ if os.path.exists(rpm):  # names reported by the query may not have been built, see the note above
+ self.move_file(rpm, self.dist_dir)
+ filename = os.path.join(self.dist_dir,
+ os.path.basename(rpm))
+ self.distribution.dist_files.append(
+ ('bdist_rpm', pyversion, filename))
+ # run()
+
+ def _dist_path(self, path):
+ return os.path.join(self.dist_dir, os.path.basename(path))
+
+ def _make_spec_file(self):
+ """Generate the text of an RPM spec file and return it as a
+ list of strings (one per line).
+ """
+ # definitions and headers
+ spec_file = [
+ '%define name ' + self.distribution.get_name(),
+ '%define version ' + self.distribution.get_version().replace('-','_'),  # '-' is replaced by '_' in the version
+ '%define unmangled_version ' + self.distribution.get_version(),  # original version, used for the source archive and %setup directory names
+ '%define release ' + self.release.replace('-','_'),
+ '',
+ 'Summary: ' + self.distribution.get_description(),
+ ]
+
+ # put locale summaries into spec file
+ # XXX not supported for now (hard to put a dictionary
+ # in a config file -- arg!)
+ #for locale in self.summaries.keys():
+ # spec_file.append('Summary(%s): %s' % (locale,
+ # self.summaries[locale]))
+
+ spec_file.extend([
+ 'Name: %{name}',
+ 'Version: %{version}',
+ 'Release: %{release}',])
+
+ # XXX yuck! this filename is available from the "sdist" command,
+ # but only after it has run: and we create the spec file before
+ # running "sdist", in case of --spec-only.
+ if self.use_bzip2:
+ spec_file.append('Source0: %{name}-%{unmangled_version}.tar.bz2')
+ else:
+ spec_file.append('Source0: %{name}-%{unmangled_version}.tar.gz')
+
+ spec_file.extend([
+ 'License: ' + self.distribution.get_license(),
+ 'Group: ' + self.group,
+ 'BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot',
+ 'Prefix: %{_prefix}', ])
+
+ if not self.force_arch:
+ # noarch if no extension modules
+ if not self.distribution.has_ext_modules():
+ spec_file.append('BuildArch: noarch')
+ else:
+ spec_file.append( 'BuildArch: %s' % self.force_arch )
+
+ for field in ('Vendor',
+ 'Packager',
+ 'Provides',
+ 'Requires',
+ 'Conflicts',
+ 'Obsoletes',
+ ):
+ val = getattr(self, string.lower(field))  # the option attribute is the lower-cased header name
+ if isinstance(val, list):
+ spec_file.append('%s: %s' % (field, string.join(val)))  # list values are joined into one header line
+ elif val is not None:
+ spec_file.append('%s: %s' % (field, val))
+
+
+ if self.distribution.get_url() != 'UNKNOWN':
+ spec_file.append('Url: ' + self.distribution.get_url())
+
+ if self.distribution_name:
+ spec_file.append('Distribution: ' + self.distribution_name)
+
+ if self.build_requires:
+ spec_file.append('BuildRequires: ' +
+ string.join(self.build_requires))
+
+ if self.icon:
+ spec_file.append('Icon: ' + os.path.basename(self.icon))  # only the file name is written to the spec
+
+ if self.no_autoreq:
+ spec_file.append('AutoReq: 0')
+
+ spec_file.extend([
+ '',
+ '%description',
+ self.distribution.get_long_description()
+ ])
+
+ # put locale descriptions into spec file
+ # XXX again, suppressed because config file syntax doesn't
+ # easily support this ;-(
+ #for locale in self.descriptions.keys():
+ # spec_file.extend([
+ # '',
+ # '%description -l ' + locale,
+ # self.descriptions[locale],
+ # ])
+
+ # rpm scripts
+ # figure out default build script
+ def_setup_call = "%s %s" % (self.python,os.path.basename(sys.argv[0]))  # "<python> <name of the running script>"
+ def_build = "%s build" % def_setup_call
+ if self.use_rpm_opt_flags:
+ def_build = 'env CFLAGS="$RPM_OPT_FLAGS" ' + def_build
+
+ # insert contents of files
+
+ # XXX this is kind of misleading: user-supplied options are files
+ # that we open and interpolate into the spec file, but the defaults
+ # are just text that we drop in as-is. Hmmm.
+
+ install_cmd = ('%s install -O1 --root=$RPM_BUILD_ROOT '
+ '--record=INSTALLED_FILES') % def_setup_call
+
+ script_options = [  # entries are (spec section name, option attribute, default text or None)
+ ('prep', 'prep_script', "%setup -n %{name}-%{unmangled_version}"),
+ ('build', 'build_script', def_build),
+ ('install', 'install_script', install_cmd),
+ ('clean', 'clean_script', "rm -rf $RPM_BUILD_ROOT"),
+ ('verifyscript', 'verify_script', None),
+ ('pre', 'pre_install', None),
+ ('post', 'post_install', None),
+ ('preun', 'pre_uninstall', None),
+ ('postun', 'post_uninstall', None),
+ ]
+
+ for (rpm_opt, attr, default) in script_options:
+ # Insert contents of file referred to, if no file is referred to
+ # use 'default' as contents of script
+ val = getattr(self, attr)
+ if val or default:  # a section with neither a file nor a default is left out
+ spec_file.extend([
+ '',
+ '%' + rpm_opt,])
+ if val:
+ spec_file.extend(string.split(open(val, 'r').read(), '\n'))  # NOTE(review): the file object is never closed explicitly
+ else:
+ spec_file.append(default)
+
+
+ # files section
+ spec_file.extend([
+ '',
+ '%files -f INSTALLED_FILES',
+ '%defattr(-,root,root)',
+ ])
+
+ if self.doc_files:
+ spec_file.append('%doc ' + string.join(self.doc_files))
+
+ if self.changelog:
+ spec_file.extend([
+ '',
+ '%changelog',])
+ spec_file.extend(self.changelog)  # assumes the changelog is already a list of lines -- TODO confirm it always went through _format_changelog
+
+ return spec_file
+
+ # _make_spec_file ()
+
+ def _format_changelog(self, changelog):
+ """Format the changelog correctly and convert it to a list of strings
+ """
+ if not changelog:
+ return changelog
+ new_changelog = []
+ for line in string.split(string.strip(changelog), '\n'):
+ line = string.strip(line)
+ if line[0] == '*':
+ new_changelog.extend(['', line])
+ elif line[0] == '-':
+ new_changelog.append(line)
+ else:
+ new_changelog.append(' ' + line)
+
+ # strip trailing newline inserted by first changelog entry
+ if not new_changelog[0]:
+ del new_changelog[0]
+
+ return new_changelog
+
+ # _format_changelog()
+
+# class bdist_rpm
diff --git a/cashew/Lib/distutils/command/bdist_wininst.py b/cashew/Lib/distutils/command/bdist_wininst.py
new file mode 100644
index 0000000..aa9383a
--- /dev/null
+++ b/cashew/Lib/distutils/command/bdist_wininst.py
@@ -0,0 +1,368 @@
+"""distutils.command.bdist_wininst
+
+Implements the Distutils 'bdist_wininst' command: create a windows installer
+exe-program."""
+
+__revision__ = "$Id$"
+
+import sys
+import os
+import string
+
+from sysconfig import get_python_version
+
+from distutils.core import Command
+from distutils.dir_util import remove_tree
+from distutils.errors import DistutilsOptionError, DistutilsPlatformError
+from distutils import log
+from distutils.util import get_platform
+
+class bdist_wininst (Command):
+
+ description = "create an executable installer for MS Windows"
+
+ user_options = [('bdist-dir=', None,
+ "temporary directory for creating the distribution"),
+ ('plat-name=', 'p',
+ "platform name to embed in generated filenames "
+ "(default: %s)" % get_platform()),
+ ('keep-temp', 'k',
+ "keep the pseudo-installation tree around after " +
+ "creating the distribution archive"),
+ ('target-version=', None,
+ "require a specific python version" +
+ " on the target system"),
+ ('no-target-compile', 'c',
+ "do not compile .py to .pyc on the target system"),
+ ('no-target-optimize', 'o',
+ "do not compile .py to .pyo (optimized)"
+ "on the target system"),
+ ('dist-dir=', 'd',
+ "directory to put final built distributions in"),
+ ('bitmap=', 'b',
+ "bitmap to use for the installer instead of python-powered logo"),
+ ('title=', 't',
+ "title to display on the installer background instead of default"),
+ ('skip-build', None,
+ "skip rebuilding everything (for testing/debugging)"),
+ ('install-script=', None,
+ "basename of installation script to be run after"
+ "installation or before deinstallation"),
+ ('pre-install-script=', None,
+ "Fully qualified filename of a script to be run before "
+ "any files are installed. This script need not be in the "
+ "distribution"),
+ ('user-access-control=', None,
+ "specify Vista's UAC handling - 'none'/default=no "
+ "handling, 'auto'=use UAC if target Python installed for "
+ "all users, 'force'=always use UAC"),
+ ]
+
+ boolean_options = ['keep-temp', 'no-target-compile', 'no-target-optimize',
+ 'skip-build']
+
+ def initialize_options (self):
+ self.bdist_dir = None
+ self.plat_name = None
+ self.keep_temp = 0
+ self.no_target_compile = 0
+ self.no_target_optimize = 0
+ self.target_version = None
+ self.dist_dir = None
+ self.bitmap = None
+ self.title = None
+ self.skip_build = None
+ self.install_script = None
+ self.pre_install_script = None
+ self.user_access_control = None
+
+ # initialize_options()
+
+
+ def finalize_options (self):  # Resolve bdist_dir and target_version, inherit dist_dir/plat_name from 'bdist', and check install_script.
+ self.set_undefined_options('bdist', ('skip_build', 'skip_build'))
+
+ if self.bdist_dir is None:
+ if self.skip_build and self.plat_name:
+ # If build is skipped and plat_name is overridden, bdist will
+ # not see the correct 'plat_name' - so set that up manually.
+ bdist = self.distribution.get_command_obj('bdist')
+ bdist.plat_name = self.plat_name
+ # next the command will be initialized using that name
+ bdist_base = self.get_finalized_command('bdist').bdist_base
+ self.bdist_dir = os.path.join(bdist_base, 'wininst')  # work area is the 'wininst' directory under bdist_base
+
+ if not self.target_version:
+ self.target_version = ""  # normalise "not given" to the empty string
+
+ if not self.skip_build and self.distribution.has_ext_modules():
+ short_version = get_python_version()
+ if self.target_version and self.target_version != short_version:
+ raise DistutilsOptionError, \
+ "target version can only be %s, or the '--skip-build'" \
+ " option must be specified" % (short_version,)
+ self.target_version = short_version  # extensions built in this run pin the installer to the running Python version
+
+ self.set_undefined_options('bdist',
+ ('dist_dir', 'dist_dir'),
+ ('plat_name', 'plat_name'),
+ )
+
+ if self.install_script:
+ for script in self.distribution.scripts:
+ if self.install_script == os.path.basename(script):
+ break
+ else:
+ raise DistutilsOptionError, \
+ "install_script '%s' not found in scripts" % \
+ self.install_script
+ # finalize_options()
+
+
+ def run (self):  # Build (unless skipped), install into bdist_dir with a placeholder scheme, zip that tree and wrap the zip into an installer exe.
+ if (sys.platform != "win32" and
+ (self.distribution.has_ext_modules() or
+ self.distribution.has_c_libraries())):
+ raise DistutilsPlatformError \
+ ("distribution contains extensions and/or C libraries; "
+ "must be compiled on a Windows 32 platform")
+
+ if not self.skip_build:
+ self.run_command('build')
+
+ install = self.reinitialize_command('install', reinit_subcommands=1)
+ install.root = self.bdist_dir  # install under the temporary tree instead of the real prefix
+ install.skip_build = self.skip_build
+ install.warn_dir = 0
+ install.plat_name = self.plat_name
+
+ install_lib = self.reinitialize_command('install_lib')
+ # we do not want to include pyc or pyo files
+ install_lib.compile = 0
+ install_lib.optimize = 0
+
+ if self.distribution.has_ext_modules():
+ # If we are building an installer for a Python version other
+ # than the one we are currently running, then we need to ensure
+ # our build_lib reflects the other Python version rather than ours.
+ # Note that for target_version!=sys.version, we must have skipped the
+ # build step, so there is no issue with enforcing the build of this
+ # version.
+ target_version = self.target_version
+ if not target_version:
+ assert self.skip_build, "Should have already checked this"
+ target_version = sys.version[0:3]  # NOTE(review): takes the first three characters of sys.version, i.e. assumes a one-digit minor version
+ plat_specifier = ".%s-%s" % (self.plat_name, target_version)
+ build = self.get_finalized_command('build')
+ build.build_lib = os.path.join(build.build_base,
+ 'lib' + plat_specifier)
+
+ # Use a custom scheme for the zip-file, because we have to decide
+ # at installation time which scheme to use.
+ for key in ('purelib', 'platlib', 'headers', 'scripts', 'data'):
+ value = string.upper(key)  # the placeholder directory is the upper-cased scheme key
+ if key == 'headers':
+ value = value + '/Include/$dist_name'
+ setattr(install,
+ 'install_' + key,
+ value)
+
+ log.info("installing to %s", self.bdist_dir)
+ install.ensure_finalized()
+
+ # avoid warning of 'install_lib' about installing
+ # into a directory not in sys.path
+ sys.path.insert(0, os.path.join(self.bdist_dir, 'PURELIB'))
+
+ install.run()
+
+ del sys.path[0]  # undo the temporary sys.path entry added above
+
+ # And make an archive relative to the root of the
+ # pseudo-installation tree.
+ from tempfile import mktemp
+ archive_basename = mktemp()  # NOTE(review): mktemp() only returns a name without creating the file; the tempfile docs call it unsafe
+ fullname = self.distribution.get_fullname()
+ arcname = self.make_archive(archive_basename, "zip",
+ root_dir=self.bdist_dir)
+ # create an exe containing the zip-file
+ self.create_exe(arcname, fullname, self.bitmap)
+ if self.distribution.has_ext_modules():
+ pyversion = get_python_version()
+ else:
+ pyversion = 'any'
+ self.distribution.dist_files.append(('bdist_wininst', pyversion,
+ self.get_installer_filename(fullname)))  # record as (command name, python version, file name)
+ # remove the zip-file again
+ log.debug("removing temporary file '%s'", arcname)
+ os.remove(arcname)
+
+ if not self.keep_temp:
+ remove_tree(self.bdist_dir, dry_run=self.dry_run)
+
+ # run()
+
+ def get_inidata (self):  # Build the installer's ini-style configuration text and return it as one newline-joined string.
+ # Return data describing the installation.
+
+ lines = []
+ metadata = self.distribution.metadata
+
+ # Write the [metadata] section.
+ lines.append("[metadata]")
+
+ # 'info' will be displayed in the installer's dialog box,
+ # describing the items to be installed.
+ info = (metadata.long_description or '') + '\n'
+
+ # Escape newline characters
+ def escape(s):
+ return string.replace(s, "\n", "\\n")  # keeps every value on a single ini line
+
+ for name in ["author", "author_email", "description", "maintainer",
+ "maintainer_email", "name", "url", "version"]:
+ data = getattr(metadata, name, "")
+ if data:  # empty or missing fields are left out of both 'info' and the section
+ info = info + ("\n %s: %s" % \
+ (string.capitalize(name), escape(data)))
+ lines.append("%s=%s" % (name, escape(data)))
+
+ # The [setup] section contains entries controlling
+ # the installer runtime.
+ lines.append("\n[Setup]")
+ if self.install_script:
+ lines.append("install_script=%s" % self.install_script)
+ lines.append("info=%s" % escape(info))
+ lines.append("target_compile=%d" % (not self.no_target_compile))  # the "no_" options are written out inverted, as 0/1
+ lines.append("target_optimize=%d" % (not self.no_target_optimize))
+ if self.target_version:
+ lines.append("target_version=%s" % self.target_version)
+ if self.user_access_control:
+ lines.append("user_access_control=%s" % self.user_access_control)
+
+ title = self.title or self.distribution.get_fullname()  # fall back to the distribution's full name
+ lines.append("title=%s" % escape(title))
+ import time
+ import distutils
+ build_info = "Built %s with distutils-%s" % \
+ (time.ctime(time.time()), distutils.__version__)
+ lines.append("build_info=%s" % build_info)
+ return string.join(lines, "\n")
+
+ # get_inidata()
+
+ def create_exe (self, arcname, fullname, bitmap=None):
+ import struct
+
+ self.mkpath(self.dist_dir)
+
+ cfgdata = self.get_inidata()
+
+ installer_name = self.get_installer_filename(fullname)
+ self.announce("creating %s" % installer_name)
+
+ if bitmap:
+ bitmapdata = open(bitmap, "rb").read()
+ bitmaplen = len(bitmapdata)
+ else:
+ bitmaplen = 0
+
+ file = open(installer_name, "wb")
+ file.write(self.get_exe_bytes())
+ if bitmap:
+ file.write(bitmapdata)
+
+ # Convert cfgdata from unicode to ascii, mbcs encoded
+ try:
+ unicode
+ except NameError:
+ pass
+ else:
+ if isinstance(cfgdata, unicode):
+ cfgdata = cfgdata.encode("mbcs")
+
+ # Append the pre-install script
+ cfgdata = cfgdata + "\0"
+ if self.pre_install_script:
+ script_data = open(self.pre_install_script, "r").read()
+ cfgdata = cfgdata + script_data + "\n\0"
+ else:
+ # empty pre-install script
+ cfgdata = cfgdata + "\0"
+ file.write(cfgdata)
+
+ # The 'magic number' 0x1234567B is used to make sure that the
+ # binary layout of 'cfgdata' is what the wininst.exe binary
+ # expects. If the layout changes, increment that number, make
+ # the corresponding changes to the wininst.exe sources, and
+ # recompile them.
+ header = struct.pack(" cur_version:
+ bv = get_build_version()
+ else:
+ if self.target_version < "2.4":
+ bv = 6.0
+ else:
+ bv = 7.1
+ else:
+ # for current version - use authoritative check.
+ bv = get_build_version()
+
+ # wininst-x.y.exe is in the same directory as this file
+ directory = os.path.dirname(__file__)
+ # we must use a wininst-x.y.exe built with the same C compiler
+ # used for python. XXX What about mingw, borland, and so on?
+
+ # if plat_name starts with "win" but is not "win32"
+ # we want to strip "win" and leave the rest (e.g. -amd64)
+ # for all other cases, we don't want any suffix
+ if self.plat_name != 'win32' and self.plat_name[:3] == 'win':
+ sfix = self.plat_name[3:]
+ else:
+ sfix = ''
+
+ filename = os.path.join(directory, "wininst-%.1f%s.exe" % (bv, sfix))
+ f = open(filename, "rb")
+ try:
+ return f.read()
+ finally:
+ f.close()
+# class bdist_wininst
diff --git a/cashew/Lib/distutils/command/build.py b/cashew/Lib/distutils/command/build.py
new file mode 100644
index 0000000..f84bf35
--- /dev/null
+++ b/cashew/Lib/distutils/command/build.py
@@ -0,0 +1,147 @@
+"""distutils.command.build
+
+Implements the Distutils 'build' command."""
+
+__revision__ = "$Id$"
+
+import sys, os
+
+from distutils.util import get_platform
+from distutils.core import Command
+from distutils.errors import DistutilsOptionError
+
+def show_compilers():
+ from distutils.ccompiler import show_compilers
+ show_compilers()
+
+class build(Command):
+
+ description = "build everything needed to install"
+
+ user_options = [
+ ('build-base=', 'b',
+ "base directory for build library"),
+ ('build-purelib=', None,
+ "build directory for platform-neutral distributions"),
+ ('build-platlib=', None,
+ "build directory for platform-specific distributions"),
+ ('build-lib=', None,
+ "build directory for all distribution (defaults to either " +
+ "build-purelib or build-platlib"),
+ ('build-scripts=', None,
+ "build directory for scripts"),
+ ('build-temp=', 't',
+ "temporary build directory"),
+ ('plat-name=', 'p',
+ "platform name to build for, if supported "
+ "(default: %s)" % get_platform()),
+ ('compiler=', 'c',
+ "specify the compiler type"),
+ ('debug', 'g',
+ "compile extensions and libraries with debugging information"),
+ ('force', 'f',
+ "forcibly build everything (ignore file timestamps)"),
+ ('executable=', 'e',
+ "specify final destination interpreter path (build.py)"),
+ ]
+
+ boolean_options = ['debug', 'force']
+
+ help_options = [
+ ('help-compiler', None,
+ "list available compilers", show_compilers),
+ ]
+
+ def initialize_options(self):
+ self.build_base = 'build'
+ # these are decided only after 'build_base' has its final value
+ # (unless overridden by the user or client)
+ self.build_purelib = None
+ self.build_platlib = None
+ self.build_lib = None
+ self.build_temp = None
+ self.build_scripts = None
+ self.compiler = None
+ self.plat_name = None
+ self.debug = None
+ self.force = 0
+ self.executable = None
+
+ def finalize_options(self):
+ if self.plat_name is None:
+ self.plat_name = get_platform()
+ else:
+ # plat-name only supported for windows (other platforms are
+ # supported via ./configure flags, if at all). Avoid misleading
+ # other platforms.
+ if os.name != 'nt':
+ raise DistutilsOptionError(
+ "--plat-name only supported on Windows (try "
+ "using './configure --help' on your platform)")
+
+ plat_specifier = ".%s-%s" % (self.plat_name, sys.version[0:3])
+
+ # Make it so Python 2.x and Python 2.x with --with-pydebug don't
+ # share the same build directories. Doing so confuses the build
+ # process for C modules
+ if hasattr(sys, 'gettotalrefcount'):
+ plat_specifier += '-pydebug'
+
+ # 'build_purelib' and 'build_platlib' just default to 'lib' and
+ # 'lib.' under the base build directory. We only use one of
+ # them for a given distribution, though --
+ if self.build_purelib is None:
+ self.build_purelib = os.path.join(self.build_base, 'lib')
+ if self.build_platlib is None:
+ self.build_platlib = os.path.join(self.build_base,
+ 'lib' + plat_specifier)
+
+ # 'build_lib' is the actual directory that we will use for this
+ # particular module distribution -- if user didn't supply it, pick
+ # one of 'build_purelib' or 'build_platlib'.
+ if self.build_lib is None:
+ if self.distribution.ext_modules:
+ self.build_lib = self.build_platlib
+ else:
+ self.build_lib = self.build_purelib
+
+ # 'build_temp' -- temporary directory for compiler turds,
+ # "build/temp."
+ if self.build_temp is None:
+ self.build_temp = os.path.join(self.build_base,
+ 'temp' + plat_specifier)
+ if self.build_scripts is None:
+ self.build_scripts = os.path.join(self.build_base,
+ 'scripts-' + sys.version[0:3])
+
+ if self.executable is None:
+ self.executable = os.path.normpath(sys.executable)
+
+ def run(self):
+ # Run all relevant sub-commands. This will be some subset of:
+ # - build_py - pure Python modules
+ # - build_clib - standalone C libraries
+ # - build_ext - Python extensions
+ # - build_scripts - (Python) scripts
+ for cmd_name in self.get_sub_commands():
+ self.run_command(cmd_name)
+
+ # -- Predicates for the sub-command list ---------------------------
+
+ def has_pure_modules (self):
+ return self.distribution.has_pure_modules()
+
+ def has_c_libraries (self):
+ return self.distribution.has_c_libraries()
+
+ def has_ext_modules (self):
+ return self.distribution.has_ext_modules()
+
+ def has_scripts (self):
+ return self.distribution.has_scripts()
+
+ sub_commands = [('build_py', has_pure_modules),
+ ('build_clib', has_c_libraries),
+ ('build_ext', has_ext_modules),
+ ('build_scripts', has_scripts),
+ ]
diff --git a/cashew/Lib/distutils/command/build_clib.py b/cashew/Lib/distutils/command/build_clib.py
new file mode 100644
index 0000000..205587e
--- /dev/null
+++ b/cashew/Lib/distutils/command/build_clib.py
@@ -0,0 +1,209 @@
+"""distutils.command.build_clib
+
+Implements the Distutils 'build_clib' command, to build a C/C++ library
+that is included in the module distribution and needed by an extension
+module."""
+
+__revision__ = "$Id$"
+
+
+# XXX this module has *lots* of code ripped-off quite transparently from
+# build_ext.py -- not surprisingly really, as the work required to build
+# a static library from a collection of C source files is not really all
+# that different from what's required to build a shared object file from
+# a collection of C source files. Nevertheless, I haven't done the
+# necessary refactoring to account for the overlap in code between the
+# two modules, mainly because a number of subtle details changed in the
+# cut 'n paste. Sigh.
+
+import os
+from distutils.core import Command
+from distutils.errors import DistutilsSetupError
+from distutils.sysconfig import customize_compiler
+from distutils import log
+
+def show_compilers():
+ from distutils.ccompiler import show_compilers
+ show_compilers()
+
+
+class build_clib(Command):
+
+ description = "build C/C++ libraries used by Python extensions"
+
+ user_options = [
+ ('build-clib=', 'b',
+ "directory to build C/C++ libraries to"),
+ ('build-temp=', 't',
+ "directory to put temporary build by-products"),
+ ('debug', 'g',
+ "compile with debugging information"),
+ ('force', 'f',
+ "forcibly build everything (ignore file timestamps)"),
+ ('compiler=', 'c',
+ "specify the compiler type"),
+ ]
+
+ boolean_options = ['debug', 'force']
+
+ help_options = [
+ ('help-compiler', None,
+ "list available compilers", show_compilers),
+ ]
+
+ def initialize_options(self):
+ self.build_clib = None
+ self.build_temp = None
+
+ # List of libraries to build
+ self.libraries = None
+
+ # Compilation options for all libraries
+ self.include_dirs = None
+ self.define = None
+ self.undef = None
+ self.debug = None
+ self.force = 0
+ self.compiler = None
+
+
+ def finalize_options(self):
+ # This might be confusing: both build-clib and build-temp default
+ # to build-temp as defined by the "build" command. This is because
+ # I think that C libraries are really just temporary build
+ # by-products, at least from the point of view of building Python
+ # extensions -- but I want to keep my options open.
+ self.set_undefined_options('build',
+ ('build_temp', 'build_clib'),
+ ('build_temp', 'build_temp'),
+ ('compiler', 'compiler'),
+ ('debug', 'debug'),
+ ('force', 'force'))
+
+ self.libraries = self.distribution.libraries
+ if self.libraries:
+ self.check_library_list(self.libraries)
+
+ if self.include_dirs is None:
+ self.include_dirs = self.distribution.include_dirs or []
+ if isinstance(self.include_dirs, str):
+ self.include_dirs = self.include_dirs.split(os.pathsep)
+
+ # XXX same as for build_ext -- what about 'self.define' and
+ # 'self.undef' ?
+
+ def run(self):
+ if not self.libraries:
+ return
+
+ # Yech -- this is cut 'n pasted from build_ext.py!
+ from distutils.ccompiler import new_compiler
+ self.compiler = new_compiler(compiler=self.compiler,
+ dry_run=self.dry_run,
+ force=self.force)
+ customize_compiler(self.compiler)
+
+ if self.include_dirs is not None:
+ self.compiler.set_include_dirs(self.include_dirs)
+ if self.define is not None:
+ # 'define' option is a list of (name,value) tuples
+ for (name,value) in self.define:
+ self.compiler.define_macro(name, value)
+ if self.undef is not None:
+ for macro in self.undef:
+ self.compiler.undefine_macro(macro)
+
+ self.build_libraries(self.libraries)
+
+
+ def check_library_list(self, libraries):
+ """Ensure that the list of libraries is valid.
+
+ `library` is presumably provided as a command option 'libraries'.
+ This method checks that it is a list of 2-tuples, where the tuples
+ are (library_name, build_info_dict).
+
+ Raise DistutilsSetupError if the structure is invalid anywhere;
+ just returns otherwise.
+ """
+ if not isinstance(libraries, list):
+ raise DistutilsSetupError, \
+ "'libraries' option must be a list of tuples"
+
+ for lib in libraries:
+ if not isinstance(lib, tuple) and len(lib) != 2:
+ raise DistutilsSetupError, \
+ "each element of 'libraries' must a 2-tuple"
+
+ name, build_info = lib
+
+ if not isinstance(name, str):
+ raise DistutilsSetupError, \
+ "first element of each tuple in 'libraries' " + \
+ "must be a string (the library name)"
+ if '/' in name or (os.sep != '/' and os.sep in name):
+ raise DistutilsSetupError, \
+ ("bad library name '%s': " +
+ "may not contain directory separators") % \
+ lib[0]
+
+ if not isinstance(build_info, dict):
+ raise DistutilsSetupError, \
+ "second element of each tuple in 'libraries' " + \
+ "must be a dictionary (build info)"
+
+ def get_library_names(self):
+ # Assume the library list is valid -- 'check_library_list()' is
+ # called from 'finalize_options()', so it should be!
+ if not self.libraries:
+ return None
+
+ lib_names = []
+ for (lib_name, build_info) in self.libraries:
+ lib_names.append(lib_name)
+ return lib_names
+
+
+ def get_source_files(self):
+ self.check_library_list(self.libraries)
+ filenames = []
+ for (lib_name, build_info) in self.libraries:
+ sources = build_info.get('sources')
+ if sources is None or not isinstance(sources, (list, tuple)):
+ raise DistutilsSetupError, \
+ ("in 'libraries' option (library '%s'), "
+ "'sources' must be present and must be "
+ "a list of source filenames") % lib_name
+
+ filenames.extend(sources)
+ return filenames
+
+ def build_libraries(self, libraries):
+ for (lib_name, build_info) in libraries:
+ sources = build_info.get('sources')
+ if sources is None or not isinstance(sources, (list, tuple)):
+ raise DistutilsSetupError, \
+ ("in 'libraries' option (library '%s'), " +
+ "'sources' must be present and must be " +
+ "a list of source filenames") % lib_name
+ sources = list(sources)
+
+ log.info("building '%s' library", lib_name)
+
+ # First, compile the source code to object files in the library
+ # directory. (This should probably change to putting object
+ # files in a temporary build directory.)
+ macros = build_info.get('macros')
+ include_dirs = build_info.get('include_dirs')
+ objects = self.compiler.compile(sources,
+ output_dir=self.build_temp,
+ macros=macros,
+ include_dirs=include_dirs,
+ debug=self.debug)
+
+ # Now "link" the object files together into a static library.
+ # (On Unix at least, this isn't really linking -- it just
+ # builds an archive. Whatever.)
+ self.compiler.create_static_lib(objects, lib_name,
+ output_dir=self.build_clib,
+ debug=self.debug)
diff --git a/cashew/Lib/distutils/command/build_ext.py b/cashew/Lib/distutils/command/build_ext.py
new file mode 100644
index 0000000..2c68be3
--- /dev/null
+++ b/cashew/Lib/distutils/command/build_ext.py
@@ -0,0 +1,769 @@
+"""distutils.command.build_ext
+
+Implements the Distutils 'build_ext' command, for building extension
+modules (currently limited to C extensions, should accommodate C++
+extensions ASAP)."""
+
+# This module should be kept compatible with Python 2.1.
+
+__revision__ = "$Id$"
+
+import sys, os, string, re
+from types import *
+from site import USER_BASE, USER_SITE
+from distutils.core import Command
+from distutils.errors import *
+from distutils.sysconfig import customize_compiler, get_python_version
+from distutils.dep_util import newer_group
+from distutils.extension import Extension
+from distutils.util import get_platform
+from distutils import log
+
+if os.name == 'nt':
+ from distutils.msvccompiler import get_build_version
+ MSVC_VERSION = int(get_build_version())
+
+# An extension name is just a dot-separated list of Python NAMEs (ie.
+# the same as a fully-qualified module name).
+extension_name_re = re.compile \
+ (r'^[a-zA-Z_][a-zA-Z_0-9]*(\.[a-zA-Z_][a-zA-Z_0-9]*)*$')
+
+
+def show_compilers ():
+ from distutils.ccompiler import show_compilers
+ show_compilers()
+
+
+class build_ext (Command):
+
+ description = "build C/C++ extensions (compile/link to build directory)"
+
+ # XXX thoughts on how to deal with complex command-line options like
+ # these, i.e. how to make it so fancy_getopt can suck them off the
+ # command line and make it look like setup.py defined the appropriate
+ # lists of tuples of what-have-you.
+ # - each command needs a callback to process its command-line options
+ # - Command.__init__() needs access to its share of the whole
+ # command line (must ultimately come from
+ # Distribution.parse_command_line())
+ # - it then calls the current command class' option-parsing
+ # callback to deal with weird options like -D, which have to
+ # parse the option text and churn out some custom data
+ # structure
+ # - that data structure (in this case, a list of 2-tuples)
+ # will then be present in the command object by the time
+ # we get to finalize_options() (i.e. the constructor
+ # takes care of both command-line and client options
+ # in between initialize_options() and finalize_options())
+
+ sep_by = " (separated by '%s')" % os.pathsep
+ user_options = [
+ ('build-lib=', 'b',
+ "directory for compiled extension modules"),
+ ('build-temp=', 't',
+ "directory for temporary files (build by-products)"),
+ ('plat-name=', 'p',
+ "platform name to cross-compile for, if supported "
+ "(default: %s)" % get_platform()),
+ ('inplace', 'i',
+ "ignore build-lib and put compiled extensions into the source " +
+ "directory alongside your pure Python modules"),
+ ('include-dirs=', 'I',
+ "list of directories to search for header files" + sep_by),
+ ('define=', 'D',
+ "C preprocessor macros to define"),
+ ('undef=', 'U',
+ "C preprocessor macros to undefine"),
+ ('libraries=', 'l',
+ "external C libraries to link with"),
+ ('library-dirs=', 'L',
+ "directories to search for external C libraries" + sep_by),
+ ('rpath=', 'R',
+ "directories to search for shared C libraries at runtime"),
+ ('link-objects=', 'O',
+ "extra explicit link objects to include in the link"),
+ ('debug', 'g',
+ "compile/link with debugging information"),
+ ('force', 'f',
+ "forcibly build everything (ignore file timestamps)"),
+ ('compiler=', 'c',
+ "specify the compiler type"),
+ ('swig-cpp', None,
+ "make SWIG create C++ files (default is C)"),
+ ('swig-opts=', None,
+ "list of SWIG command line options"),
+ ('swig=', None,
+ "path to the SWIG executable"),
+ ('user', None,
+ "add user include, library and rpath"),
+ ]
+
+ boolean_options = ['inplace', 'debug', 'force', 'swig-cpp', 'user']
+
+ help_options = [
+ ('help-compiler', None,
+ "list available compilers", show_compilers),
+ ]
+
+ def initialize_options (self):
+ self.extensions = None
+ self.build_lib = None
+ self.plat_name = None
+ self.build_temp = None
+ self.inplace = 0
+ self.package = None
+
+ self.include_dirs = None
+ self.define = None
+ self.undef = None
+ self.libraries = None
+ self.library_dirs = None
+ self.rpath = None
+ self.link_objects = None
+ self.debug = None
+ self.force = None
+ self.compiler = None
+ self.swig = None
+ self.swig_cpp = None
+ self.swig_opts = None
+ self.user = None
+
+ def finalize_options(self):
+ from distutils import sysconfig
+
+ self.set_undefined_options('build',
+ ('build_lib', 'build_lib'),
+ ('build_temp', 'build_temp'),
+ ('compiler', 'compiler'),
+ ('debug', 'debug'),
+ ('force', 'force'),
+ ('plat_name', 'plat_name'),
+ )
+
+ if self.package is None:
+ self.package = self.distribution.ext_package
+
+ self.extensions = self.distribution.ext_modules
+
+ # Make sure Python's include directories (for Python.h, pyconfig.h,
+ # etc.) are in the include search path.
+ py_include = sysconfig.get_python_inc()
+ plat_py_include = sysconfig.get_python_inc(plat_specific=1)
+ if self.include_dirs is None:
+ self.include_dirs = self.distribution.include_dirs or []
+ if isinstance(self.include_dirs, str):
+ self.include_dirs = self.include_dirs.split(os.pathsep)
+
+ # Put the Python "system" include dir at the end, so that
+ # any local include dirs take precedence.
+ self.include_dirs.append(py_include)
+ if plat_py_include != py_include:
+ self.include_dirs.append(plat_py_include)
+
+ self.ensure_string_list('libraries')
+ self.ensure_string_list('link_objects')
+
+ # Life is easier if we're not forever checking for None, so
+ # simplify these options to empty lists if unset
+ if self.libraries is None:
+ self.libraries = []
+ if self.library_dirs is None:
+ self.library_dirs = []
+ elif type(self.library_dirs) is StringType:
+ self.library_dirs = string.split(self.library_dirs, os.pathsep)
+
+ if self.rpath is None:
+ self.rpath = []
+ elif type(self.rpath) is StringType:
+ self.rpath = string.split(self.rpath, os.pathsep)
+
+ # for extensions under windows use different directories
+ # for Release and Debug builds.
+ # also Python's library directory must be appended to library_dirs
+ if os.name == 'nt':
+ # the 'libs' directory is for binary installs - we assume that
+ # must be the *native* platform. But we don't really support
+ # cross-compiling via a binary install anyway, so we let it go.
+ self.library_dirs.append(os.path.join(sys.exec_prefix, 'libs'))
+ if self.debug:
+ self.build_temp = os.path.join(self.build_temp, "Debug")
+ else:
+ self.build_temp = os.path.join(self.build_temp, "Release")
+
+ # Append the source distribution include and library directories,
+ # this allows distutils on windows to work in the source tree
+ self.include_dirs.append(os.path.join(sys.exec_prefix, 'PC'))
+ if MSVC_VERSION == 9:
+ # Use the .lib files for the correct architecture
+ if self.plat_name == 'win32':
+ suffix = ''
+ else:
+ # win-amd64 or win-ia64
+ suffix = self.plat_name[4:]
+ # We could have been built in one of two places; add both
+ for d in ('PCbuild',), ('PC', 'VS9.0'):
+ new_lib = os.path.join(sys.exec_prefix, *d)
+ if suffix:
+ new_lib = os.path.join(new_lib, suffix)
+ self.library_dirs.append(new_lib)
+
+ elif MSVC_VERSION == 8:
+ self.library_dirs.append(os.path.join(sys.exec_prefix,
+ 'PC', 'VS8.0'))
+ elif MSVC_VERSION == 7:
+ self.library_dirs.append(os.path.join(sys.exec_prefix,
+ 'PC', 'VS7.1'))
+ else:
+ self.library_dirs.append(os.path.join(sys.exec_prefix,
+ 'PC', 'VC6'))
+
+ # OS/2 (EMX) doesn't support Debug vs Release builds, but has the
+ # import libraries in its "Config" subdirectory
+ if os.name == 'os2':
+ self.library_dirs.append(os.path.join(sys.exec_prefix, 'Config'))
+
+ # for extensions under Cygwin and AtheOS Python's library directory must be
+ # appended to library_dirs
+ if sys.platform[:6] == 'cygwin' or sys.platform[:6] == 'atheos':
+ if sys.executable.startswith(os.path.join(sys.exec_prefix, "bin")):
+ # building third party extensions
+ self.library_dirs.append(os.path.join(sys.prefix, "lib",
+ "python" + get_python_version(),
+ "config"))
+ else:
+ # building python standard extensions
+ self.library_dirs.append('.')
+
+ # For building extensions with a shared Python library,
+ # Python's library directory must be appended to library_dirs
+ # See Issues: #1600860, #4366
+ if (sysconfig.get_config_var('Py_ENABLE_SHARED')):
+ if not sysconfig.python_build:
+ # building third party extensions
+ self.library_dirs.append(sysconfig.get_config_var('LIBDIR'))
+ else:
+ # building python standard extensions
+ self.library_dirs.append('.')
+
+ # The argument parsing will result in self.define being a string, but
+ # it has to be a list of 2-tuples. All the preprocessor symbols
+ # specified by the 'define' option will be set to '1'. Multiple
+ # symbols can be separated with commas.
+
+ if self.define:
+ defines = self.define.split(',')
+ self.define = map(lambda symbol: (symbol, '1'), defines)
+
+ # The option for macros to undefine is also a string from the
+ # option parsing, but has to be a list. Multiple symbols can also
+ # be separated with commas here.
+ if self.undef:
+ self.undef = self.undef.split(',')
+
+ if self.swig_opts is None:
+ self.swig_opts = []
+ else:
+ self.swig_opts = self.swig_opts.split(' ')
+
+ # Finally add the user include and library directories if requested
+ if self.user:
+ user_include = os.path.join(USER_BASE, "include")
+ user_lib = os.path.join(USER_BASE, "lib")
+ if os.path.isdir(user_include):
+ self.include_dirs.append(user_include)
+ if os.path.isdir(user_lib):
+ self.library_dirs.append(user_lib)
+ self.rpath.append(user_lib)
+
+ def run(self):
+ from distutils.ccompiler import new_compiler
+
+ # 'self.extensions', as supplied by setup.py, is a list of
+ # Extension instances. See the documentation for Extension (in
+ # distutils.extension) for details.
+ #
+ # For backwards compatibility with Distutils 0.8.2 and earlier, we
+ # also allow the 'extensions' list to be a list of tuples:
+ # (ext_name, build_info)
+ # where build_info is a dictionary containing everything that
+ # Extension instances do except the name, with a few things being
+ # differently named. We convert these 2-tuples to Extension
+ # instances as needed.
+
+ if not self.extensions:
+ return
+
+ # If we were asked to build any C/C++ libraries, make sure that the
+ # directory where we put them is in the library search path for
+ # linking extensions.
+ if self.distribution.has_c_libraries():
+ build_clib = self.get_finalized_command('build_clib')
+ self.libraries.extend(build_clib.get_library_names() or [])
+ self.library_dirs.append(build_clib.build_clib)
+
+ # Setup the CCompiler object that we'll use to do all the
+ # compiling and linking
+ self.compiler = new_compiler(compiler=self.compiler,
+ verbose=self.verbose,
+ dry_run=self.dry_run,
+ force=self.force)
+ customize_compiler(self.compiler)
+ # If we are cross-compiling, init the compiler now (if we are not
+ # cross-compiling, init would not hurt, but people may rely on
+ # late initialization of compiler even if they shouldn't...)
+ if os.name == 'nt' and self.plat_name != get_platform():
+ self.compiler.initialize(self.plat_name)
+
+ # And make sure that any compile/link-related options (which might
+ # come from the command-line or from the setup script) are set in
+ # that CCompiler object -- that way, they automatically apply to
+ # all compiling and linking done here.
+ if self.include_dirs is not None:
+ self.compiler.set_include_dirs(self.include_dirs)
+ if self.define is not None:
+ # 'define' option is a list of (name,value) tuples
+ for (name, value) in self.define:
+ self.compiler.define_macro(name, value)
+ if self.undef is not None:
+ for macro in self.undef:
+ self.compiler.undefine_macro(macro)
+ if self.libraries is not None:
+ self.compiler.set_libraries(self.libraries)
+ if self.library_dirs is not None:
+ self.compiler.set_library_dirs(self.library_dirs)
+ if self.rpath is not None:
+ self.compiler.set_runtime_library_dirs(self.rpath)
+ if self.link_objects is not None:
+ self.compiler.set_link_objects(self.link_objects)
+
+ # Now actually compile and link everything.
+ self.build_extensions()
+
+ def check_extensions_list(self, extensions):
+ """Ensure that the list of extensions (presumably provided as a
+ command option 'extensions') is valid, i.e. it is a list of
+ Extension objects. We also support the old-style list of 2-tuples,
+ where the tuples are (ext_name, build_info), which are converted to
+ Extension instances here.
+
+ Raise DistutilsSetupError if the structure is invalid anywhere;
+ just returns otherwise.
+ """
+ if not isinstance(extensions, list):
+ raise DistutilsSetupError, \
+ "'ext_modules' option must be a list of Extension instances"
+
+ for i, ext in enumerate(extensions):
+ if isinstance(ext, Extension):
+ continue # OK! (assume type-checking done
+ # by Extension constructor)
+
+ if not isinstance(ext, tuple) or len(ext) != 2:
+ raise DistutilsSetupError, \
+ ("each element of 'ext_modules' option must be an "
+ "Extension instance or 2-tuple")
+
+ ext_name, build_info = ext
+
+ log.warn(("old-style (ext_name, build_info) tuple found in "
+ "ext_modules for extension '%s'"
+ "-- please convert to Extension instance" % ext_name))
+
+ if not (isinstance(ext_name, str) and
+ extension_name_re.match(ext_name)):
+ raise DistutilsSetupError, \
+ ("first element of each tuple in 'ext_modules' "
+ "must be the extension name (a string)")
+
+ if not isinstance(build_info, dict):
+ raise DistutilsSetupError, \
+ ("second element of each tuple in 'ext_modules' "
+ "must be a dictionary (build info)")
+
+ # OK, the (ext_name, build_info) dict is type-safe: convert it
+ # to an Extension instance.
+ ext = Extension(ext_name, build_info['sources'])
+
+ # Easy stuff: one-to-one mapping from dict elements to
+ # instance attributes.
+ for key in ('include_dirs', 'library_dirs', 'libraries',
+ 'extra_objects', 'extra_compile_args',
+ 'extra_link_args'):
+ val = build_info.get(key)
+ if val is not None:
+ setattr(ext, key, val)
+
+ # Medium-easy stuff: same syntax/semantics, different names.
+ ext.runtime_library_dirs = build_info.get('rpath')
+ if 'def_file' in build_info:
+ log.warn("'def_file' element of build info dict "
+ "no longer supported")
+
+ # Non-trivial stuff: 'macros' split into 'define_macros'
+ # and 'undef_macros'.
+ macros = build_info.get('macros')
+ if macros:
+ ext.define_macros = []
+ ext.undef_macros = []
+ for macro in macros:
+ if not (isinstance(macro, tuple) and len(macro) in (1, 2)):
+ raise DistutilsSetupError, \
+ ("'macros' element of build info dict "
+ "must be 1- or 2-tuple")
+ if len(macro) == 1:
+ ext.undef_macros.append(macro[0])
+ elif len(macro) == 2:
+ ext.define_macros.append(macro)
+
+ extensions[i] = ext
+
+ def get_source_files(self):
+ self.check_extensions_list(self.extensions)
+ filenames = []
+
+ # Wouldn't it be neat if we knew the names of header files too...
+ for ext in self.extensions:
+ filenames.extend(ext.sources)
+
+ return filenames
+
+ def get_outputs(self):
+ # Sanity check the 'extensions' list -- can't assume this is being
+ # done in the same run as a 'build_extensions()' call (in fact, we
+ # can probably assume that it *isn't*!).
+ self.check_extensions_list(self.extensions)
+
+ # And build the list of output (built) filenames. Note that this
+ # ignores the 'inplace' flag, and assumes everything goes in the
+ # "build" tree.
+ outputs = []
+ for ext in self.extensions:
+ outputs.append(self.get_ext_fullpath(ext.name))
+ return outputs
+
+ def build_extensions(self):
+ # First, sanity-check the 'extensions' list
+ self.check_extensions_list(self.extensions)
+
+ for ext in self.extensions:
+ self.build_extension(ext)
+
+ def build_extension(self, ext):
+ sources = ext.sources
+ if sources is None or type(sources) not in (ListType, TupleType):
+ raise DistutilsSetupError, \
+ ("in 'ext_modules' option (extension '%s'), " +
+ "'sources' must be present and must be " +
+ "a list of source filenames") % ext.name
+ sources = list(sources)
+
+ ext_path = self.get_ext_fullpath(ext.name)
+ depends = sources + ext.depends
+ if not (self.force or newer_group(depends, ext_path, 'newer')):
+ log.debug("skipping '%s' extension (up-to-date)", ext.name)
+ return
+ else:
+ log.info("building '%s' extension", ext.name)
+
+ # First, scan the sources for SWIG definition files (.i), run
+ # SWIG on 'em to create .c files, and modify the sources list
+ # accordingly.
+ sources = self.swig_sources(sources, ext)
+
+ # Next, compile the source code to object files.
+
+ # XXX not honouring 'define_macros' or 'undef_macros' -- the
+ # CCompiler API needs to change to accommodate this, and I
+ # want to do one thing at a time!
+
+ # Two possible sources for extra compiler arguments:
+ # - 'extra_compile_args' in Extension object
+ # - CFLAGS environment variable (not particularly
+ # elegant, but people seem to expect it and I
+ # guess it's useful)
+ # The environment variable should take precedence, and
+ # any sensible compiler will give precedence to later
+ # command line args. Hence we combine them in order:
+ extra_args = ext.extra_compile_args or []
+
+ macros = ext.define_macros[:]
+ for undef in ext.undef_macros:
+ macros.append((undef,))
+
+ objects = self.compiler.compile(sources,
+ output_dir=self.build_temp,
+ macros=macros,
+ include_dirs=ext.include_dirs,
+ debug=self.debug,
+ extra_postargs=extra_args,
+ depends=ext.depends)
+
+ # XXX -- this is a Vile HACK!
+ #
+ # The setup.py script for Python on Unix needs to be able to
+ # get this list so it can perform all the clean up needed to
+ # avoid keeping object files around when cleaning out a failed
+ # build of an extension module. Since Distutils does not
+ # track dependencies, we have to get rid of intermediates to
+ # ensure all the intermediates will be properly re-built.
+ #
+ self._built_objects = objects[:]
+
+ # Now link the object files together into a "shared object" --
+ # of course, first we have to figure out all the other things
+ # that go into the mix.
+ if ext.extra_objects:
+ objects.extend(ext.extra_objects)
+ extra_args = ext.extra_link_args or []
+
+ # Detect target language, if not provided
+ language = ext.language or self.compiler.detect_language(sources)
+
+ self.compiler.link_shared_object(
+ objects, ext_path,
+ libraries=self.get_libraries(ext),
+ library_dirs=ext.library_dirs,
+ runtime_library_dirs=ext.runtime_library_dirs,
+ extra_postargs=extra_args,
+ export_symbols=self.get_export_symbols(ext),
+ debug=self.debug,
+ build_temp=self.build_temp,
+ target_lang=language)
+
+
+ def swig_sources (self, sources, extension):
+
+ """Walk the list of source files in 'sources', looking for SWIG
+ interface (.i) files. Run SWIG on all that are found, and
+ return a modified 'sources' list with SWIG source files replaced
+ by the generated C (or C++) files.
+ """
+
+ new_sources = []
+ swig_sources = []
+ swig_targets = {}
+
+ # XXX this drops generated C/C++ files into the source tree, which
+ # is fine for developers who want to distribute the generated
+ # source -- but there should be an option to put SWIG output in
+ # the temp dir.
+
+ if self.swig_cpp:
+ log.warn("--swig-cpp is deprecated - use --swig-opts=-c++")
+
+ if self.swig_cpp or ('-c++' in self.swig_opts) or \
+ ('-c++' in extension.swig_opts):
+ target_ext = '.cpp'
+ else:
+ target_ext = '.c'
+
+ for source in sources:
+ (base, ext) = os.path.splitext(source)
+ if ext == ".i": # SWIG interface file
+ new_sources.append(base + '_wrap' + target_ext)
+ swig_sources.append(source)
+ swig_targets[source] = new_sources[-1]
+ else:
+ new_sources.append(source)
+
+ if not swig_sources:
+ return new_sources
+
+ swig = self.swig or self.find_swig()
+ swig_cmd = [swig, "-python"]
+ swig_cmd.extend(self.swig_opts)
+ if self.swig_cpp:
+ swig_cmd.append("-c++")
+
+ # Do not override commandline arguments
+ if not self.swig_opts:
+ for o in extension.swig_opts:
+ swig_cmd.append(o)
+
+ for source in swig_sources:
+ target = swig_targets[source]
+ log.info("swigging %s to %s", source, target)
+ self.spawn(swig_cmd + ["-o", target, source])
+
+ return new_sources
+
+ # swig_sources ()
+
+    def find_swig (self):
+        """Return the name of the SWIG executable.  On Unix, this is
+        just "swig" -- it should be in the PATH.  Tries a bit harder on
+        Windows.
+
+        Raises DistutilsPlatformError when 'os.name' is none of
+        "posix", "nt" or "os2".
+        """
+        if os.name == "posix":
+            return "swig"
+        elif os.name == "nt":
+            # Look for SWIG in its standard installation directory on
+            # Windows (or so I presume!).  If we find it there, great;
+            # if not, act like Unix and assume it's in the PATH.
+            for vers in ("1.3", "1.2", "1.1"):
+                fn = os.path.join("c:\\swig%s" % vers, "swig.exe")
+                if os.path.isfile(fn):
+                    return fn
+            return "swig.exe"
+        elif os.name == "os2":
+            # assume swig available in the PATH.
+            return "swig.exe"
+        else:
+            # Call form of 'raise': the "raise Class, value" statement
+            # is Python 2-only syntax.
+            raise DistutilsPlatformError(
+                "I don't know how to find (much less run) SWIG "
+                "on platform '%s'" % os.name)
+
+ # find_swig ()
+
+ # -- Name generators -----------------------------------------------
+ # (extension names, filenames, whatever)
+    def get_ext_fullpath(self, ext_name):
+        """Return the path of the file an extension is built to.
+
+        The path lies under `build_lib`, or directly in the package
+        directory reported by build_py when the `inplace` option is set.
+        """
+        # normalise the name so that only dots separate its parts
+        to_dots = string.maketrans('/'+os.sep, '..')
+        ext_name = ext_name.translate(to_dots)
+
+        fullname = self.get_ext_fullname(ext_name)
+        parts = fullname.split('.')
+        parent_parts = parts[:-1]
+        # only the last component of the extension's file name is kept
+        filename = self.get_ext_filename(ext_name)
+        basename = os.path.split(filename)[-1]
+
+        if self.inplace:
+            # the inplace option requires to find the package directory
+            # using the build_py command for that
+            build_py = self.get_finalized_command('build_py')
+            pkg_dir = build_py.get_package_dir('.'.join(parent_parts))
+            # returning
+            #   package_dir/filename
+            return os.path.join(os.path.abspath(pkg_dir), basename)
+
+        # no further work needed
+        # returning :
+        #   build_dir/package/path/filename
+        relpath = os.path.join(*parent_parts+[basename])
+        return os.path.join(self.build_lib, relpath)
+
+    def get_ext_fullname(self, ext_name):
+        """Return 'ext_name' qualified with the command's package.
+
+        The name is returned untouched when no package is set."""
+        prefix = self.package
+        if prefix is not None:
+            return prefix + '.' + ext_name
+        return ext_name
+
+    def get_ext_filename(self, ext_name):
+        r"""Convert the name of an extension (eg. "foo.bar") into the name
+        of the file from which it will be loaded (eg. "foo/bar.so", or
+        "foo\bar.pyd").
+        """
+        from distutils.sysconfig import get_config_var
+        ext_path = ext_name.split('.')
+        # OS/2 has an 8 character module (extension) limit :-(
+        if os.name == "os2":
+            ext_path[-1] = ext_path[-1][:8]
+        so_ext = get_config_var('SO')
+        # extensions in debug_mode are named 'module_d.pyd' under windows
+        if os.name == 'nt' and self.debug:
+            return os.path.join(*ext_path) + '_d' + so_ext
+        return os.path.join(*ext_path) + so_ext
+
+    def get_export_symbols (self, ext):
+        """Return 'ext.export_symbols', the symbols a shared extension
+        must export, after appending "init" + module_name to it when
+        missing.  Only relevant on Windows, where the .pyd file (DLL)
+        must export the module "init" function."""
+        init_symbol = "init" + ext.name.split('.')[-1]
+        if init_symbol in ext.export_symbols:
+            return ext.export_symbols
+        ext.export_symbols.append(init_symbol)
+        return ext.export_symbols
+
+    def get_libraries (self, ext):
+        """Return the libraries a shared extension has to be linked with.
+
+        That is 'ext.libraries' itself, or a new list made of it followed
+        by the Python library (eg. python20.dll) on the platforms whose
+        branch below asks for one.  'ext.libraries' is never modified.
+        """
+        platform = sys.platform
+        # (major, minor) of the running interpreter, used to build the
+        # name of the Python library
+        py_version = (sys.hexversion >> 24, (sys.hexversion >> 16) & 0xff)
+
+        if platform == "win32":
+            # The python library is always needed on Windows.  For MSVC, this
+            # is redundant, since the library is mentioned in a pragma in
+            # pyconfig.h that MSVC groks.  The other Windows compilers all seem
+            # to need it mentioned explicitly, though, so that's what we do.
+            from distutils.msvccompiler import MSVCCompiler
+            if isinstance(self.compiler, MSVCCompiler):
+                return ext.libraries
+            template = "python%d%d"
+            # Append '_d' to the python import library on debug builds.
+            if self.debug:
+                template = template + '_d'
+            pythonlib = template % py_version
+            # don't extend ext.libraries, it may be shared with other
+            # extensions, it is a reference to the original list
+            return ext.libraries + [pythonlib]
+
+        if platform == "os2emx":
+            # EMX/GCC requires the python library explicitly, and I
+            # believe VACPP does as well (though not confirmed) - AIM Apr01
+            template = "python%d%d"
+            # debug versions of the main DLL aren't supported, at least
+            # not at this time - AIM Apr01
+            #if self.debug:
+            #    template = template + '_d'
+            pythonlib = template % py_version
+            # don't extend ext.libraries, it may be shared with other
+            # extensions, it is a reference to the original list
+            return ext.libraries + [pythonlib]
+
+        if platform.startswith("cygwin"):
+            pythonlib = "python%d.%d" % py_version
+            # don't extend ext.libraries, it may be shared with other
+            # extensions, it is a reference to the original list
+            return ext.libraries + [pythonlib]
+
+        if platform.startswith("atheos"):
+            from distutils import sysconfig
+
+            pythonlib = "python%d.%d" % py_version
+            # Get SHLIBS from Makefile, dropping any '-l' prefix
+            extra = []
+            for lib in sysconfig.get_config_var('SHLIBS').split():
+                if lib.startswith('-l'):
+                    lib = lib[2:]
+                extra.append(lib)
+            # don't extend ext.libraries, it may be shared with other
+            # extensions, it is a reference to the original list
+            return ext.libraries + [pythonlib, "m"] + extra
+
+        if platform == 'darwin':
+            # Don't use the default code below
+            return ext.libraries
+        if platform.startswith('aix'):
+            # Don't use the default code below
+            return ext.libraries
+
+        # default: add the Python library only for a shared-library build
+        from distutils import sysconfig
+        if sysconfig.get_config_var('Py_ENABLE_SHARED'):
+            pythonlib = "python%d.%d" % py_version
+            return ext.libraries + [pythonlib]
+        return ext.libraries
+
+# class build_ext
diff --git a/cashew/Lib/distutils/command/build_py.py b/cashew/Lib/distutils/command/build_py.py
new file mode 100644
index 0000000..c123c62
--- /dev/null
+++ b/cashew/Lib/distutils/command/build_py.py
@@ -0,0 +1,394 @@
+"""distutils.command.build_py
+
+Implements the Distutils 'build_py' command."""
+
+__revision__ = "$Id$"
+
+import os
+import sys
+from glob import glob
+
+from distutils.core import Command
+from distutils.errors import DistutilsOptionError, DistutilsFileError
+from distutils.util import convert_path
+from distutils import log
+
+class build_py(Command):
+    """Copy pure Python modules and package data to the build directory."""
+    description = "\"build\" pure Python modules (copy to build directory)"
+
+    user_options = [
+        ('build-lib=', 'd', "directory to \"build\" (copy) to"),
+        ('compile', 'c', "compile .py to .pyc"),
+        ('no-compile', None, "don't compile .py files [default]"),
+        ('optimize=', 'O',
+         "also compile with optimization: -O1 for \"python -O\", "
+         "-O2 for \"python -OO\", and -O0 to disable [default: -O0]"),
+        ('force', 'f', "forcibly build everything (ignore file timestamps)"),
+        ]
+
+    boolean_options = ['compile', 'force']
+    negative_opt = {'no-compile' : 'compile'}
+
+    def initialize_options(self):
+        """Reset every option to its unset/default value."""
+        self.build_lib = None
+        self.py_modules = None
+        self.package = None
+        self.package_data = None
+        self.package_dir = None
+        self.compile = 0
+        self.optimize = 0
+        self.force = None
+
+    def finalize_options(self):
+        """Resolve unset options; validate a non-integer 'optimize'."""
+        self.set_undefined_options('build',
+                                   ('build_lib', 'build_lib'),
+                                   ('force', 'force'))
+        # Get the distribution options that are aliases for build_py
+        # options -- list of packages and list of modules.
+        self.packages = self.distribution.packages
+        self.py_modules = self.distribution.py_modules
+        self.package_data = self.distribution.package_data
+        self.package_dir = {}
+        if self.distribution.package_dir:
+            for name, path in self.distribution.package_dir.items():
+                self.package_dir[name] = convert_path(path)
+        self.data_files = self.get_data_files()
+
+        # Ick, copied straight from install_lib.py (fancy_getopt needs a
+        # type system!).  No 'assert' here: it would be skipped under -O.
+        if not isinstance(self.optimize, int):
+            try:
+                self.optimize = int(self.optimize)
+                if not 0 <= self.optimize <= 2:
+                    raise ValueError(self.optimize)
+            except ValueError:
+                raise DistutilsOptionError("optimize must be 0, 1, or 2")
+
+    def run(self):
+        """Copy modules, packages and package data, then byte-compile."""
+        # XXX copy_file by default preserves atime and mtime.  IMHO this is
+        # the right thing to do, but perhaps it should be an option -- in
+        # particular, a site administrator might want installed files to
+        # reflect the time of installation rather than the last
+        # modification time before the installed release.
+
+        # XXX copy_file by default preserves mode, which appears to be the
+        # wrong thing to do: if a file is read-only in the working
+        # directory, we want it to be installed read/write so that the next
+        # installation of the same module distribution can overwrite it
+        # without problems.  (This might be a Unix-specific issue.)  Thus
+        # we turn off 'preserve_mode' when copying to the build directory,
+        # since the build directory is supposed to be exactly what the
+        # installation will look like (ie. we preserve mode when
+        # installing).
+
+        # Two options control which modules will be installed: 'packages'
+        # and 'py_modules'.  The former lets us work with whole packages, not
+        # specifying individual modules at all; the latter is for
+        # specifying modules one-at-a-time.
+
+        if self.py_modules:
+            self.build_modules()
+        if self.packages:
+            self.build_packages()
+            self.build_package_data()
+        self.byte_compile(self.get_outputs(include_bytecode=0))
+
+    def get_data_files(self):
+        """Generate list of '(package,src_dir,build_dir,filenames)' tuples"""
+        data = []
+        if not self.packages:
+            return data
+        for package in self.packages:
+            # Locate package source directory
+            src_dir = self.get_package_dir(package)
+            # Compute package build directory
+            build_dir = os.path.join(*([self.build_lib] + package.split('.')))
+
+            # Length of path to strip from found files
+            plen = 0
+            if src_dir:
+                plen = len(src_dir)+1
+
+            # Strip directory from globbed filenames
+            filenames = [
+                file[plen:] for file in self.find_data_files(package, src_dir)
+                ]
+            data.append((package, src_dir, build_dir, filenames))
+        return data
+
+    def find_data_files(self, package, src_dir):
+        """Return filenames for package's data files in 'src_dir'"""
+        globs = (self.package_data.get('', [])
+                 + self.package_data.get(package, []))
+        files = []
+        for pattern in globs:
+            # Each pattern has to be converted to a platform-specific path
+            filelist = glob(os.path.join(src_dir, convert_path(pattern)))
+            # Files that match more than one pattern are only added once
+            files.extend([fn for fn in filelist if fn not in files
+                and os.path.isfile(fn)])
+        return files
+
+    def build_package_data(self):
+        """Copy data files into build directory"""
+        for package, src_dir, build_dir, filenames in self.data_files:
+            for filename in filenames:
+                target = os.path.join(build_dir, filename)
+                self.mkpath(os.path.dirname(target))
+                self.copy_file(os.path.join(src_dir, filename), target,
+                               preserve_mode=False)
+
+    def get_package_dir(self, package):
+        """Return the directory, relative to the top of the source
+           distribution, where package 'package' should be found
+           (at least according to the 'package_dir' option, if any)."""
+        path = package.split('.')
+
+        if not self.package_dir:
+            if path:
+                return os.path.join(*path)
+            else:
+                return ''
+        else:
+            tail = []
+            while path:
+                try:
+                    pdir = self.package_dir['.'.join(path)]
+                except KeyError:
+                    tail.insert(0, path[-1])
+                    del path[-1]
+                else:
+                    tail.insert(0, pdir)
+                    return os.path.join(*tail)
+            else:
+                # Oops, got all the way through 'path' without finding a
+                # match in package_dir.  If package_dir defines a directory
+                # for the root (nameless) package, then fallback on it;
+                # otherwise, we might as well have not consulted
+                # package_dir at all, as we just use the directory implied
+                # by 'tail' (which should be the same as the original value
+                # of 'path' at this point).
+                pdir = self.package_dir.get('')
+                if pdir is not None:
+                    tail.insert(0, pdir)
+
+                if tail:
+                    return os.path.join(*tail)
+                else:
+                    return ''
+
+    def check_package(self, package, package_dir):
+        """Validate 'package_dir'; return the __init__.py path, or None."""
+        # Empty dir name means current directory, which we can probably
+        # assume exists.  Also, os.path.exists and isdir don't know about
+        # my "empty string means current dir" convention, so we have to
+        # circumvent them.
+        if package_dir != "":
+            if not os.path.exists(package_dir):
+                raise DistutilsFileError(
+                      "package directory '%s' does not exist" % package_dir)
+            if not os.path.isdir(package_dir):
+                raise DistutilsFileError(
+                       "supposed package directory '%s' exists, "
+                       "but is not a directory" % package_dir)
+
+        # Require __init__.py for all but the "root package"
+        if package:
+            init_py = os.path.join(package_dir, "__init__.py")
+            if os.path.isfile(init_py):
+                return init_py
+            else:
+                log.warn(("package init file '%s' not found " +
+                          "(or not a regular file)"), init_py)
+        # Either not in a package at all (__init__.py not expected), or
+        # __init__.py doesn't exist -- so don't return the filename.
+        return None
+
+    def check_module(self, module, module_file):
+        """Return whether 'module_file' exists, warning when it does not."""
+        if not os.path.isfile(module_file):
+            log.warn("file %s (for module %s) not found", module_file, module)
+            return False
+        else:
+            return True
+
+    def find_package_modules(self, package, package_dir):
+        """List (package, module, file) for the package's "*.py" files."""
+        self.check_package(package, package_dir)
+        module_files = glob(os.path.join(package_dir, "*.py"))
+        modules = []
+        setup_script = os.path.abspath(self.distribution.script_name)
+        for f in module_files:
+            abs_f = os.path.abspath(f)
+            if abs_f != setup_script:
+                module = os.path.splitext(os.path.basename(f))[0]
+                modules.append((package, module, f))
+            else:
+                self.debug_print("excluding %s" % setup_script)
+        return modules
+
+    def find_modules(self):
+        """Finds individually-specified Python modules, ie. those listed by
+        module name in 'self.py_modules'.  Returns a list of tuples (package,
+        module_base, filename): 'package' is the dotted name of the package
+        holding the module ('' for a top-level one); 'module_base' is the bare
+        (no packages, no dots) module name, and 'filename' is the path to the
+        ".py" file (relative to the distribution root) that implements the
+        module.
+        """
+        # Map package names to tuples of useful info about the package:
+        #    (package_dir, checked)
+        # package_dir - the directory where we'll find source files for
+        #   this package
+        # checked - true if we have checked that the package directory
+        #   is valid (exists, contains __init__.py, ... ?)
+        packages = {}
+
+        # List of (package, module, filename) tuples to return
+        modules = []
+
+        # We treat modules-in-packages almost the same as toplevel modules,
+        # just the "package" for a toplevel is empty (either an empty
+        # string or empty list, depending on context).  Differences:
+        #   - don't check for __init__.py in directory for empty package
+        for module in self.py_modules:
+            path = module.split('.')
+            package = '.'.join(path[0:-1])
+            module_base = path[-1]
+            try:
+                (package_dir, checked) = packages[package]
+            except KeyError:
+                package_dir = self.get_package_dir(package)
+                checked = 0
+            if not checked:
+                init_py = self.check_package(package, package_dir)
+                packages[package] = (package_dir, 1)
+                if init_py:
+                    modules.append((package, "__init__", init_py))
+
+            # XXX perhaps we should also check for just .pyc files
+            # (so greedy closed-source bastards can distribute Python
+            # modules too)
+            module_file = os.path.join(package_dir, module_base + ".py")
+            if not self.check_module(module, module_file):
+                continue
+            modules.append((package, module_base, module_file))
+
+        return modules
+
+    def find_all_modules(self):
+        """Compute the list of all modules that will be built, whether
+        they are specified one-module-at-a-time ('self.py_modules') or
+        by whole packages ('self.packages').  Return a list of tuples
+        (package, module, module_file), just like 'find_modules()' and
+        'find_package_modules()' do."""
+        modules = []
+        if self.py_modules:
+            modules.extend(self.find_modules())
+        if self.packages:
+            for package in self.packages:
+                package_dir = self.get_package_dir(package)
+                m = self.find_package_modules(package, package_dir)
+                modules.extend(m)
+        return modules
+
+    def get_source_files(self):
+        """Return the source file path of every module to be built."""
+        return [module[-1] for module in self.find_all_modules()]
+
+    def get_module_outfile(self, build_dir, package, module):
+        """Return build_dir/<each item of 'package'>/<module>.py."""
+        outfile_path = [build_dir] + list(package) + [module + ".py"]
+        return os.path.join(*outfile_path)
+
+    def get_outputs(self, include_bytecode=1):
+        """Return output file names, with bytecode if 'include_bytecode'."""
+        modules = self.find_all_modules()
+        outputs = []
+        for (package, module, module_file) in modules:
+            package = package.split('.')
+            filename = self.get_module_outfile(self.build_lib, package, module)
+            outputs.append(filename)
+            if include_bytecode:
+                if self.compile:
+                    outputs.append(filename + "c")
+                if self.optimize > 0:
+                    outputs.append(filename + "o")
+        outputs += [
+            os.path.join(build_dir, filename)
+            for package, src_dir, build_dir, filenames in self.data_files
+            for filename in filenames
+            ]
+
+        return outputs
+
+    def build_module(self, module, module_file, package):
+        """Copy 'module_file' to its place under 'build_lib'."""
+        if isinstance(package, str):
+            package = package.split('.')
+        elif not isinstance(package, (list, tuple)):
+            raise TypeError(
+                  "'package' must be a string (dot-separated), list, or tuple")
+        # Now put the module source file into the "build" area -- this is
+        # easy, we just copy it somewhere under self.build_lib (the build
+        # directory for Python source).
+        outfile = self.get_module_outfile(self.build_lib, package, module)
+        dir = os.path.dirname(outfile)
+        self.mkpath(dir)
+        return self.copy_file(module_file, outfile, preserve_mode=0)
+
+    def build_modules(self):
+        """Copy every individually-listed module (see find_modules)."""
+        modules = self.find_modules()
+        for (package, module, module_file) in modules:
+            # Now "build" the module -- ie. copy the source file to
+            # self.build_lib (the build directory for Python source).
+            # (Actually, it gets copied to the directory for this package
+            # under self.build_lib.)
+            self.build_module(module, module_file, package)
+
+    def build_packages(self):
+        """Copy the modules of every package in 'self.packages'."""
+        for package in self.packages:
+            # Get list of (package, module, module_file) tuples based on
+            # scanning the package directory.  'package' is only included
+            # in the tuple so that 'find_modules()' and
+            # 'find_package_modules()' have a consistent interface; it's
+            # ignored here (apart from a sanity check).  Also, 'module' is
+            # the *unqualified* module name (ie. no dots, no package -- we
+            # already know its package!), and 'module_file' is the path to
+            # the .py file, relative to the current directory
+            # (ie. including 'package_dir').
+            package_dir = self.get_package_dir(package)
+            modules = self.find_package_modules(package, package_dir)
+
+            # Now loop over the modules we found, "building" each one (just
+            # copy it to self.build_lib).
+            for (package_, module, module_file) in modules:
+                assert package == package_
+                self.build_module(module, module_file, package)
+
+    def byte_compile(self, files):
+        """Byte-compile 'files' according to 'compile' and 'optimize'."""
+        if sys.dont_write_bytecode:
+            self.warn('byte-compiling is disabled, skipping.')
+            return
+        from distutils.util import byte_compile
+        prefix = self.build_lib
+        if prefix[-1] != os.sep:
+            prefix = prefix + os.sep
+
+        # XXX this code is essentially the same as the 'byte_compile()
+        # method of the "install_lib" command, except for the determination
+        # of the 'prefix' string.  Hmmm.
+
+        if self.compile:
+            byte_compile(files, optimize=0,
+                         force=self.force, prefix=prefix, dry_run=self.dry_run)
+        if self.optimize > 0:
+            byte_compile(files, optimize=self.optimize,
+                         force=self.force, prefix=prefix, dry_run=self.dry_run)
diff --git a/cashew/Lib/distutils/command/build_scripts.py b/cashew/Lib/distutils/command/build_scripts.py
new file mode 100644
index 0000000..567df65
--- /dev/null
+++ b/cashew/Lib/distutils/command/build_scripts.py
@@ -0,0 +1,131 @@
+"""distutils.command.build_scripts
+
+Implements the Distutils 'build_scripts' command."""
+
+__revision__ = "$Id$"
+
+import os, re
+from stat import ST_MODE
+from distutils.core import Command
+from distutils.dep_util import newer
+from distutils.util import convert_path
+from distutils import log
+
+# matches a "#!" first line that names python; group 1 is what follows it
+first_line_re = re.compile('^#!.*python[0-9.]*([ \t].*)?$')
+
+class build_scripts (Command):
+    """Copy scripts to the build directory, fixing up their "#!" line."""
+    description = "\"build\" scripts (copy and fixup #! line)"
+
+    user_options = [
+        ('build-dir=', 'd', "directory to \"build\" (copy) to"),
+        ('force', 'f', "forcibly build everything (ignore file timestamps)"),
+        ('executable=', 'e', "specify final destination interpreter path"),
+        ]
+
+    boolean_options = ['force']
+
+    def initialize_options (self):
+        """Reset every option to None."""
+        self.build_dir = None
+        self.scripts = None
+        self.force = None
+        self.executable = None
+        self.outfiles = None
+
+    def finalize_options (self):
+        """Inherit unset options from 'build'; get the scripts to copy."""
+        self.set_undefined_options('build',
+                                   ('build_scripts', 'build_dir'),
+                                   ('force', 'force'),
+                                   ('executable', 'executable'))
+        self.scripts = self.distribution.scripts
+
+    def get_source_files(self):
+        """Return the list of scripts handled by this command."""
+        return self.scripts
+
+    def run (self):
+        """Copy the scripts, if the distribution has any."""
+        if not self.scripts:
+            return
+        self.copy_scripts()
+
+    def copy_scripts (self):
+        """Copy each script listed in 'self.scripts'; if it's marked as a
+        Python script in the Unix way (first line matches 'first_line_re',
+        ie. starts with "\#!" and contains "python"), then adjust the first
+        line to refer to the current Python interpreter as we copy.
+        """
+        _sysconfig = __import__('sysconfig')
+        self.mkpath(self.build_dir)
+        outfiles = []
+        for script in self.scripts:
+            adjust = 0
+            script = convert_path(script)
+            outfile = os.path.join(self.build_dir, os.path.basename(script))
+            outfiles.append(outfile)
+
+            if not self.force and not newer(script, outfile):
+                log.debug("not copying %s (up-to-date)", script)
+                continue
+            # Always open the file, but ignore failures in dry-run mode --
+            # that way, we'll get accurate feedback if we can read the
+            # script.
+            try:
+                f = open(script, "r")
+            except IOError:
+                if not self.dry_run:
+                    raise
+                f = None
+            else:
+                first_line = f.readline()
+                if not first_line:
+                    self.warn("%s is an empty file (skipping)" % script)
+                    f.close()   # the skipped script must not stay open
+                    continue
+                match = first_line_re.match(first_line)
+                if match:
+                    adjust = 1
+                    post_interp = match.group(1) or ''
+            if adjust:
+                log.info("copying and adjusting %s -> %s", script,
+                         self.build_dir)
+                if not self.dry_run:
+                    outf = open(outfile, "w")
+                    if not _sysconfig.is_python_build():
+                        outf.write("#!%s%s\n" %
+                                   (self.executable,
+                                    post_interp))
+                    else:
+                        outf.write("#!%s%s\n" %
+                                   (os.path.join(
+                            _sysconfig.get_config_var("BINDIR"),
+                           "python%s%s" % (_sysconfig.get_config_var("VERSION"),
+                                           _sysconfig.get_config_var("EXE"))),
+                                    post_interp))
+                    outf.writelines(f.readlines())
+                    outf.close()
+                if f:
+                    f.close()
+            else:
+                if f:
+                    f.close()
+                self.copy_file(script, outfile)
+
+        if os.name == 'posix':
+            for file in outfiles:
+                if self.dry_run:
+                    log.info("changing mode of %s", file)
+                else:
+                    oldmode = os.stat(file)[ST_MODE] & 0o7777
+                    newmode = (oldmode | 0o555) & 0o7777
+                    if newmode != oldmode:
+                        log.info("changing mode of %s from %o to %o",
+                                 file, oldmode, newmode)
+                        os.chmod(file, newmode)
+
+ # copy_scripts ()
+
+# class build_scripts
diff --git a/cashew/Lib/distutils/command/check.py b/cashew/Lib/distutils/command/check.py
new file mode 100644
index 0000000..4ea03d3
--- /dev/null
+++ b/cashew/Lib/distutils/command/check.py
@@ -0,0 +1,149 @@
+"""distutils.command.check
+
+Implements the Distutils 'check' command.
+"""
+__revision__ = "$Id$"
+
+from distutils.core import Command
+from distutils.dist import PKG_INFO_ENCODING
+from distutils.errors import DistutilsSetupError
+
+try:
+ # docutils is installed: import what check._check_rst_data() uses
+ from docutils.utils import Reporter
+ from docutils.parsers.rst import Parser
+ from docutils import frontend
+ from docutils import nodes
+ from StringIO import StringIO
+
+ class SilentReporter(Reporter):
+ # Reporter subclass that keeps every system message in 'self.messages'
+ def __init__(self, source, report_level, halt_level, stream=None,
+ debug=0, encoding='ascii', error_handler='replace'):
+ self.messages = []
+ Reporter.__init__(self, source, report_level, halt_level, stream,
+ debug, encoding, error_handler)
+ # records (level, message, children, kwargs) before creating the node
+ def system_message(self, level, message, *children, **kwargs):
+ self.messages.append((level, message, children, kwargs))
+ return nodes.system_message(message, level=level,
+ type=self.levels[level],
+ *children, **kwargs)
+
+ HAS_DOCUTILS = True
+except ImportError:
+ # docutils is not installed; check.run() tests this flag
+ HAS_DOCUTILS = False
+
+class check(Command):
+    """Distutils command checking the package meta-data and, on
+    request, the reStructuredText of its long description."""
+    description = "perform some checks on the package"
+    user_options = [('metadata', 'm', 'Verify meta-data'),
+                    ('restructuredtext', 'r',
+                     'Checks if long string meta-data syntax '
+                     'are reStructuredText-compliant'),
+                    ('strict', 's',
+                     'Will exit with an error if a check fails')]
+
+    boolean_options = ['metadata', 'restructuredtext', 'strict']
+
+    def initialize_options(self):
+        """Start with only the meta-data check enabled."""
+        self.metadata = 1
+        self.restructuredtext = 0
+        self.strict = 0
+        self._warnings = 0
+
+    def finalize_options(self):
+        """Nothing to finalize."""
+
+    def warn(self, msg):
+        """Count the warning, then hand it over to Command.warn."""
+        self._warnings += 1
+        return Command.warn(self, msg)
+
+    def run(self):
+        """Run the selected checks; fail in strict mode on any warning."""
+        # perform the various tests
+        if self.metadata:
+            self.check_metadata()
+        # without docutils the reST check is skipped, or fatal when strict
+        if self.restructuredtext and HAS_DOCUTILS:
+            self.check_restructuredtext()
+        elif self.restructuredtext and self.strict:
+            raise DistutilsSetupError('The docutils package is needed.')
+
+        # let's raise an error in strict mode, if we have at least
+        # one warning
+        if self.strict and self._warnings > 0:
+            raise DistutilsSetupError('Please correct your package.')
+
+    def check_metadata(self):
+        """Warn about required meta-data that is not supplied.
+
+        Required are name, version and url, plus either author with
+        author_email or maintainer with maintainer_email.
+
+        Each problem found is reported through 'self.warn'.
+        """
+        metadata = self.distribution.metadata
+
+        missing = [attr for attr in ('name', 'version', 'url')
+                   if not (hasattr(metadata, attr)
+                           and getattr(metadata, attr))]
+        if missing:
+            self.warn("missing required meta-data: %s"  % ', '.join(missing))
+
+        # the contact person: an author or else a maintainer, each of
+        # which has to come with an e-mail address
+        if not (metadata.author or metadata.maintainer):
+            self.warn("missing meta-data: either (author and author_email) "
+                      "or (maintainer and maintainer_email) "
+                      "must be supplied")
+        elif metadata.author:
+            if not metadata.author_email:
+                self.warn("missing meta-data: if 'author' supplied, "
+                          "'author_email' must be supplied too")
+        elif not metadata.maintainer_email:
+            self.warn("missing meta-data: if 'maintainer' supplied, "
+                      "'maintainer_email' must be supplied too")
+
+    def check_restructuredtext(self):
+        """Warn about each reST problem found in the long description."""
+        data = self.distribution.get_long_description()
+        if not isinstance(data, unicode):
+            data = data.decode(PKG_INFO_ENCODING)
+        for warning in self._check_rst_data(data):
+            # each entry is (level, message, children, kwargs)
+            text = warning[1]
+            line = warning[-1].get('line')
+            if line is not None:
+                text = '%s (line %s)' % (text, line)
+            self.warn(text)
+
+    def _check_rst_data(self, data):
+        """Parse 'data' as reST and return the messages reported."""
+        source_path = StringIO()
+        parser = Parser()
+        option_parser = frontend.OptionParser(components=(Parser,))
+        settings = option_parser.get_default_values()
+        settings.tab_width = 4
+        settings.pep_references = None
+        settings.rfc_references = None
+        reporter = SilentReporter(
+            source_path, settings.report_level, settings.halt_level,
+            stream=settings.warning_stream,
+            debug=settings.debug,
+            encoding=settings.error_encoding,
+            error_handler=settings.error_encoding_error_handler)
+
+        document = nodes.document(settings, reporter, source=source_path)
+        document.note_source(source_path, -1)
+        try:
+            parser.parse(data, document)
+        except AttributeError as e:
+            failure = (-1, 'Could not finish the parsing: %s.' % e, '', {})
+            reporter.messages.append(failure)
+
+        return reporter.messages
diff --git a/cashew/Lib/distutils/command/clean.py b/cashew/Lib/distutils/command/clean.py
new file mode 100644
index 0000000..90ef35f
--- /dev/null
+++ b/cashew/Lib/distutils/command/clean.py
@@ -0,0 +1,80 @@
+"""distutils.command.clean
+
+Implements the Distutils 'clean' command."""
+
+# contributed by Bastian Kleineidam, added 2000-03-18
+
+__revision__ = "$Id$"
+
+import os
+from distutils.core import Command
+from distutils.dir_util import remove_tree
+from distutils import log
+
+class clean(Command):
+    """Distutils command removing the output of the 'build' command."""
+    description = "clean up temporary files from 'build' command"
+    user_options = [
+        ('build-base=', 'b',
+         "base build directory (default: 'build.build-base')"),
+        ('build-lib=', None,
+         "build directory for all modules (default: 'build.build-lib')"),
+        ('build-temp=', 't',
+         "temporary build directory (default: 'build.build-temp')"),
+        ('build-scripts=', None,
+         "build directory for scripts (default: 'build.build-scripts')"),
+        ('bdist-base=', None,
+         "temporary directory for built distributions"),
+        ('all', 'a',
+         "remove all build output, not just temporary by-products")
+    ]
+
+    boolean_options = ['all']
+
+    def initialize_options(self):
+        """Mark every option as not set."""
+        self.build_base = None
+        self.build_lib = self.build_temp = None
+        self.build_scripts = None
+        self.bdist_base = None
+        self.all = None
+
+    def finalize_options(self):
+        """Take the unset directories from 'build' and 'bdist'."""
+        self.set_undefined_options('build',
+                                   ('build_base', 'build_base'),
+                                   ('build_lib', 'build_lib'),
+                                   ('build_scripts', 'build_scripts'),
+                                   ('build_temp', 'build_temp'))
+        self.set_undefined_options('bdist', ('bdist_base', 'bdist_base'))
+
+    def run(self):
+        """Remove build by-products; with 'all', all build output too."""
+        # the temporary build directory always goes; every other build
+        # directory only goes when 'all' is set
+        missing = "'%s' does not exist -- can't clean it"
+        if os.path.exists(self.build_temp):
+            remove_tree(self.build_temp, dry_run=self.dry_run)
+        else:
+            log.debug(missing, self.build_temp)
+
+        if self.all:
+            # remove build directories
+            others = (self.build_lib, self.bdist_base, self.build_scripts)
+            for directory in others:
+                if os.path.exists(directory):
+                    remove_tree(directory, dry_run=self.dry_run)
+                else:
+                    log.warn(missing, directory)
+
+        # just for the heck of it, try to remove the base build directory:
+        # we might have emptied it right now, but if not we don't care
+        if self.dry_run:
+            return
+        try:
+            os.rmdir(self.build_base)
+            log.info("removing '%s'", self.build_base)
+        except OSError:
+            pass
+
+# class clean
diff --git a/cashew/Lib/distutils/command/config.py b/cashew/Lib/distutils/command/config.py
new file mode 100644
index 0000000..b084913
--- /dev/null
+++ b/cashew/Lib/distutils/command/config.py
@@ -0,0 +1,357 @@
+"""distutils.command.config
+
+Implements the Distutils 'config' command, a (mostly) empty command class
+that exists mainly to be sub-classed by specific module distributions and
+applications. The idea is that while every "config" command is different,
+at least they're all named the same, and users always see "config" in the
+list of standard commands. Also, this is a good place to put common
+configure-like tasks: "try to compile this C code", or "figure out where
+this header file lives".
+"""
+
+__revision__ = "$Id$"
+
+import os
+import re
+
+from distutils.core import Command
+from distutils.errors import DistutilsExecError
+from distutils.sysconfig import customize_compiler
+from distutils import log
+
+LANG_EXT = {'c': '.c', 'c++': '.cxx'}
+
+class config(Command):
+
+ description = "prepare to build"
+
+ user_options = [
+ ('compiler=', None,
+ "specify the compiler type"),
+ ('cc=', None,
+ "specify the compiler executable"),
+ ('include-dirs=', 'I',
+ "list of directories to search for header files"),
+ ('define=', 'D',
+ "C preprocessor macros to define"),
+ ('undef=', 'U',
+ "C preprocessor macros to undefine"),
+ ('libraries=', 'l',
+ "external C libraries to link with"),
+ ('library-dirs=', 'L',
+ "directories to search for external C libraries"),
+
+ ('noisy', None,
+ "show every action (compile, link, run, ...) taken"),
+ ('dump-source', None,
+ "dump generated source files before attempting to compile them"),
+ ]
+
+
+ # The three standard command methods: since the "config" command
+ # does nothing by default, these are empty.
+
+ def initialize_options(self):
+ self.compiler = None
+ self.cc = None
+ self.include_dirs = None
+ self.libraries = None
+ self.library_dirs = None
+
+ # maximal output for now
+ self.noisy = 1
+ self.dump_source = 1
+
+ # list of temporary files generated along-the-way that we have
+ # to clean at some point
+ self.temp_files = []
+
+ def finalize_options(self):
+ if self.include_dirs is None:
+ self.include_dirs = self.distribution.include_dirs or []
+ elif isinstance(self.include_dirs, str):
+ self.include_dirs = self.include_dirs.split(os.pathsep)
+
+ if self.libraries is None:
+ self.libraries = []
+ elif isinstance(self.libraries, str):
+ self.libraries = [self.libraries]
+
+ if self.library_dirs is None:
+ self.library_dirs = []
+ elif isinstance(self.library_dirs, str):
+ self.library_dirs = self.library_dirs.split(os.pathsep)
+
+ def run(self):
+ pass
+
+
+ # Utility methods for actual "config" commands. The interfaces are
+ # loosely based on Autoconf macros of similar names. Sub-classes
+ # may use these freely.
+
+ def _check_compiler(self):
+ """Check that 'self.compiler' really is a CCompiler object;
+ if not, make it one.
+ """
+ # We do this late, and only on-demand, because this is an expensive
+ # import.
+ from distutils.ccompiler import CCompiler, new_compiler
+ if not isinstance(self.compiler, CCompiler):
+ self.compiler = new_compiler(compiler=self.compiler,
+ dry_run=self.dry_run, force=1)
+ customize_compiler(self.compiler)
+ if self.include_dirs:
+ self.compiler.set_include_dirs(self.include_dirs)
+ if self.libraries:
+ self.compiler.set_libraries(self.libraries)
+ if self.library_dirs:
+ self.compiler.set_library_dirs(self.library_dirs)
+
+
+ def _gen_temp_sourcefile(self, body, headers, lang):
+ filename = "_configtest" + LANG_EXT[lang]
+ file = open(filename, "w")
+ if headers:
+ for header in headers:
+ file.write("#include <%s>\n" % header)
+ file.write("\n")
+ file.write(body)
+ if body[-1] != "\n":
+ file.write("\n")
+ file.close()
+ return filename
+
+ def _preprocess(self, body, headers, include_dirs, lang):
+ src = self._gen_temp_sourcefile(body, headers, lang)
+ out = "_configtest.i"
+ self.temp_files.extend([src, out])
+ self.compiler.preprocess(src, out, include_dirs=include_dirs)
+ return (src, out)
+
+ def _compile(self, body, headers, include_dirs, lang):
+ src = self._gen_temp_sourcefile(body, headers, lang)
+ if self.dump_source:
+ dump_file(src, "compiling '%s':" % src)
+ (obj,) = self.compiler.object_filenames([src])
+ self.temp_files.extend([src, obj])
+ self.compiler.compile([src], include_dirs=include_dirs)
+ return (src, obj)
+
+ def _link(self, body, headers, include_dirs, libraries, library_dirs,
+ lang):
+ (src, obj) = self._compile(body, headers, include_dirs, lang)
+ prog = os.path.splitext(os.path.basename(src))[0]
+ self.compiler.link_executable([obj], prog,
+ libraries=libraries,
+ library_dirs=library_dirs,
+ target_lang=lang)
+
+ if self.compiler.exe_extension is not None:
+ prog = prog + self.compiler.exe_extension
+ self.temp_files.append(prog)
+
+ return (src, obj, prog)
+
+ def _clean(self, *filenames):
+ if not filenames:
+ filenames = self.temp_files
+ self.temp_files = []
+ log.info("removing: %s", ' '.join(filenames))
+ for filename in filenames:
+ try:
+ os.remove(filename)
+ except OSError:
+ pass
+
+
+ # XXX these ignore the dry-run flag: what to do, what to do? even if
+ # you want a dry-run build, you still need some sort of configuration
+ # info. My inclination is to make it up to the real config command to
+ # consult 'dry_run', and assume a default (minimal) configuration if
+ # true. The problem with trying to do it here is that you'd have to
+ # return either true or false from all the 'try' methods, neither of
+ # which is correct.
+
+ # XXX need access to the header search path and maybe default macros.
+
+ def try_cpp(self, body=None, headers=None, include_dirs=None, lang="c"):
+ """Construct a source file from 'body' (a string containing lines
+ of C/C++ code) and 'headers' (a list of header files to include)
+ and run it through the preprocessor. Return true if the
+ preprocessor succeeded, false if there were any errors.
+ ('body' probably isn't of much use, but what the heck.)
+ """
+ from distutils.ccompiler import CompileError
+ self._check_compiler()
+ ok = 1
+ try:
+ self._preprocess(body, headers, include_dirs, lang)
+ except CompileError:
+ ok = 0
+
+ self._clean()
+ return ok
+
+ def search_cpp(self, pattern, body=None, headers=None, include_dirs=None,
+ lang="c"):
+ """Construct a source file (just like 'try_cpp()'), run it through
+ the preprocessor, and return true if any line of the output matches
+ 'pattern'. 'pattern' should either be a compiled regex object or a
+ string containing a regex. If both 'body' and 'headers' are None,
+ preprocesses an empty file -- which can be useful to determine the
+ symbols the preprocessor and compiler set by default.
+ """
+ self._check_compiler()
+ src, out = self._preprocess(body, headers, include_dirs, lang)
+
+ if isinstance(pattern, str):
+ pattern = re.compile(pattern)
+
+ file = open(out)
+ match = 0
+ while 1:
+ line = file.readline()
+ if line == '':
+ break
+ if pattern.search(line):
+ match = 1
+ break
+
+ file.close()
+ self._clean()
+ return match
+
+ def try_compile(self, body, headers=None, include_dirs=None, lang="c"):
+ """Try to compile a source file built from 'body' and 'headers'.
+ Return true on success, false otherwise.
+ """
+ from distutils.ccompiler import CompileError
+ self._check_compiler()
+ try:
+ self._compile(body, headers, include_dirs, lang)
+ ok = 1
+ except CompileError:
+ ok = 0
+
+ log.info(ok and "success!" or "failure.")
+ self._clean()
+ return ok
+
+ def try_link(self, body, headers=None, include_dirs=None, libraries=None,
+ library_dirs=None, lang="c"):
+ """Try to compile and link a source file, built from 'body' and
+ 'headers', to executable form. Return true on success, false
+ otherwise.
+ """
+ from distutils.ccompiler import CompileError, LinkError
+ self._check_compiler()
+ try:
+ self._link(body, headers, include_dirs,
+ libraries, library_dirs, lang)
+ ok = 1
+ except (CompileError, LinkError):
+ ok = 0
+
+ log.info(ok and "success!" or "failure.")
+ self._clean()
+ return ok
+
+ def try_run(self, body, headers=None, include_dirs=None, libraries=None,
+ library_dirs=None, lang="c"):
+ """Try to compile, link to an executable, and run a program
+ built from 'body' and 'headers'. Return true on success, false
+ otherwise.
+ """
+ from distutils.ccompiler import CompileError, LinkError
+ self._check_compiler()
+ try:
+ src, obj, exe = self._link(body, headers, include_dirs,
+ libraries, library_dirs, lang)
+ self.spawn([exe])
+ ok = 1
+ except (CompileError, LinkError, DistutilsExecError):
+ ok = 0
+
+ log.info(ok and "success!" or "failure.")
+ self._clean()
+ return ok
+
+
+ # -- High-level methods --------------------------------------------
+ # (these are the ones that are actually likely to be useful
+ # when implementing a real-world config command!)
+
+ def check_func(self, func, headers=None, include_dirs=None,
+ libraries=None, library_dirs=None, decl=0, call=0):
+
+ """Determine if function 'func' is available by constructing a
+ source file that refers to 'func', and compiles and links it.
+ If everything succeeds, returns true; otherwise returns false.
+
+ The constructed source file starts out by including the header
+ files listed in 'headers'. If 'decl' is true, it then declares
+ 'func' (as "int func()"); you probably shouldn't supply 'headers'
+ and set 'decl' true in the same call, or you might get errors about
+ a conflicting declarations for 'func'. Finally, the constructed
+ 'main()' function either references 'func' or (if 'call' is true)
+ calls it. 'libraries' and 'library_dirs' are used when
+ linking.
+ """
+
+ self._check_compiler()
+ body = []
+ if decl:
+ body.append("int %s ();" % func)
+ body.append("int main () {")
+ if call:
+ body.append(" %s();" % func)
+ else:
+ body.append(" %s;" % func)
+ body.append("}")
+ body = "\n".join(body) + "\n"
+
+ return self.try_link(body, headers, include_dirs,
+ libraries, library_dirs)
+
+ # check_func ()
+
+ def check_lib(self, library, library_dirs=None, headers=None,
+ include_dirs=None, other_libraries=[]):
+ """Determine if 'library' is available to be linked against,
+ without actually checking that any particular symbols are provided
+ by it. 'headers' will be used in constructing the source file to
+ be compiled, but the only effect of this is to check if all the
+ header files listed are available. Any libraries listed in
+ 'other_libraries' will be included in the link, in case 'library'
+ has symbols that depend on other libraries.
+ """
+ self._check_compiler()
+ return self.try_link("int main (void) { }",
+ headers, include_dirs,
+ [library]+other_libraries, library_dirs)
+
+ def check_header(self, header, include_dirs=None, library_dirs=None,
+ lang="c"):
+ """Determine if the system header file named by 'header_file'
+ exists and can be found by the preprocessor; return true if so,
+ false otherwise.
+ """
+ return self.try_cpp(body="/* No body */", headers=[header],
+ include_dirs=include_dirs)
+
+
+def dump_file(filename, head=None):
+ """Dumps a file content into log.info.
+
+ If head is not None, will be dumped before the file content.
+ """
+ if head is None:
+ log.info('%s' % filename)
+ else:
+ log.info(head)
+ file = open(filename)
+ try:
+ log.info(file.read())
+ finally:
+ file.close()
diff --git a/cashew/Lib/distutils/command/install.py b/cashew/Lib/distutils/command/install.py
new file mode 100644
index 0000000..b9f1c6c
--- /dev/null
+++ b/cashew/Lib/distutils/command/install.py
@@ -0,0 +1,672 @@
+"""distutils.command.install
+
+Implements the Distutils 'install' command."""
+
+from distutils import log
+
+# This module should be kept compatible with Python 2.1.
+
+__revision__ = "$Id$"
+
+import sys, os, string
+from types import *
+from distutils.core import Command
+from distutils.debug import DEBUG
+from distutils.sysconfig import get_config_vars
+from distutils.errors import DistutilsPlatformError
+from distutils.file_util import write_file
+from distutils.util import convert_path, subst_vars, change_root
+from distutils.util import get_platform
+from distutils.errors import DistutilsOptionError
+from site import USER_BASE
+from site import USER_SITE
+
+
+# WINDOWS_SCHEME is the scheme stored under the 'nt' key of
+# INSTALL_SCHEMES below.  When sys.version compares below "2.2",
+# modules go directly under $base; otherwise they go under
+# $base/Lib/site-packages.  The other three entries are the same in
+# both variants.
+# NOTE(review): this is a plain string comparison on sys.version, not a
+# numeric version comparison -- confirm that is intended.
+if sys.version < "2.2":
+ WINDOWS_SCHEME = {
+ 'purelib': '$base',
+ 'platlib': '$base',
+ 'headers': '$base/Include/$dist_name',
+ 'scripts': '$base/Scripts',
+ 'data' : '$base',
+ }
+else:
+ WINDOWS_SCHEME = {
+ 'purelib': '$base/Lib/site-packages',
+ 'platlib': '$base/Lib/site-packages',
+ 'headers': '$base/Include/$dist_name',
+ 'scripts': '$base/Scripts',
+ 'data' : '$base',
+ }
+
+# Maps a scheme name to a dictionary giving, for each key in
+# SCHEME_KEYS, a directory template.  The '$name' placeholders are
+# substituted from 'config_vars' when the install command expands its
+# directories.  'select_scheme()' looks schemes up here by name.
+# NOTE(review): 'finalize_other()' selects os.name + "_user" for --user
+# installs, but the per-user OS/2 entry below is keyed 'os2_home', not
+# 'os2_user' -- confirm whether that key is intended.
+INSTALL_SCHEMES = {
+ 'unix_prefix': {
+ 'purelib': '$base/lib/python$py_version_short/site-packages',
+ 'platlib': '$platbase/lib/python$py_version_short/site-packages',
+ 'headers': '$base/include/python$py_version_short/$dist_name',
+ 'scripts': '$base/bin',
+ 'data' : '$base',
+ },
+ 'unix_home': {
+ 'purelib': '$base/lib/python',
+ 'platlib': '$base/lib/python',
+ 'headers': '$base/include/python/$dist_name',
+ 'scripts': '$base/bin',
+ 'data' : '$base',
+ },
+ 'unix_user': {
+ 'purelib': '$usersite',
+ 'platlib': '$usersite',
+ 'headers': '$userbase/include/python$py_version_short/$dist_name',
+ 'scripts': '$userbase/bin',
+ 'data' : '$userbase',
+ },
+ 'nt': WINDOWS_SCHEME,
+ 'nt_user': {
+ 'purelib': '$usersite',
+ 'platlib': '$usersite',
+ 'headers': '$userbase/Python$py_version_nodot/Include/$dist_name',
+ 'scripts': '$userbase/Scripts',
+ 'data' : '$userbase',
+ },
+ 'os2': {
+ 'purelib': '$base/Lib/site-packages',
+ 'platlib': '$base/Lib/site-packages',
+ 'headers': '$base/Include/$dist_name',
+ 'scripts': '$base/Scripts',
+ 'data' : '$base',
+ },
+ 'os2_home': {
+ 'purelib': '$usersite',
+ 'platlib': '$usersite',
+ 'headers': '$userbase/include/python$py_version_short/$dist_name',
+ 'scripts': '$userbase/bin',
+ 'data' : '$userbase',
+ },
+ }
+
+# The keys to an installation scheme; if any new types of files are to be
+# installed, be sure to add an entry to every installation scheme above,
+# and to SCHEME_KEYS here.  'select_scheme()' iterates over this tuple
+# and sets the matching 'install_<key>' attribute from the scheme.
+SCHEME_KEYS = ('purelib', 'platlib', 'headers', 'scripts', 'data')
+
+
+class install (Command):
+
+ description = "install everything from build directory"
+
+ user_options = [
+ # Select installation scheme and set base director(y|ies)
+ ('prefix=', None,
+ "installation prefix"),
+ ('exec-prefix=', None,
+ "(Unix only) prefix for platform-specific files"),
+ ('home=', None,
+ "(Unix only) home directory to install under"),
+ ('user', None,
+ "install in user site-package '%s'" % USER_SITE),
+
+ # Or, just set the base director(y|ies)
+ ('install-base=', None,
+ "base installation directory (instead of --prefix or --home)"),
+ ('install-platbase=', None,
+ "base installation directory for platform-specific files " +
+ "(instead of --exec-prefix or --home)"),
+ ('root=', None,
+ "install everything relative to this alternate root directory"),
+
+ # Or, explicitly set the installation scheme
+ ('install-purelib=', None,
+ "installation directory for pure Python module distributions"),
+ ('install-platlib=', None,
+ "installation directory for non-pure module distributions"),
+ ('install-lib=', None,
+ "installation directory for all module distributions " +
+ "(overrides --install-purelib and --install-platlib)"),
+
+ ('install-headers=', None,
+ "installation directory for C/C++ headers"),
+ ('install-scripts=', None,
+ "installation directory for Python scripts"),
+ ('install-data=', None,
+ "installation directory for data files"),
+
+ # Byte-compilation options -- see install_lib.py for details, as
+ # these are duplicated from there (but only install_lib does
+ # anything with them).
+ ('compile', 'c', "compile .py to .pyc [default]"),
+ ('no-compile', None, "don't compile .py files"),
+ ('optimize=', 'O',
+ "also compile with optimization: -O1 for \"python -O\", "
+ "-O2 for \"python -OO\", and -O0 to disable [default: -O0]"),
+
+ # Miscellaneous control options
+ ('force', 'f',
+ "force installation (overwrite any existing files)"),
+ ('skip-build', None,
+ "skip rebuilding everything (for testing/debugging)"),
+
+ # Where to install documentation (eventually!)
+ #('doc-format=', None, "format of documentation to generate"),
+ #('install-man=', None, "directory for Unix man pages"),
+ #('install-html=', None, "directory for HTML documentation"),
+ #('install-info=', None, "directory for GNU info files"),
+
+ ('record=', None,
+ "filename in which to record list of installed files"),
+ ]
+
+ boolean_options = ['compile', 'force', 'skip-build', 'user']
+ negative_opt = {'no-compile' : 'compile'}
+
+
+ def initialize_options (self):
+
+ # High-level options: these select both an installation base
+ # and scheme.
+ self.prefix = None
+ self.exec_prefix = None
+ self.home = None
+ self.user = 0
+
+ # These select only the installation base; it's up to the user to
+ # specify the installation scheme (currently, that means supplying
+ # the --install-{platlib,purelib,scripts,data} options).
+ self.install_base = None
+ self.install_platbase = None
+ self.root = None
+
+ # These options are the actual installation directories; if not
+ # supplied by the user, they are filled in using the installation
+ # scheme implied by prefix/exec-prefix/home and the contents of
+ # that installation scheme.
+ self.install_purelib = None # for pure module distributions
+ self.install_platlib = None # non-pure (dists w/ extensions)
+ self.install_headers = None # for C/C++ headers
+ self.install_lib = None # set to either purelib or platlib
+ self.install_scripts = None
+ self.install_data = None
+ self.install_userbase = USER_BASE
+ self.install_usersite = USER_SITE
+
+ self.compile = None
+ self.optimize = None
+
+ # These two are for putting non-packagized distributions into their
+ # own directory and creating a .pth file if it makes sense.
+ # 'extra_path' comes from the setup file; 'install_path_file' can
+ # be turned off if it makes no sense to install a .pth file. (But
+ # better to install it uselessly than to guess wrong and not
+ # install it when it's necessary and would be used!) Currently,
+ # 'install_path_file' is always true unless some outsider meddles
+ # with it.
+ self.extra_path = None
+ self.install_path_file = 1
+
+ # 'force' forces installation, even if target files are not
+ # out-of-date. 'skip_build' skips running the "build" command,
+ # handy if you know it's not necessary. 'warn_dir' (which is *not*
+ # a user option, it's just there so the bdist_* commands can turn
+ # it off) determines whether we warn about installing to a
+ # directory not in sys.path.
+ self.force = 0
+ self.skip_build = 0
+ self.warn_dir = 1
+
+ # These are only here as a conduit from the 'build' command to the
+ # 'install_*' commands that do the real work. ('build_base' isn't
+ # actually used anywhere, but it might be useful in future.) They
+ # are not user options, because if the user told the install
+ # command where the build directory is, that wouldn't affect the
+ # build command.
+ self.build_base = None
+ self.build_lib = None
+
+ # Not defined yet because we don't know anything about
+ # documentation yet.
+ #self.install_man = None
+ #self.install_html = None
+ #self.install_info = None
+
+ self.record = None
+
+
+ # -- Option finalizing methods -------------------------------------
+ # (This is rather more involved than for most commands,
+ # because this is where the policy for installing third-
+ # party Python modules on various platforms given a wide
+ # array of user input is decided. Yes, it's quite complex!)
+
+ def finalize_options (self):
+ """Check the option combination and compute the final values of
+ the install_* directory options.
+
+ Raises DistutilsOptionError for the conflicting option combinations
+ checked below.
+ """
+
+ # This method (and its pliant slaves, like 'finalize_unix()',
+ # 'finalize_other()', and 'select_scheme()') is where the default
+ # installation directories for modules, extension modules, and
+ # anything else we care to install from a Python module
+ # distribution are determined. Thus, this code makes a pretty
+ # important policy statement about how third-party stuff is added
+ # to a Python installation! Note that the actual work of
+ # installation is done by the relatively simple 'install_*'
+ # commands; they just take their orders from the installation
+ # directory options determined here.
+
+ # Check for errors/inconsistencies in the options; first, stuff
+ # that's wrong on any platform.
+
+ if ((self.prefix or self.exec_prefix or self.home) and
+ (self.install_base or self.install_platbase)):
+ raise DistutilsOptionError, \
+ ("must supply either prefix/exec-prefix/home or " +
+ "install-base/install-platbase -- not both")
+
+ if self.home and (self.prefix or self.exec_prefix):
+ raise DistutilsOptionError, \
+ "must supply either home or prefix/exec-prefix -- not both"
+
+ if self.user and (self.prefix or self.exec_prefix or self.home or
+ self.install_base or self.install_platbase):
+ raise DistutilsOptionError("can't combine user with prefix, "
+ "exec_prefix/home, or install_(plat)base")
+
+ # Next, stuff that's wrong (or dubious) only on certain platforms.
+ if os.name != "posix":
+ if self.exec_prefix:
+ self.warn("exec-prefix option ignored on this platform")
+ self.exec_prefix = None
+
+ # Now the interesting logic -- so interesting that we farm it out
+ # to other methods. The goal of these methods is to set the final
+ # values for the install_{lib,scripts,data,...} options, using as
+ # input a heady brew of prefix, exec_prefix, home, install_base,
+ # install_platbase, user-supplied versions of
+ # install_{purelib,platlib,lib,scripts,data,...}, and the
+ # INSTALL_SCHEMES dictionary above. Phew!
+
+ self.dump_dirs("pre-finalize_{unix,other}")
+
+ if os.name == 'posix':
+ self.finalize_unix()
+ else:
+ self.finalize_other()
+
+ self.dump_dirs("post-finalize_{unix,other}()")
+
+ # Expand configuration variables, tilde, etc. in self.install_base
+ # and self.install_platbase -- that way, we can use $base or
+ # $platbase in the other installation directories and not worry
+ # about needing recursive variable expansion (shudder).
+
+ # NOTE(review): the [0:3] slice and the [0] + [2] indexing below
+ # assume a version string of the form "X.Y..." with single-digit
+ # X and Y -- confirm for every interpreter this must support.
+ py_version = (string.split(sys.version))[0]
+ (prefix, exec_prefix) = get_config_vars('prefix', 'exec_prefix')
+ self.config_vars = {'dist_name': self.distribution.get_name(),
+ 'dist_version': self.distribution.get_version(),
+ 'dist_fullname': self.distribution.get_fullname(),
+ 'py_version': py_version,
+ 'py_version_short': py_version[0:3],
+ 'py_version_nodot': py_version[0] + py_version[2],
+ 'sys_prefix': prefix,
+ 'prefix': prefix,
+ 'sys_exec_prefix': exec_prefix,
+ 'exec_prefix': exec_prefix,
+ 'userbase': self.install_userbase,
+ 'usersite': self.install_usersite,
+ }
+ self.expand_basedirs()
+
+ self.dump_dirs("post-expand_basedirs()")
+
+ # Now define config vars for the base directories so we can expand
+ # everything else.
+ self.config_vars['base'] = self.install_base
+ self.config_vars['platbase'] = self.install_platbase
+
+ if DEBUG:
+ from pprint import pprint
+ print "config vars:"
+ pprint(self.config_vars)
+
+ # Expand "~" and configuration variables in the installation
+ # directories.
+ self.expand_dirs()
+
+ self.dump_dirs("post-expand_dirs()")
+
+ # Create directories in the home dir:
+ if self.user:
+ self.create_home_path()
+
+ # Pick the actual directory to install all modules to: either
+ # install_purelib or install_platlib, depending on whether this
+ # module distribution is pure or not. Of course, if the user
+ # already specified install_lib, use their selection.
+ if self.install_lib is None:
+ if self.distribution.ext_modules: # has extensions: non-pure
+ self.install_lib = self.install_platlib
+ else:
+ self.install_lib = self.install_purelib
+
+
+ # Convert directories from Unix /-separated syntax to the local
+ # convention.
+ self.convert_paths('lib', 'purelib', 'platlib',
+ 'scripts', 'data', 'headers',
+ 'userbase', 'usersite')
+
+ # Well, we're not actually fully completely finalized yet: we still
+ # have to deal with 'extra_path', which is the hack for allowing
+ # non-packagized module distributions (hello, Numerical Python!) to
+ # get their own directories.
+ self.handle_extra_path()
+ self.install_libbase = self.install_lib # needed for .pth file
+ self.install_lib = os.path.join(self.install_lib, self.extra_dirs)
+
+ # If a new root directory was supplied, make all the installation
+ # dirs relative to it.
+ if self.root is not None:
+ self.change_roots('libbase', 'lib', 'purelib', 'platlib',
+ 'scripts', 'data', 'headers')
+
+ self.dump_dirs("after prepending root")
+
+ # Find out the build directories, ie. where to install from.
+ self.set_undefined_options('build',
+ ('build_base', 'build_base'),
+ ('build_lib', 'build_lib'))
+
+ # Punt on doc directories for now -- after all, we're punting on
+ # documentation completely!
+
+ # finalize_options ()
+
+
+ def dump_dirs (self, msg):
+ if DEBUG:
+ from distutils.fancy_getopt import longopt_xlate
+ print msg + ":"
+ for opt in self.user_options:
+ opt_name = opt[0]
+ if opt_name[-1] == "=":
+ opt_name = opt_name[0:-1]
+ if opt_name in self.negative_opt:
+ opt_name = string.translate(self.negative_opt[opt_name],
+ longopt_xlate)
+ val = not getattr(self, opt_name)
+ else:
+ opt_name = string.translate(opt_name, longopt_xlate)
+ val = getattr(self, opt_name)
+ print " %s: %s" % (opt_name, val)
+
+
+ def finalize_unix (self):
+
+ if self.install_base is not None or self.install_platbase is not None:
+ if ((self.install_lib is None and
+ self.install_purelib is None and
+ self.install_platlib is None) or
+ self.install_headers is None or
+ self.install_scripts is None or
+ self.install_data is None):
+ raise DistutilsOptionError, \
+ ("install-base or install-platbase supplied, but "
+ "installation scheme is incomplete")
+ return
+
+ if self.user:
+ if self.install_userbase is None:
+ raise DistutilsPlatformError(
+ "User base directory is not specified")
+ self.install_base = self.install_platbase = self.install_userbase
+ self.select_scheme("unix_user")
+ elif self.home is not None:
+ self.install_base = self.install_platbase = self.home
+ self.select_scheme("unix_home")
+ else:
+ if self.prefix is None:
+ if self.exec_prefix is not None:
+ raise DistutilsOptionError, \
+ "must not supply exec-prefix without prefix"
+
+ self.prefix = os.path.normpath(sys.prefix)
+ self.exec_prefix = os.path.normpath(sys.exec_prefix)
+
+ else:
+ if self.exec_prefix is None:
+ self.exec_prefix = self.prefix
+
+ self.install_base = self.prefix
+ self.install_platbase = self.exec_prefix
+ self.select_scheme("unix_prefix")
+
+ # finalize_unix ()
+
+
+ def finalize_other (self): # Windows and Mac OS for now
+
+ if self.user:
+ if self.install_userbase is None:
+ raise DistutilsPlatformError(
+ "User base directory is not specified")
+ self.install_base = self.install_platbase = self.install_userbase
+ self.select_scheme(os.name + "_user")
+ elif self.home is not None:
+ self.install_base = self.install_platbase = self.home
+ self.select_scheme("unix_home")
+ else:
+ if self.prefix is None:
+ self.prefix = os.path.normpath(sys.prefix)
+
+ self.install_base = self.install_platbase = self.prefix
+ try:
+ self.select_scheme(os.name)
+ except KeyError:
+ raise DistutilsPlatformError, \
+ "I don't know how to install stuff on '%s'" % os.name
+
+ # finalize_other ()
+
+
+ def select_scheme (self, name):
+ # it's the caller's problem if they supply a bad name!
+ scheme = INSTALL_SCHEMES[name]
+ for key in SCHEME_KEYS:
+ attrname = 'install_' + key
+ if getattr(self, attrname) is None:
+ setattr(self, attrname, scheme[key])
+
+
+ def _expand_attrs (self, attrs):
+ for attr in attrs:
+ val = getattr(self, attr)
+ if val is not None:
+ if os.name == 'posix' or os.name == 'nt':
+ val = os.path.expanduser(val)
+ val = subst_vars(val, self.config_vars)
+ setattr(self, attr, val)
+
+
+ def expand_basedirs (self):
+ self._expand_attrs(['install_base',
+ 'install_platbase',
+ 'root'])
+
+ def expand_dirs (self):
+ self._expand_attrs(['install_purelib',
+ 'install_platlib',
+ 'install_lib',
+ 'install_headers',
+ 'install_scripts',
+ 'install_data',])
+
+
+ def convert_paths (self, *names):
+ for name in names:
+ attr = "install_" + name
+ setattr(self, attr, convert_path(getattr(self, attr)))
+
+
+ def handle_extra_path (self):
+
+ if self.extra_path is None:
+ self.extra_path = self.distribution.extra_path
+
+ if self.extra_path is not None:
+ if type(self.extra_path) is StringType:
+ self.extra_path = string.split(self.extra_path, ',')
+
+ if len(self.extra_path) == 1:
+ path_file = extra_dirs = self.extra_path[0]
+ elif len(self.extra_path) == 2:
+ (path_file, extra_dirs) = self.extra_path
+ else:
+ raise DistutilsOptionError, \
+ ("'extra_path' option must be a list, tuple, or "
+ "comma-separated string with 1 or 2 elements")
+
+ # convert to local form in case Unix notation used (as it
+ # should be in setup scripts)
+ extra_dirs = convert_path(extra_dirs)
+
+ else:
+ path_file = None
+ extra_dirs = ''
+
+ # XXX should we warn if path_file and not extra_dirs? (in which
+ # case the path file would be harmless but pointless)
+ self.path_file = path_file
+ self.extra_dirs = extra_dirs
+
+ # handle_extra_path ()
+
+
+ def change_roots (self, *names):
+ for name in names:
+ attr = "install_" + name
+ setattr(self, attr, change_root(self.root, getattr(self, attr)))
+
+ def create_home_path(self):
+ """Create directories under ~
+ """
+ if not self.user:
+ return
+ home = convert_path(os.path.expanduser("~"))
+ for name, path in self.config_vars.iteritems():
+ if path.startswith(home) and not os.path.isdir(path):
+ self.debug_print("os.makedirs('%s', 0700)" % path)
+ os.makedirs(path, 0700)
+
+ # -- Command execution methods -------------------------------------
+
+ def run (self):
+
+ # Obviously have to build before we can install
+ if not self.skip_build:
+ self.run_command('build')
+ # If we built for any other platform, we can't install.
+ build_plat = self.distribution.get_command_obj('build').plat_name
+ # check warn_dir - it is a clue that the 'install' is happening
+ # internally, and not to sys.path, so we don't check the platform
+ # matches what we are running.
+ if self.warn_dir and build_plat != get_platform():
+ raise DistutilsPlatformError("Can't install when "
+ "cross-compiling")
+
+ # Run all sub-commands (at least those that need to be run)
+ for cmd_name in self.get_sub_commands():
+ self.run_command(cmd_name)
+
+ if self.path_file:
+ self.create_path_file()
+
+ # write list of installed files, if requested.
+ if self.record:
+ outputs = self.get_outputs()
+ if self.root: # strip any package prefix
+ root_len = len(self.root)
+ for counter in xrange(len(outputs)):
+ outputs[counter] = outputs[counter][root_len:]
+ self.execute(write_file,
+ (self.record, outputs),
+ "writing list of installed files to '%s'" %
+ self.record)
+
+ sys_path = map(os.path.normpath, sys.path)
+ sys_path = map(os.path.normcase, sys_path)
+ install_lib = os.path.normcase(os.path.normpath(self.install_lib))
+ if (self.warn_dir and
+ not (self.path_file and self.install_path_file) and
+ install_lib not in sys_path):
+ log.debug(("modules installed to '%s', which is not in "
+ "Python's module search path (sys.path) -- "
+ "you'll have to change the search path yourself"),
+ self.install_lib)
+
+ # run ()
+
+ def create_path_file (self):
+ filename = os.path.join(self.install_libbase,
+ self.path_file + ".pth")
+ if self.install_path_file:
+ self.execute(write_file,
+ (filename, [self.extra_dirs]),
+ "creating %s" % filename)
+ else:
+ self.warn("path file '%s' not created" % filename)
+
+
+ # -- Reporting methods ---------------------------------------------
+
+ def get_outputs (self):
+ # Assemble the outputs of all the sub-commands.
+ outputs = []
+ for cmd_name in self.get_sub_commands():
+ cmd = self.get_finalized_command(cmd_name)
+ # Add the contents of cmd.get_outputs(), ensuring
+ # that outputs doesn't contain duplicate entries
+ for filename in cmd.get_outputs():
+ if filename not in outputs:
+ outputs.append(filename)
+
+ if self.path_file and self.install_path_file:
+ outputs.append(os.path.join(self.install_libbase,
+ self.path_file + ".pth"))
+
+ return outputs
+
+ def get_inputs (self):
+ # XXX gee, this looks familiar ;-(
+ inputs = []
+ for cmd_name in self.get_sub_commands():
+ cmd = self.get_finalized_command(cmd_name)
+ inputs.extend(cmd.get_inputs())
+
+ return inputs
+
+
+ # -- Predicates for sub-command list -------------------------------
+
+ def has_lib (self):
+ """Return true if the current distribution has any Python
+ modules to install."""
+ return (self.distribution.has_pure_modules() or
+ self.distribution.has_ext_modules())
+
+ def has_headers (self):
+ return self.distribution.has_headers()
+
+ def has_scripts (self):
+ return self.distribution.has_scripts()
+
+ def has_data (self):
+ return self.distribution.has_data_files()
+
+
+ # 'sub_commands': a list of commands this command might have to run to
+ # get its work done. See cmd.py for more info.
+ sub_commands = [('install_lib', has_lib),
+ ('install_headers', has_headers),
+ ('install_scripts', has_scripts),
+ ('install_data', has_data),
+ ('install_egg_info', lambda self:True),
+ ]
+
+# class install
diff --git a/cashew/Lib/distutils/command/install_data.py b/cashew/Lib/distutils/command/install_data.py
new file mode 100644
index 0000000..ab40797
--- /dev/null
+++ b/cashew/Lib/distutils/command/install_data.py
@@ -0,0 +1,81 @@
+"""distutils.command.install_data
+
+Implements the Distutils 'install_data' command, for installing
+platform-independent data files."""
+
+# contributed by Bastian Kleineidam
+
+__revision__ = "$Id$"
+
+import os
+from distutils.core import Command
+from distutils.util import change_root, convert_path
+
+class install_data(Command):
+    """Copy the distribution's 'data_files' into the installation tree."""
+    description = "install data files"
+
+    user_options = [
+        ('install-dir=', 'd',
+         "base directory for installing data files "
+         "(default: installation base dir)"),
+        ('root=', None,
+         "install everything relative to this alternate root directory"),
+        ('force', 'f', "force installation (overwrite existing files)"),
+        ]
+
+    boolean_options = ['force']
+
+    def initialize_options(self):
+        self.install_dir = None
+        self.outfiles = []
+        self.root = None
+        self.force = 0
+        self.data_files = self.distribution.data_files
+        self.warn_dir = 1
+
+    def finalize_options(self):
+        # Whatever was not given explicitly is taken from 'install'.
+        self.set_undefined_options('install',
+                                   ('install_data', 'install_dir'),
+                                   ('root', 'root'),
+                                   ('force', 'force'))
+
+    def run(self):
+        """Install each 'data_files' entry -- a bare filename or a
+        (directory, [files]) pair -- and record it in 'self.outfiles'."""
+        self.mkpath(self.install_dir)
+        for f in self.data_files:
+            if isinstance(f, basestring):
+                # A bare filename (str or unicode) goes right in install_dir.
+                f = convert_path(f)
+                if self.warn_dir:
+                    self.warn("setup script did not provide a directory for "
+                              "'%s' -- installing right in '%s'" %
+                              (f, self.install_dir))
+                (out, _) = self.copy_file(f, self.install_dir)
+                self.outfiles.append(out)
+                continue
+
+            # A relative target directory goes under install_dir; an
+            # absolute one is only re-rooted when --root was given.
+            target = convert_path(f[0])
+            if not os.path.isabs(target):
+                target = os.path.join(self.install_dir, target)
+            elif self.root:
+                target = change_root(self.root, target)
+            self.mkpath(target)
+
+            if not f[1]:
+                # No files listed ([], () or None): the user wants an empty
+                # directory, so report the directory itself as an output.
+                self.outfiles.append(target)
+            for data in f[1] or ():
+                (out, _) = self.copy_file(convert_path(data), target)
+                self.outfiles.append(out)
+
+    def get_inputs(self):
+        return self.data_files or []
+
+    def get_outputs(self):
+        return self.outfiles
diff --git a/cashew/Lib/distutils/command/install_egg_info.py b/cashew/Lib/distutils/command/install_egg_info.py
new file mode 100644
index 0000000..c888031
--- /dev/null
+++ b/cashew/Lib/distutils/command/install_egg_info.py
@@ -0,0 +1,78 @@
+"""distutils.command.install_egg_info
+
+Implements the Distutils 'install_egg_info' command, for installing
+a package's PKG-INFO metadata."""
+
+
+from distutils.cmd import Command
+from distutils import log, dir_util
+import os, sys, re
+
+class install_egg_info(Command):
+    """Install an .egg-info file holding the package's PKG-INFO metadata"""
+
+    description = "Install package's PKG-INFO metadata as an .egg-info file"
+    user_options = [
+        ('install-dir=', 'd', "directory to install to"),
+    ]
+
+    def initialize_options(self):
+        self.install_dir = None
+
+    def finalize_options(self):
+        self.set_undefined_options('install_lib',('install_dir','install_dir'))
+        # sys.version[:3] would truncate a two-digit minor ("3.10" -> "3.1").
+        basename = "%s-%s-py%s.egg-info" % (
+            to_filename(safe_name(self.distribution.get_name())),
+            to_filename(safe_version(self.distribution.get_version())),
+            '%d.%d' % sys.version_info[:2])
+        self.target = os.path.join(self.install_dir, basename)
+        self.outputs = [self.target]
+
+    def run(self):
+        target = self.target
+        if os.path.isdir(target) and not os.path.islink(target):
+            dir_util.remove_tree(target, dry_run=self.dry_run)
+        elif os.path.exists(target):
+            self.execute(os.unlink,(self.target,),"Removing "+target)
+        elif not os.path.isdir(self.install_dir):
+            self.execute(os.makedirs, (self.install_dir,),
+                         "Creating "+self.install_dir)
+        log.info("Writing %s", target)
+        if not self.dry_run:
+            # 'with' closes the file even if writing the metadata fails.
+            with open(target, 'w') as f:
+                self.distribution.metadata.write_pkg_file(f)
+
+    def get_outputs(self):
+        return self.outputs
+
+
+# The following routines are taken from setuptools' pkg_resources module and
+# can be replaced by importing them from pkg_resources once it is included
+# in the stdlib.
+
+def safe_name(name):
+    """Normalize an arbitrary string into a standard distribution name.
+
+    Each run of characters other than ASCII letters, digits and '.'
+    collapses into a single '-'."""
+    return re.compile('[^A-Za-z0-9.]+').sub('-', name)
+
+
+def safe_version(version):
+    """Normalize an arbitrary string into a standard version string.
+
+    Spaces turn into dots first; after that every run of characters
+    other than ASCII letters, digits and '.' collapses into one dash.
+    """
+    dotted = '.'.join(version.split(' '))
+    return re.compile('[^A-Za-z0-9.]+').sub('-', dotted)
+
+
+def to_filename(name):
+    """Return the filename-escaped form of a project or version name.
+
+    At present this only means turning every '-' into '_'.
+    """
+    return '_'.join(name.split('-'))
diff --git a/cashew/Lib/distutils/command/install_headers.py b/cashew/Lib/distutils/command/install_headers.py
new file mode 100644
index 0000000..d892416
--- /dev/null
+++ b/cashew/Lib/distutils/command/install_headers.py
@@ -0,0 +1,51 @@
+"""distutils.command.install_headers
+
+Implements the Distutils 'install_headers' command, to install C/C++ header
+files to the Python include directory."""
+
+__revision__ = "$Id$"
+
+from distutils.core import Command
+
+
+# XXX force is never used
+class install_headers(Command):
+    """Copy the distribution's C/C++ headers into 'install_dir'."""
+    description = "install C/C++ header files"
+
+    user_options = [
+        ('install-dir=', 'd', "directory to install header files to"),
+        ('force', 'f',
+         "force installation (overwrite existing files)"),
+    ]
+
+    boolean_options = ['force']
+
+    def initialize_options(self):
+        self.outfiles = []
+        self.install_dir = None
+        self.force = 0
+
+    def finalize_options(self):
+        # 'install_dir' and 'force' default to what 'install' was given.
+        inherited = (('install_headers', 'install_dir'), ('force', 'force'))
+        self.set_undefined_options('install', *inherited)
+
+
+    def run(self):
+        """Copy every header into 'install_dir', remembering each copy
+        in 'self.outfiles'; do nothing when there are no headers."""
+        headers = self.distribution.headers or ()
+        if headers:
+            self.mkpath(self.install_dir)
+        for header in headers:
+            copied, _ = self.copy_file(header, self.install_dir)
+            self.outfiles.append(copied)
+
+    def get_inputs(self):
+        return self.distribution.headers or []
+
+    def get_outputs(self):
+        return self.outfiles
+
+# class install_headers
diff --git a/cashew/Lib/distutils/command/install_lib.py b/cashew/Lib/distutils/command/install_lib.py
new file mode 100644
index 0000000..043e8b6
--- /dev/null
+++ b/cashew/Lib/distutils/command/install_lib.py
@@ -0,0 +1,219 @@
+"""distutils.command.install_lib
+
+Implements the Distutils 'install_lib' command
+(install all Python modules)."""
+
+__revision__ = "$Id$"
+
+import os
+import sys
+
+from distutils.core import Command
+from distutils.errors import DistutilsOptionError
+
+
+# Extension for Python source files: the platform's extension
+# separator followed by "py".  When the 'os' module has no 'extsep'
+# attribute, a plain "." is used as the separator.
+
+PYTHON_SOURCE_EXTENSION = getattr(os, 'extsep', ".") + "py"
+
+class install_lib(Command):
+    """Install the built Python modules and byte-compile them as requested."""
+    description = "install all Python modules (extensions and pure Python)"
+
+    # The byte-compilation options are a tad confusing.  Here are the
+    # possible scenarios:
+    #   1) no compilation at all (--no-compile --no-optimize)
+    #   2) compile .pyc only (--compile --no-optimize; default)
+    #   3) compile .pyc and "level 1" .pyo (--compile --optimize)
+    #   4) compile "level 1" .pyo only (--no-compile --optimize)
+    #   5) compile .pyc and "level 2" .pyo (--compile --optimize-more)
+    #   6) compile "level 2" .pyo only (--no-compile --optimize-more)
+    #
+    # The UI for this is two options, 'compile' and 'optimize'.
+    # 'compile' is strictly boolean, and only decides whether to
+    # generate .pyc files.  'optimize' is three-way (0, 1, or 2), and
+    # decides both whether to generate .pyo files and what level of
+    # optimization to use.
+
+    user_options = [
+        ('install-dir=', 'd', "directory to install to"),
+        ('build-dir=','b', "build directory (where to install from)"),
+        ('force', 'f', "force installation (overwrite existing files)"),
+        ('compile', 'c', "compile .py to .pyc [default]"),
+        ('no-compile', None, "don't compile .py files"),
+        ('optimize=', 'O',
+         "also compile with optimization: -O1 for \"python -O\", "
+         "-O2 for \"python -OO\", and -O0 to disable [default: -O0]"),
+        ('skip-build', None, "skip the build steps"),
+        ]
+
+    boolean_options = ['force', 'compile', 'skip-build']
+    negative_opt = {'no-compile' : 'compile'}
+
+    def initialize_options(self):
+        # let the 'install' command dictate our installation directory
+        self.install_dir = None
+        self.build_dir = None
+        self.force = 0
+        self.compile = None
+        self.optimize = None
+        self.skip_build = None
+
+    def finalize_options(self):
+        # Get all the information we need to install pure Python modules
+        # from the umbrella 'install' command -- build (source) directory,
+        # install (target) directory, and whether to compile .py files.
+        self.set_undefined_options('install',
+                                   ('build_lib', 'build_dir'),
+                                   ('install_lib', 'install_dir'),
+                                   ('force', 'force'),
+                                   ('compile', 'compile'),
+                                   ('optimize', 'optimize'),
+                                   ('skip_build', 'skip_build'))
+
+        if self.compile is None:
+            self.compile = 1
+        if self.optimize is None:
+            self.optimize = 0
+
+        if not isinstance(self.optimize, int):
+            try:
+                level = int(self.optimize)
+            except ValueError:
+                level = None    # not a number at all: rejected below
+            if level not in (0, 1, 2):
+                raise DistutilsOptionError("optimize must be 0, 1, or 2")
+            self.optimize = level
+
+    def run(self):
+        # Make sure we have built everything we need first
+        self.build()
+
+        # Install everything: simply dump the entire contents of the build
+        # directory to the installation directory (that's the beauty of
+        # having a build directory!)
+        outfiles = self.install()
+
+        # (Optionally) compile .py to .pyc
+        if outfiles is not None and self.distribution.has_pure_modules():
+            self.byte_compile(outfiles)
+
+    # -- Top-level worker functions ------------------------------------
+    # (called from 'run()')
+
+    def build(self):
+        if not self.skip_build:
+            if self.distribution.has_pure_modules():
+                self.run_command('build_py')
+            if self.distribution.has_ext_modules():
+                self.run_command('build_ext')
+
+    def install(self):
+        """Copy the build tree to 'install_dir'; return the copied files,
+        or None (after a warning) when there is no build directory."""
+        if not os.path.isdir(self.build_dir):
+            self.warn("'%s' does not exist -- no Python modules to install" %
+                      self.build_dir)
+            return
+        return self.copy_tree(self.build_dir, self.install_dir)
+
+    def byte_compile(self, files):
+        if sys.dont_write_bytecode:
+            self.warn('byte-compiling is disabled, skipping.')
+            return
+
+        from distutils.util import byte_compile
+
+        # Get the "--root" directory supplied to the "install" command,
+        # and use it as a prefix to strip off the purported filename
+        # encoded in bytecode files.  This is far from complete, but it
+        # should at least generate usable bytecode in RPM distributions.
+        install_root = self.get_finalized_command('install').root
+
+        if self.compile:
+            byte_compile(files, optimize=0,
+                         force=self.force, prefix=install_root,
+                         dry_run=self.dry_run)
+        if self.optimize > 0:
+            byte_compile(files, optimize=self.optimize,
+                         force=self.force, prefix=install_root,
+                         verbose=self.verbose, dry_run=self.dry_run)
+
+
+    # -- Utility methods -----------------------------------------------
+
+    def _mutate_outputs(self, has_any, build_cmd, cmd_option, output_dir):
+        """Map the outputs of build command 'build_cmd' from its build
+        directory (attribute 'cmd_option') to paths under 'output_dir'."""
+        if not has_any:
+            return []
+
+        build_cmd = self.get_finalized_command(build_cmd)
+        build_files = build_cmd.get_outputs()
+        build_dir = getattr(build_cmd, cmd_option)
+
+        # Strip "<build_dir><os.sep>" from the front of every build output.
+        prefix_len = len(build_dir) + len(os.sep)
+        return [os.path.join(output_dir, built[prefix_len:])
+                for built in build_files]
+
+    def _bytecode_filenames(self, py_filenames):
+        bytecode_files = []
+        for py_file in py_filenames:
+            # Since build_py handles package data installation, the
+            # list of outputs can contain more than just .py files.
+            # Make sure we only report bytecode for the .py files.
+            ext = os.path.splitext(os.path.normcase(py_file))[1]
+            if ext != PYTHON_SOURCE_EXTENSION:
+                continue
+            if self.compile:
+                bytecode_files.append(py_file + "c")
+            if self.optimize > 0:
+                bytecode_files.append(py_file + "o")
+
+        return bytecode_files
+
+
+    # -- External interface --------------------------------------------
+    # (called by outsiders)
+
+    def get_outputs(self):
+        """Return the list of files that would be installed if this command
+        were actually run.  Not affected by the "dry-run" flag or whether
+        modules have actually been built yet.
+        """
+        pure_outputs = \
+            self._mutate_outputs(self.distribution.has_pure_modules(),
+                                 'build_py', 'build_lib',
+                                 self.install_dir)
+        # _bytecode_filenames() checks 'compile' and 'optimize' itself, so
+        # the .pyo files that byte_compile() writes for --no-compile
+        # --optimize are reported as well as the .pyc ones.
+        bytecode_outputs = self._bytecode_filenames(pure_outputs)
+
+        ext_outputs = \
+            self._mutate_outputs(self.distribution.has_ext_modules(),
+                                 'build_ext', 'build_lib',
+                                 self.install_dir)
+
+        return pure_outputs + bytecode_outputs + ext_outputs
+
+    def get_inputs(self):
+        """Get the list of files that are input to this command, ie. the
+        files that get installed as they are named in the build tree.
+        The files in this list correspond one-to-one to the output
+        filenames returned by 'get_outputs()'.
+        """
+        inputs = []
+
+        if self.distribution.has_pure_modules():
+            build_py = self.get_finalized_command('build_py')
+            inputs.extend(build_py.get_outputs())
+
+        if self.distribution.has_ext_modules():
+            build_ext = self.get_finalized_command('build_ext')
+            inputs.extend(build_ext.get_outputs())
+
+        return inputs
diff --git a/cashew/Lib/distutils/command/install_scripts.py b/cashew/Lib/distutils/command/install_scripts.py
new file mode 100644
index 0000000..29cd9e7
--- /dev/null
+++ b/cashew/Lib/distutils/command/install_scripts.py
@@ -0,0 +1,64 @@
+"""distutils.command.install_scripts
+
+Implements the Distutils 'install_scripts' command, for installing
+Python scripts."""
+
+# contributed by Bastian Kleineidam
+
+__revision__ = "$Id$"
+
+import os
+from distutils.core import Command
+from distutils import log
+from stat import ST_MODE
+
+class install_scripts (Command):
+    """Install the scripts produced by the 'build_scripts' command."""
+    description = "install scripts (Python or otherwise)"
+
+    user_options = [
+        ('install-dir=', 'd', "directory to install scripts to"),
+        ('build-dir=','b', "build directory (where to install from)"),
+        ('force', 'f', "force installation (overwrite existing files)"),
+        ('skip-build', None, "skip the build steps"),
+    ]
+
+    boolean_options = ['force', 'skip-build']
+
+    def initialize_options (self):
+        self.install_dir = None
+        self.force = 0
+        self.build_dir = None
+        self.skip_build = None
+        self.outfiles = None    # filled in by run(); get_outputs() copes
+
+    def finalize_options (self):
+        self.set_undefined_options('build', ('build_scripts', 'build_dir'))
+        self.set_undefined_options('install',
+                                   ('install_scripts', 'install_dir'),
+                                   ('force', 'force'),
+                                   ('skip_build', 'skip_build'),
+                                  )
+
+    def run (self):
+        if not self.skip_build:
+            self.run_command('build_scripts')
+        self.outfiles = self.copy_tree(self.build_dir, self.install_dir)
+        if os.name == 'posix':
+            # Set the executable bits (owner, group, and world) on
+            # all the scripts we just installed.
+            for script in self.get_outputs():
+                if self.dry_run:
+                    log.info("changing mode of %s", script)
+                else:
+                    mode = ((os.stat(script)[ST_MODE]) | 0555) & 07777
+                    log.info("changing mode of %s to %o", script, mode)
+                    os.chmod(script, mode)
+
+    def get_inputs (self):
+        return self.distribution.scripts or []
+
+    def get_outputs(self):
+        return self.outfiles or []
+
+# class install_scripts
diff --git a/cashew/Lib/distutils/command/register.py b/cashew/Lib/distutils/command/register.py
new file mode 100644
index 0000000..edb42b9
--- /dev/null
+++ b/cashew/Lib/distutils/command/register.py
@@ -0,0 +1,315 @@
+"""distutils.command.register
+
+Implements the Distutils 'register' command (register with the repository).
+"""
+
+# created 2002/10/21, Richard Jones
+
+__revision__ = "$Id$"
+
+import urllib2
+import getpass
+import urlparse
+from warnings import warn
+
+from distutils.core import PyPIRCCommand
+from distutils import log
+
+class register(PyPIRCCommand):
+    """Register the distribution's metadata with a package index server."""
+    description = ("register the distribution with the Python package index")
+    user_options = PyPIRCCommand.user_options + [
+        ('list-classifiers', None,
+         'list the valid Trove classifiers'),
+        ('strict', None ,
+         'Will stop the registering if the meta-data are not fully compliant')
+        ]
+    boolean_options = PyPIRCCommand.boolean_options + [
+        'verify', 'list-classifiers', 'strict']
+
+    sub_commands = [('check', lambda self: True)]
+
+    def initialize_options(self):
+        PyPIRCCommand.initialize_options(self)
+        self.list_classifiers = 0
+        self.strict = 0
+
+    def finalize_options(self):
+        PyPIRCCommand.finalize_options(self)
+        # setting options for the `check` subcommand
+        check_options = {'strict': ('register', self.strict),
+                         'restructuredtext': ('register', 1)}
+        self.distribution.command_options['check'] = check_options
+
+    def run(self):
+        self.finalize_options()
+        self._set_config()
+
+        # Run sub commands
+        for cmd_name in self.get_sub_commands():
+            self.run_command(cmd_name)
+
+        if self.dry_run:
+            self.verify_metadata()
+        elif self.list_classifiers:
+            self.classifiers()
+        else:
+            self.send_metadata()
+
+    def check_metadata(self):
+        """Deprecated API."""
+        warn("distutils.command.register.check_metadata is deprecated, "
+             "use the check command instead", PendingDeprecationWarning)
+        check = self.distribution.get_command_obj('check')
+        check.ensure_finalized()
+        check.strict = self.strict
+        check.restructuredtext = 1
+        check.run()
+
+    def _set_config(self):
+        ''' Reads the configuration file and set attributes.
+        '''
+        config = self._read_pypirc()
+        if config != {}:
+            self.username = config['username']
+            self.password = config['password']
+            self.repository = config['repository']
+            self.realm = config['realm']
+            self.has_config = True
+        else:
+            if self.repository not in ('pypi', self.DEFAULT_REPOSITORY):
+                raise ValueError('%s not found in .pypirc' % self.repository)
+            if self.repository == 'pypi':
+                self.repository = self.DEFAULT_REPOSITORY
+            self.has_config = False
+
+    def classifiers(self):
+        ''' Fetch the list of classifiers from the server.
+        '''
+        response = urllib2.urlopen(self.repository+'?:action=list_classifiers')
+        log.info(response.read())
+
+    def verify_metadata(self):
+        ''' Send the metadata to the package index server to be checked.
+        '''
+        # send the info to the server and report the result
+        (code, result) = self.post_to_server(self.build_post_data('verify'))
+        log.info('Server response (%s): %s' % (code, result))
+
+
+    def send_metadata(self):
+        ''' Send the metadata to the package index server.
+
+            Well, do the following:
+            1. figure who the user is, and then
+            2. send the data as a Basic auth'ed POST.
+
+            First we try to read the username/password from $HOME/.pypirc,
+            which is a ConfigParser-formatted file with a section
+            [distutils] containing username and password entries (both
+            in clear text). Eg:
+
+                [distutils]
+                index-servers =
+                    pypi
+
+                [pypi]
+                username: fred
+                password: sekrit
+
+            Otherwise, to figure who the user is, we offer the user three
+            choices:
+
+             1. use existing login,
+             2. register as a new user, or
+             3. set the password to a random string and email the user.
+
+        '''
+        # see if we can short-cut and get the username/password from the
+        # config
+        if self.has_config:
+            choice = '1'
+            username = self.username
+            password = self.password
+        else:
+            choice = 'x'
+            username = password = ''
+
+        # get the user's login info
+        choices = '1 2 3 4'.split()
+        while choice not in choices:
+            self.announce('''\
+We need to know who you are, so please choose either:
+ 1. use your existing login,
+ 2. register as a new user,
+ 3. have the server generate a new password for you (and email it to you), or
+ 4. quit
+Your selection [default 1]: ''', log.INFO)
+
+            choice = raw_input()
+            if not choice:
+                choice = '1'
+            elif choice not in choices:
+                print 'Please choose one of the four options!'
+
+        if choice == '1':
+            # get the username and password
+            while not username:
+                username = raw_input('Username: ')
+            while not password:
+                password = getpass.getpass('Password: ')
+
+            # set up the authentication
+            auth = urllib2.HTTPPasswordMgr()
+            host = urlparse.urlparse(self.repository)[1]
+            auth.add_password(self.realm, host, username, password)
+            # send the info to the server and report the result
+            code, result = self.post_to_server(self.build_post_data('submit'),
+                auth)
+            self.announce('Server response (%s): %s' % (code, result),
+                          log.INFO)
+
+            # possibly save the login
+            if code == 200:
+                if self.has_config:
+                    # sharing the password in the distribution instance
+                    # so the upload command can reuse it
+                    self.distribution.password = password
+                else:
+                    self.announce(('I can store your PyPI login so future '
+                                   'submissions will be faster.'), log.INFO)
+                    self.announce('(the login will be stored in %s)' % \
+                                  self._get_rc_file(), log.INFO)
+                    choice = 'X'
+                    while choice.lower() not in ('y', 'n'):
+                        choice = raw_input('Save your login (y/N)?')
+                        if not choice:
+                            choice = 'n'
+                    if choice.lower() == 'y':
+                        self._store_pypirc(username, password)
+
+        elif choice == '2':
+            data = {':action': 'user'}
+            data['name'] = data['password'] = data['email'] = ''
+            data['confirm'] = None
+            while not data['name']:
+                data['name'] = raw_input('Username: ')
+            while data['password'] != data['confirm']:
+                while not data['password']:
+                    data['password'] = getpass.getpass('Password: ')
+                while not data['confirm']:
+                    data['confirm'] = getpass.getpass(' Confirm: ')
+                if data['password'] != data['confirm']:
+                    data['password'] = ''
+                    data['confirm'] = None
+                    print "Password and confirm don't match!"
+            while not data['email']:
+                data['email'] = raw_input(' EMail: ')
+            code, result = self.post_to_server(data)
+            if code != 200:
+                log.info('Server response (%s): %s' % (code, result))
+            else:
+                log.info('You will receive an email shortly.')
+                log.info(('Follow the instructions in it to '
+                          'complete registration.'))
+        elif choice == '3':
+            data = {':action': 'password_reset'}
+            data['email'] = ''
+            while not data['email']:
+                data['email'] = raw_input('Your email address: ')
+            code, result = self.post_to_server(data)
+            log.info('Server response (%s): %s' % (code, result))
+
+    def build_post_data(self, action):
+        # figure the data to send - the metadata plus some additional
+        # information used by the package server
+        meta = self.distribution.metadata
+        data = {
+            ':action': action,
+            'metadata_version' : '1.0',
+            'name': meta.get_name(),
+            'version': meta.get_version(),
+            'summary': meta.get_description(),
+            'home_page': meta.get_url(),
+            'author': meta.get_contact(),
+            'author_email': meta.get_contact_email(),
+            'license': meta.get_licence(),
+            'description': meta.get_long_description(),
+            'keywords': meta.get_keywords(),
+            'platform': meta.get_platforms(),
+            'classifiers': meta.get_classifiers(),
+            'download_url': meta.get_download_url(),
+            # PEP 314
+            'provides': meta.get_provides(),
+            'requires': meta.get_requires(),
+            'obsoletes': meta.get_obsoletes(),
+        }
+        if data['provides'] or data['requires'] or data['obsoletes']:
+            data['metadata_version'] = '1.1'
+        return data
+
+    def post_to_server(self, data, auth=None):
+        ''' Post a query to the server, and return a (code, message) pair.
+        '''
+        if 'name' in data:
+            self.announce('Registering %s to %s' % (data['name'],
+                                                   self.repository),
+                                                   log.INFO)
+        # Build up the MIME payload for the urllib2 POST data
+        boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
+        sep_boundary = '\n--' + boundary
+        end_boundary = sep_boundary + '--'
+        chunks = []
+        for key, value in data.items():
+            # handle multiple entries for the same name
+            if not isinstance(value, (list, tuple)):
+                value = [value]
+            for item in value:
+                chunks.append(sep_boundary)
+                chunks.append('\nContent-Disposition: form-data; name="%s"'%key)
+                chunks.append("\n\n")
+                chunks.append(item)
+                if item and item[-1] == '\r':
+                    chunks.append('\n') # write an extra newline (lurve Macs)
+        chunks.append(end_boundary)
+        chunks.append("\n")
+
+        # chunks may be bytes (str) or unicode objects that we need to encode
+        body = []
+        for chunk in chunks:
+            if isinstance(chunk, unicode):
+                body.append(chunk.encode('utf-8'))
+            else:
+                body.append(chunk)
+
+        body = ''.join(body)
+
+        # build the Request
+        headers = {
+            'Content-type': 'multipart/form-data; boundary=%s; charset=utf-8'%boundary,
+            'Content-length': str(len(body))
+        }
+        req = urllib2.Request(self.repository, body, headers)
+
+        # handle HTTP and include the Basic Auth handler
+        opener = urllib2.build_opener(
+            urllib2.HTTPBasicAuthHandler(password_mgr=auth)
+        )
+        data = ''
+        try:
+            result = opener.open(req)
+        except urllib2.HTTPError, e:
+            if self.show_response:
+                data = e.fp.read()
+            result = e.code, e.msg
+        except urllib2.URLError, e:
+            result = 500, str(e)
+        else:
+            if self.show_response:
+                data = result.read()
+            result = 200, 'OK'
+        if self.show_response:
+            dashes = '-' * 75
+            self.announce('%s%s%s' % (dashes, data, dashes))
+
+        return result
diff --git a/cashew/Lib/distutils/command/sdist.py b/cashew/Lib/distutils/command/sdist.py
new file mode 100644
index 0000000..821420d
--- /dev/null
+++ b/cashew/Lib/distutils/command/sdist.py
@@ -0,0 +1,477 @@
+"""distutils.command.sdist
+
+Implements the Distutils 'sdist' command (create a source distribution)."""
+
+__revision__ = "$Id$"
+
+import os
+import string
+import sys
+from glob import glob
+from warnings import warn
+
+from distutils.core import Command
+from distutils import dir_util, dep_util, file_util, archive_util
+from distutils.text_file import TextFile
+from distutils.errors import (DistutilsPlatformError, DistutilsOptionError,
+ DistutilsTemplateError)
+from distutils.filelist import FileList
+from distutils import log
+from distutils.util import convert_path
+
+def show_formats():
+    """Print the help listing of every value accepted by the 'formats'
+    option (this backs the "--help-formats" command-line option).
+    """
+    from distutils.fancy_getopt import FancyGetopt
+    from distutils.archive_util import ARCHIVE_FORMATS
+    # One pseudo-option per archive format; the third element of each
+    # ARCHIVE_FORMATS entry is used as its help text.
+    entries = sorted(("formats=" + name, None, spec[2])
+                     for name, spec in ARCHIVE_FORMATS.items())
+    parser = FancyGetopt(entries)
+    parser.print_help(
+        "List of available source distribution formats:")
+
+class sdist(Command):
+
+ description = "create a source distribution (tarball, zip file, etc.)"
+
+    def checking_metadata(self):
+        """Callable used for the check sub-command ('metadata_check' set?).
+
+        Defined ahead of 'sub_commands' so that list can refer to it."""
+        return self.metadata_check
+
+    user_options = [
+        ('template=', 't',
+         "name of manifest template file [default: MANIFEST.in]"),
+        ('manifest=', 'm',
+         "name of manifest file [default: MANIFEST]"),
+        ('use-defaults', None,
+         "include the default file set in the manifest "
+         "[default; disable with --no-defaults]"),
+        ('no-defaults', None,
+         "don't include the default file set"),
+        ('prune', None,
+         "specifically exclude files/directories that should not be "
+         "distributed (build tree, RCS/CVS dirs, etc.) "
+         "[default; disable with --no-prune]"),
+        ('no-prune', None,
+         "don't automatically exclude anything"),
+        ('manifest-only', 'o',
+         "just regenerate the manifest and then stop "
+         "(implies --force-manifest)"),
+        ('force-manifest', 'f',
+         "forcibly regenerate the manifest and carry on as usual. "
+         "Deprecated: now the manifest is always regenerated."),
+        ('formats=', None,
+         "formats for source distribution (comma-separated list)"),
+        ('keep-temp', 'k',
+         "keep the distribution tree around after creating " +
+         "archive file(s)"),
+        ('dist-dir=', 'd',
+         "directory to put the source distribution archive(s) in "
+         "[default: dist]"),
+        ('metadata-check', None,
+         "Ensure that all required elements of meta-data "
+         "are supplied. Warn if any missing. [default]"),
+        ('owner=', 'u',
+         "Owner name used when creating a tar file [default: current user]"),
+        ('group=', 'g',
+         "Group name used when creating a tar file [default: current group]"),
+        ]
+
+    boolean_options = ['use-defaults', 'prune',
+                       'manifest-only', 'force-manifest',
+                       'keep-temp', 'metadata-check']
+
+    help_options = [
+        ('help-formats', None,
+         "list available distribution formats", show_formats),
+        ]
+
+    negative_opt = {'no-defaults': 'use-defaults',
+                    'no-prune': 'prune' }
+    # Default 'formats' value, looked up by os.name in finalize_options().
+    default_format = {'posix': 'gztar',
+                      'nt': 'zip' }
+    # (command name, predicate) pairs; the predicate is defined above.
+    sub_commands = [('check', checking_metadata)]
+
+    def initialize_options(self):
+        # Names of the manifest template and of the manifest file;
+        # both get their defaults in finalize_options().
+        self.template = self.manifest = None
+
+        # 'use_defaults': if true, the default file set goes into the
+        # manifest.  'prune': if true, prune_file_list() is applied.
+        self.use_defaults = 1
+        self.prune = 1
+
+        self.manifest_only = 0
+        self.force_manifest = 0
+
+        # Archive formats to build, and where to put the archives.
+        self.formats = None
+        self.dist_dir = None
+        self.keep_temp = 0
+
+        self.archive_files = None
+        self.metadata_check = 1
+        # Owner and group names used when creating a tar file.
+        self.owner = self.group = None
+
+    def finalize_options(self):
+        """Fill in defaults for 'manifest', 'template', 'formats' and
+        'dist_dir', and reject unknown archive formats."""
+        if self.manifest is None:
+            self.manifest = "MANIFEST"
+        if self.template is None:
+            self.template = "MANIFEST.in"
+        self.ensure_string_list('formats')
+        if self.formats is None:
+            try:
+                self.formats = [self.default_format[os.name]]
+            except KeyError:
+                raise DistutilsPlatformError(
+                    "don't know how to create source distributions "
+                    "on platform %s" % os.name)
+
+        bad_format = archive_util.check_archive_formats(self.formats)
+        if bad_format:
+            raise DistutilsOptionError(
+                "unknown archive format '%s'" % bad_format)
+        if self.dist_dir is None:
+            self.dist_dir = "dist"
+
+    def run(self):
+        """Run the sub-commands, work out the manifest's file list
+        and, unless only the manifest was asked for, create the
+        source distribution archives."""
+        # 'filelist' accumulates the files that will make up the
+        # manifest.
+        self.filelist = FileList()
+
+        # Sub-commands come first.
+        for sub_command in self.get_sub_commands():
+            self.run_command(sub_command)
+
+        # Work out which files to process (manifest template, existing
+        # manifest, default file set, ...); the result ends up in
+        # 'self.filelist'.
+        self.get_file_list()
+
+        # The source distribution itself (tarball, zipfile, ...) is only
+        # built when the user did not stop at regenerating the manifest.
+        if not self.manifest_only:
+            self.make_distribution()
+
+    def check_metadata(self):
+        """Deprecated API; warns, then runs the 'check' command."""
+        warn("distutils.command.sdist.check_metadata is deprecated, "
+             "use the check command instead", PendingDeprecationWarning)
+        check = self.distribution.get_command_obj('check')
+        check.ensure_finalized()
+        check.run()
+
+    def get_file_list(self):
+        """Work out the files that go into the source distribution
+        and store them in 'self.filelist'.
+
+        Without a template, a manifest for which
+        _manifest_is_not_generated() is true is read as it is.  In every
+        other case the list is rebuilt -- default file set, manifest
+        template, pruning -- and the manifest is written anew.
+        """
+        # The list is recalculated on every run: even when neither
+        # MANIFEST.in nor setup.py changed, files may have been added
+        # to the tree.  With a template, --force is thus the default
+        # and only behavior.
+        has_template = os.path.isfile(self.template)
+        if not has_template:
+            if self._manifest_is_not_generated():
+                self.read_manifest()
+                self.filelist.sort()
+                self.filelist.remove_duplicates()
+                return
+            # No template: warn, then rebuild from the defaults.
+            self.warn(("manifest template '%s' does not exist " +
+                        "(using default file list)") %
+                        self.template)
+
+        self.filelist.findall()
+        if self.use_defaults:
+            self.add_defaults()
+
+        if has_template:
+            self.read_template()
+
+        if self.prune:
+            self.prune_file_list()
+
+        # Sort and de-duplicate before the manifest is written out.
+        self.filelist.sort()
+        self.filelist.remove_duplicates()
+        self.write_manifest()
+
+    def add_defaults(self):
+        """Add all the default files to self.filelist:
+          - README or README.txt
+          - the setup script (distribution.script_name)
+          - test/test*.py and setup.cfg
+          - all pure Python modules mentioned in setup script
+          - all files pointed by package_data (build_py)
+          - all files defined in data_files.
+          - all files defined as scripts.
+          - all C sources listed as part of extensions or C libraries
+            in the setup script (doesn't catch C headers!)
+        Warns if (README or README.txt) or setup.py are missing; everything
+        else is optional.
+        """
+
+        standards = [('README', 'README.txt'), self.distribution.script_name]
+        for fn in standards:
+            if isinstance(fn, tuple):
+                alts = fn
+                got_it = 0
+                for alt in alts:
+                    if os.path.exists(alt):
+                        got_it = 1
+                        self.filelist.append(alt)
+                        break
+
+                if not got_it:
+                    self.warn("standard file not found: should have one of " +
+                              ', '.join(alts))
+            else:
+                if os.path.exists(fn):
+                    self.filelist.append(fn)
+                else:
+                    self.warn("standard file '%s' not found" % fn)
+
+        optional = ['test/test*.py', 'setup.cfg']
+        for pattern in optional:
+            files = filter(os.path.isfile, glob(pattern))
+            if files:
+                self.filelist.extend(files)
+
+        # build_py is used to get:
+        #  - python modules
+        #  - files defined in package_data
+        build_py = self.get_finalized_command('build_py')
+
+        # getting python files
+        if self.distribution.has_pure_modules():
+            self.filelist.extend(build_py.get_source_files())
+
+        # getting package_data files
+        # (computed in build_py.data_files by build_py.finalize_options)
+        for pkg, src_dir, build_dir, filenames in build_py.data_files:
+            for filename in filenames:
+                self.filelist.append(os.path.join(src_dir, filename))
+
+        # getting distribution.data_files
+        if self.distribution.has_data_files():
+            for item in self.distribution.data_files:
+                if isinstance(item, basestring): # plain file (str/unicode)
+                    item = convert_path(item)
+                    if os.path.isfile(item):
+                        self.filelist.append(item)
+                else: # a (dirname, filenames) tuple
+                    dirname, filenames = item
+                    for f in filenames:
+                        f = convert_path(f)
+                        if os.path.isfile(f):
+                            self.filelist.append(f)
+
+        if self.distribution.has_ext_modules():
+            build_ext = self.get_finalized_command('build_ext')
+            self.filelist.extend(build_ext.get_source_files())
+
+        if self.distribution.has_c_libraries():
+            build_clib = self.get_finalized_command('build_clib')
+            self.filelist.extend(build_clib.get_source_files())
+
+        if self.distribution.has_scripts():
+            build_scripts = self.get_finalized_command('build_scripts')
+            self.filelist.extend(build_scripts.get_source_files())
+
+ def read_template(self):
+ """Read and parse manifest template file named by self.template.
+
+ (usually "MANIFEST.in") The parsing and processing is done by
+ 'self.filelist', which updates itself accordingly.
+ """
+ log.info("reading manifest template '%s'", self.template)
+ template = TextFile(self.template,
+ strip_comments=1,
+ skip_blanks=1,
+ join_lines=1,
+ lstrip_ws=1,
+ rstrip_ws=1,
+ collapse_join=1)
+
+ try:
+ while 1:
+ line = template.readline()
+ if line is None: # end of file
+ break
+
+ try:
+ self.filelist.process_template_line(line)
+ # the call above can raise a DistutilsTemplateError for
+ # malformed lines, or a ValueError from the lower-level
+ # convert_path function
+ except (DistutilsTemplateError, ValueError) as msg:  # reported as a warning, not re-raised
+ self.warn("%s, line %d: %s" % (template.filename,
+ template.current_line,
+ msg))
+ finally:
+ template.close()  # close the template whether or not processing raised
+
+ def prune_file_list(self):
+ """Prune off branches that might slip into the file list as created
+ by 'read_template()', but really don't belong there:
+ * the build tree (typically "build")
+ * the release tree itself (only an issue if we ran "sdist"
+ previously with --keep-temp, or it aborted)
+ * any RCS, CVS, .svn, .hg, .git, .bzr, _darcs directories
+ """
+ build = self.get_finalized_command('build')
+ base_dir = self.distribution.get_fullname()  # same name make_distribution() uses for the release tree
+
+ self.filelist.exclude_pattern(None, prefix=build.build_base)
+ self.filelist.exclude_pattern(None, prefix=base_dir)
+
+ # pruning out vcs directories
+ # both separators are used under win32
+ if sys.platform == 'win32':
+ seps = r'/|\\'
+ else:
+ seps = '/'
+
+ vcs_dirs = ['RCS', 'CVS', r'\.svn', r'\.hg', r'\.git', r'\.bzr',
+ '_darcs']
+ vcs_ptrn = r'(^|%s)(%s)(%s).*' % (seps, '|'.join(vcs_dirs), seps)  # VCS name as a whole path component, then a separator
+ self.filelist.exclude_pattern(vcs_ptrn, is_regex=1)
+
+ def write_manifest(self):
+ """Write the file list in 'self.filelist' (presumably as filled in
+ by 'add_defaults()' and 'read_template()') to the manifest file
+ named by 'self.manifest'. A manually maintained manifest is left alone.
+ """
+ if self._manifest_is_not_generated():
+ log.info("not writing to manually maintained "
+ "manifest file '%s'" % self.manifest)
+ return
+
+ content = self.filelist.files[:]  # copy, so the marker line is not added to the file list itself
+ content.insert(0, '# file GENERATED by distutils, do NOT edit')  # marker tested by _manifest_is_not_generated()
+ self.execute(file_util.write_file, (self.manifest, content),
+ "writing manifest file '%s'" % self.manifest)
+
+ def _manifest_is_not_generated(self):  # True only for an existing manifest whose first line is not the marker
+ # check for special comment used in 2.7.1 and higher
+ if not os.path.isfile(self.manifest):
+ return False  # a missing manifest does not count as manually maintained
+
+ fp = open(self.manifest, 'rU')
+ try:
+ first_line = fp.readline()  # only the first line is inspected
+ finally:
+ fp.close()
+ return first_line != '# file GENERATED by distutils, do NOT edit\n'
+
+ def read_manifest(self):
+ """Read the manifest file (named by 'self.manifest') and use it to
+ fill in 'self.filelist', the list of files to include in the source
+ distribution.
+ """
+ log.info("reading manifest file '%s'", self.manifest)
+ manifest = open(self.manifest)
+ for line in manifest:
+ # ignore comments and blank lines
+ line = line.strip()
+ if line.startswith('#') or not line:
+ continue
+ self.filelist.append(line)
+ manifest.close()
+
+ def make_release_tree(self, base_dir, files):
+ """Create the directory tree that will become the source
+ distribution archive. All directories implied by the filenames in
+ 'files' are created under 'base_dir', and then we hard link or copy
+ (if hard linking is unavailable) those files into place.
+ Essentially, this duplicates the developer's source tree, but in a
+ directory named after the distribution, containing only the files
+ to be distributed.
+ """
+ # Create all the directories under 'base_dir' necessary to
+ # put 'files' there; the 'mkpath()' is just so we don't die
+ # if the manifest happens to be empty.
+ self.mkpath(base_dir)
+ dir_util.create_tree(base_dir, files, dry_run=self.dry_run)
+
+ # And walk over the list of files, either making a hard link (if
+ # os.link exists) to each one that doesn't already exist in its
+ # corresponding location under 'base_dir', or copying each file
+ # that's out-of-date in 'base_dir'. (Usually, all files will be
+ # out-of-date, because by default we blow away 'base_dir' when
+ # we're done making the distribution archives.)
+
+ if hasattr(os, 'link'): # can make hard links on this system
+ link = 'hard'
+ msg = "making hard links in %s..." % base_dir
+ else: # nope, have to copy
+ link = None
+ msg = "copying files to %s..." % base_dir
+
+ if not files:
+ log.warn("no files to distribute -- empty manifest?")
+ else:
+ log.info(msg)
+ for file in files:
+ if not os.path.isfile(file):  # e.g. a directory or a missing path: warn and skip
+ log.warn("'%s' not a regular file -- skipping" % file)
+ else:
+ dest = os.path.join(base_dir, file)
+ self.copy_file(file, dest, link=link)  # link is 'hard' or None, as chosen above
+
+ self.distribution.metadata.write_pkg_info(base_dir)  # presumably writes PKG-INFO into base_dir -- confirm
+
+ def make_distribution(self):
+ """Create the source distribution(s). First, we create the release
+ tree with 'make_release_tree()'; then, we create all required
+ archive files (according to 'self.formats') from the release tree.
+ Finally, we clean up by blowing away the release tree (unless
+ 'self.keep_temp' is true). The list of archive files created is
+ stored so it can be retrieved later by 'get_archive_files()'.
+ """
+ # Don't warn about missing meta-data here -- should be (and is!)
+ # done elsewhere.
+ base_dir = self.distribution.get_fullname()
+ base_name = os.path.join(self.dist_dir, base_dir)  # archive path prefix inside dist_dir
+
+ self.make_release_tree(base_dir, self.filelist.files)
+ archive_files = [] # remember names of files we create
+ # tar archive must be created last to avoid overwrite and remove
+ if 'tar' in self.formats:
+ self.formats.append(self.formats.pop(self.formats.index('tar')))  # moves 'tar' to the end, mutating self.formats
+
+ for fmt in self.formats:
+ file = self.make_archive(base_name, fmt, base_dir=base_dir,
+ owner=self.owner, group=self.group)
+ archive_files.append(file)
+ self.distribution.dist_files.append(('sdist', '', file))  # (command, pyversion, filename), as unpacked by upload.run()
+
+ self.archive_files = archive_files
+
+ if not self.keep_temp:
+ dir_util.remove_tree(base_dir, dry_run=self.dry_run)
+
+ def get_archive_files(self):
+ """Return the list of archive files created when the command
+ was run, or None if the command hasn't run yet.
+ """
+ return self.archive_files  # assigned at the end of make_distribution(); initial value not visible here
diff --git a/cashew/Lib/distutils/command/upload.py b/cashew/Lib/distutils/command/upload.py
new file mode 100644
index 0000000..ff043d2
--- /dev/null
+++ b/cashew/Lib/distutils/command/upload.py
@@ -0,0 +1,194 @@
+"""distutils.command.upload
+
+Implements the Distutils 'upload' subcommand (upload package to PyPI)."""
+import os
+import socket
+import platform
+from urllib2 import urlopen, Request, HTTPError
+from base64 import standard_b64encode
+import urlparse
+import cStringIO as StringIO
+from hashlib import md5
+
+from distutils.errors import DistutilsError, DistutilsOptionError
+from distutils.core import PyPIRCCommand
+from distutils.spawn import spawn
+from distutils import log
+
+class upload(PyPIRCCommand):  # 'upload' command: POSTs each built distribution file to the repository
+
+ description = "upload binary package to PyPI"
+
+ user_options = PyPIRCCommand.user_options + [
+ ('sign', 's',
+ 'sign files to upload using gpg'),
+ ('identity=', 'i', 'GPG identity used to sign files'),
+ ]
+
+ boolean_options = PyPIRCCommand.boolean_options + ['sign']
+
+ def initialize_options(self):
+ PyPIRCCommand.initialize_options(self)
+ self.username = ''
+ self.password = ''
+ self.show_response = 0
+ self.sign = False
+ self.identity = None
+
+ def finalize_options(self):
+ PyPIRCCommand.finalize_options(self)
+ if self.identity and not self.sign:
+ raise DistutilsOptionError(
+ "Must use --sign for --identity to have meaning"
+ )
+ config = self._read_pypirc()
+ if config != {}:  # _read_pypirc() returns {} when it finds no usable entry
+ self.username = config['username']
+ self.password = config['password']
+ self.repository = config['repository']
+ self.realm = config['realm']
+
+ # getting the password from the distribution
+ # if previously set by the register command
+ if not self.password and self.distribution.password:
+ self.password = self.distribution.password
+
+ def run(self):  # upload every file recorded in distribution.dist_files
+ if not self.distribution.dist_files:
+ msg = ("Must create and upload files in one command "
+ "(e.g. setup.py sdist upload)")
+ raise DistutilsOptionError(msg)
+ for command, pyversion, filename in self.distribution.dist_files:
+ self.upload_file(command, pyversion, filename)
+
+ def upload_file(self, command, pyversion, filename):  # build one multipart/form-data POST and send it
+ # Makes sure the repository URL is compliant
+ schema, netloc, url, params, query, fragments = \
+ urlparse.urlparse(self.repository)
+ if params or query or fragments:
+ raise AssertionError("Incompatible url %s" % self.repository)
+
+ if schema not in ('http', 'https'):
+ raise AssertionError("unsupported schema " + schema)
+
+ # Sign if requested
+ if self.sign:
+ gpg_args = ["gpg", "--detach-sign", "-a", filename]
+ if self.identity:
+ gpg_args[2:2] = ["--local-user", self.identity]  # inserted just before "-a"
+ spawn(gpg_args,
+ dry_run=self.dry_run)
+
+ # Fill in the data - send all the meta-data in case we need to
+ # register a new release
+ f = open(filename,'rb')
+ try:
+ content = f.read()  # the whole file is held in memory
+ finally:
+ f.close()
+ meta = self.distribution.metadata
+ data = {
+ # action
+ ':action': 'file_upload',
+ 'protcol_version': '1',  # NOTE(review): key is spelled 'protcol' (sic) -- confirm what the server expects before changing
+
+ # identify release
+ 'name': meta.get_name(),
+ 'version': meta.get_version(),
+
+ # file content
+ 'content': (os.path.basename(filename),content),  # (filename, bytes) tuple becomes a file part below
+ 'filetype': command,
+ 'pyversion': pyversion,
+ 'md5_digest': md5(content).hexdigest(),
+
+ # additional meta-data
+ 'metadata_version' : '1.0',
+ 'summary': meta.get_description(),
+ 'home_page': meta.get_url(),
+ 'author': meta.get_contact(),
+ 'author_email': meta.get_contact_email(),
+ 'license': meta.get_licence(),
+ 'description': meta.get_long_description(),
+ 'keywords': meta.get_keywords(),
+ 'platform': meta.get_platforms(),
+ 'classifiers': meta.get_classifiers(),
+ 'download_url': meta.get_download_url(),
+ # PEP 314
+ 'provides': meta.get_provides(),
+ 'requires': meta.get_requires(),
+ 'obsoletes': meta.get_obsoletes(),
+ }
+ comment = ''
+ if command == 'bdist_rpm':
+ dist, version, id = platform.dist()
+ if dist:
+ comment = 'built for %s %s' % (dist, version)
+ elif command == 'bdist_dumb':
+ comment = 'built for %s' % platform.platform(terse=1)
+ data['comment'] = comment
+
+ if self.sign:
+ data['gpg_signature'] = (os.path.basename(filename) + ".asc",
+ open(filename+".asc").read())  # NOTE(review): this file object is never explicitly closed
+
+ # set up the authentication
+ auth = "Basic " + standard_b64encode(self.username + ":" +
+ self.password)
+
+ # Build up the MIME payload for the POST data
+ boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
+ sep_boundary = '\r\n--' + boundary
+ end_boundary = sep_boundary + '--\r\n'
+ body = StringIO.StringIO()
+ for key, value in data.items():
+ # handle multiple entries for the same name
+ if not isinstance(value, list):
+ value = [value]
+ for value in value:  # rebinds 'value' to each item of the list in turn
+ if isinstance(value, tuple):
+ fn = ';filename="%s"' % value[0]
+ value = value[1]
+ else:
+ fn = ""
+
+ body.write(sep_boundary)
+ body.write('\r\nContent-Disposition: form-data; name="%s"' % key)
+ body.write(fn)
+ body.write("\r\n\r\n")
+ body.write(value)
+ body.write(end_boundary)
+ body = body.getvalue()  # 'body' is a plain string from here on
+
+ self.announce("Submitting %s to %s" % (filename, self.repository), log.INFO)
+
+ # build the Request
+ headers = {'Content-type':
+ 'multipart/form-data; boundary=%s' % boundary,
+ 'Content-length': str(len(body)),
+ 'Authorization': auth}
+
+ request = Request(self.repository, data=body,
+ headers=headers)
+ # send the data
+ try:
+ result = urlopen(request)
+ status = result.getcode()
+ reason = result.msg
+ if self.show_response:
+ msg = '\n'.join(('-' * 75, result.read(), '-' * 75))
+ self.announce(msg, log.INFO)
+ except socket.error, e:
+ self.announce(str(e), log.ERROR)
+ raise
+ except HTTPError, e:  # not re-raised here: handled by the status check below
+ status = e.code
+ reason = e.msg
+
+ if status == 200:  # any other status is reported and raised as DistutilsError
+ self.announce('Server response (%s): %s' % (status, reason),
+ log.INFO)
+ else:
+ msg = 'Upload failed (%s): %s' % (status, reason)
+ self.announce(msg, log.ERROR)
+ raise DistutilsError(msg)
diff --git a/cashew/Lib/distutils/config.py b/cashew/Lib/distutils/config.py
new file mode 100644
index 0000000..9e18c00
--- /dev/null
+++ b/cashew/Lib/distutils/config.py
@@ -0,0 +1,116 @@
+"""distutils.config
+
+Provides the PyPIRCCommand class, the base class for the command classes
+that use .pypirc in the distutils.command package.
+"""
+import os
+from ConfigParser import ConfigParser
+
+from distutils.cmd import Command
+
+DEFAULT_PYPIRC = """\
+[distutils]
+index-servers =
+ pypi
+
+[pypi]
+username:%s
+password:%s
+"""  # filled with (username, password) by PyPIRCCommand._store_pypirc()
+
+class PyPIRCCommand(Command):
+ """Base command that knows how to handle the .pypirc file
+ """
+ DEFAULT_REPOSITORY = 'https://upload.pypi.org/legacy/'
+ DEFAULT_REALM = 'pypi'
+ repository = None  # class-level defaults; finalize_options() replaces None with the DEFAULT_* values
+ realm = None
+
+ user_options = [
+ ('repository=', 'r',
+ "url of repository [default: %s]" % \
+ DEFAULT_REPOSITORY),
+ ('show-response', None,
+ 'display full response text from server')]
+
+ boolean_options = ['show-response']
+
+ def _get_rc_file(self):
+ """Returns rc file path."""
+ return os.path.join(os.path.expanduser('~'), '.pypirc')
+
+ def _store_pypirc(self, username, password):
+ """Creates a default .pypirc file."""
+ rc = self._get_rc_file()
+ f = os.fdopen(os.open(rc, os.O_CREAT | os.O_WRONLY, 0600), 'w')  # NOTE(review): no O_TRUNC -- an existing longer file keeps its tail
+ try:
+ f.write(DEFAULT_PYPIRC % (username, password))
+ finally:
+ f.close()
+
+ def _read_pypirc(self):
+ """Reads the .pypirc file; returns the matching server's settings as a dict, or {}."""
+ rc = self._get_rc_file()
+ if os.path.exists(rc):
+ self.announce('Using PyPI login from %s' % rc)
+ repository = self.repository or self.DEFAULT_REPOSITORY
+ config = ConfigParser()
+ config.read(rc)
+ sections = config.sections()
+ if 'distutils' in sections:
+ # let's get the list of servers
+ index_servers = config.get('distutils', 'index-servers')
+ _servers = [server.strip() for server in
+ index_servers.split('\n')
+ if server.strip() != '']
+ if _servers == []:
+ # nothing set, let's try to get the default pypi
+ if 'pypi' in sections:
+ _servers = ['pypi']
+ else:
+ # the file is not properly defined, returning
+ # an empty dict
+ return {}
+ for server in _servers:
+ current = {'server': server}
+ current['username'] = config.get(server, 'username')  # read without a has_option() guard, unlike the keys below
+
+ # optional params
+ for key, default in (('repository',
+ self.DEFAULT_REPOSITORY),
+ ('realm', self.DEFAULT_REALM),
+ ('password', None)):
+ if config.has_option(server, key):
+ current[key] = config.get(server, key)
+ else:
+ current[key] = default
+ if (current['server'] == repository or  # the requested repository may be a section name or a URL
+ current['repository'] == repository):
+ return current
+ elif 'server-login' in sections:
+ # old format
+ server = 'server-login'
+ if config.has_option(server, 'repository'):
+ repository = config.get(server, 'repository')
+ else:
+ repository = self.DEFAULT_REPOSITORY
+ return {'username': config.get(server, 'username'),
+ 'password': config.get(server, 'password'),
+ 'repository': repository,
+ 'server': server,
+ 'realm': self.DEFAULT_REALM}
+
+ return {}  # nothing usable was found; callers compare the result against {}
+
+ def initialize_options(self):
+ """Initialize options."""
+ self.repository = None
+ self.realm = None
+ self.show_response = 0
+
+ def finalize_options(self):
+ """Finalizes options."""
+ if self.repository is None:
+ self.repository = self.DEFAULT_REPOSITORY
+ if self.realm is None:
+ self.realm = self.DEFAULT_REALM
diff --git a/cashew/Lib/distutils/core.py b/cashew/Lib/distutils/core.py
new file mode 100644
index 0000000..fcb2060
--- /dev/null
+++ b/cashew/Lib/distutils/core.py
@@ -0,0 +1,239 @@
+"""distutils.core
+
+The only module that needs to be imported to use the Distutils; provides
+the 'setup' function (which is to be called from the setup script). Also
+indirectly provides the Distribution and Command classes, although they are
+really defined in distutils.dist and distutils.cmd.
+"""
+
+__revision__ = "$Id$"
+
+import sys
+import os
+
+from distutils.debug import DEBUG
+from distutils.errors import (DistutilsSetupError, DistutilsArgError,
+ DistutilsError, CCompilerError)
+
+# Mainly import these so setup scripts can "from distutils.core import" them.
+from distutils.dist import Distribution
+from distutils.cmd import Command
+from distutils.config import PyPIRCCommand
+from distutils.extension import Extension
+
+# This is a barebones help message displayed when the user
+# runs the setup script with no arguments at all. More useful help
+# is generated with various --help options: global help, list commands,
+# and per-command help.
+USAGE = """\
+usage: %(script)s [global_opts] cmd1 [cmd1_opts] [cmd2 [cmd2_opts] ...]
+ or: %(script)s --help [cmd1 cmd2 ...]
+ or: %(script)s --help-commands
+ or: %(script)s cmd --help
+"""  # %(script)s is substituted by gen_usage()
+
+def gen_usage(script_name):
+ script = os.path.basename(script_name)
+ return USAGE % {'script': script}
+
+
+# Some mild magic to control the behaviour of 'setup()' from 'run_setup()'.
+_setup_stop_after = None  # set by run_setup() for the duration of a call; checked by setup()
+_setup_distribution = None  # the Distribution most recently created by setup()
+
+# Legal keyword arguments for the setup() function
+setup_keywords = ('distclass', 'script_name', 'script_args', 'options',
+ 'name', 'version', 'author', 'author_email',
+ 'maintainer', 'maintainer_email', 'url', 'license',
+ 'description', 'long_description', 'keywords',
+ 'platforms', 'classifiers', 'download_url',
+ 'requires', 'provides', 'obsoletes',
+ )
+
+# Legal keyword arguments for the Extension constructor
+extension_keywords = ('name', 'sources', 'include_dirs',
+ 'define_macros', 'undef_macros',
+ 'library_dirs', 'libraries', 'runtime_library_dirs',
+ 'extra_objects', 'extra_compile_args', 'extra_link_args',
+ 'swig_opts', 'export_symbols', 'depends', 'language')
+
+def setup(**attrs):
+ """The gateway to the Distutils: do everything your setup script needs
+ to do, in a highly flexible and user-driven way. Briefly: create a
+ Distribution instance; find and parse config files; parse the command
+ line; run each Distutils command found there, customized by the options
+ supplied to 'setup()' (as keyword arguments), in config files, and on
+ the command line.
+
+ The Distribution instance might be an instance of a class supplied via
+ the 'distclass' keyword argument to 'setup'; if no such class is
+ supplied, then the Distribution class (in dist.py) is instantiated.
+ All other arguments to 'setup' (except for 'cmdclass') are used to set
+ attributes of the Distribution instance.
+
+ The 'cmdclass' argument, if supplied, is a dictionary mapping command
+ names to command classes. Each command encountered on the command line
+ will be turned into a command class, which is in turn instantiated; any
+ class found in 'cmdclass' is used in place of the default, which is
+ (for command 'foo_bar') class 'foo_bar' in module
+ 'distutils.command.foo_bar'. The command class must provide a
+ 'user_options' attribute which is a list of option specifiers for
+ 'distutils.fancy_getopt'. Any command-line options between the current
+ and the next command are used to set attributes of the current command
+ object.
+
+ When the entire command-line has been successfully parsed, calls the
+ 'run()' method on each command object in turn. This method will be
+ driven entirely by the Distribution object (which each command object
+ has a reference to, thanks to its constructor), and the
+ command-specific options that became attributes of each command
+ object.
+ """
+
+ global _setup_stop_after, _setup_distribution
+
+ # Determine the distribution class -- either caller-supplied or
+ # our Distribution (see below).
+ klass = attrs.get('distclass')
+ if klass:
+ del attrs['distclass']  # everything left in attrs is passed to the Distribution constructor
+ else:
+ klass = Distribution
+
+ if 'script_name' not in attrs:  # default to the running script and its arguments
+ attrs['script_name'] = os.path.basename(sys.argv[0])
+ if 'script_args' not in attrs:
+ attrs['script_args'] = sys.argv[1:]
+
+ # Create the Distribution instance, using the remaining arguments
+ # (ie. everything except distclass) to initialize it
+ try:
+ _setup_distribution = dist = klass(attrs)
+ except DistutilsSetupError, msg:  # turned into SystemExit with a message
+ if 'name' in attrs:
+ raise SystemExit, "error in %s setup command: %s" % \
+ (attrs['name'], msg)
+ else:
+ raise SystemExit, "error in setup command: %s" % msg
+
+ if _setup_stop_after == "init":  # this and the later early returns follow run_setup()'s 'stop_after'
+ return dist
+
+ # Find and parse the config file(s): they will override options from
+ # the setup script, but be overridden by the command line.
+ dist.parse_config_files()
+
+ if DEBUG:
+ print "options (after parsing config files):"
+ dist.dump_option_dicts()
+
+ if _setup_stop_after == "config":
+ return dist
+
+ # Parse the command line and override config files; any
+ # command-line errors are the end user's fault, so turn them into
+ # SystemExit to suppress tracebacks.
+ try:
+ ok = dist.parse_command_line()
+ except DistutilsArgError, msg:
+ raise SystemExit, gen_usage(dist.script_name) + "\nerror: %s" % msg
+
+ if DEBUG:
+ print "options (after parsing command line):"
+ dist.dump_option_dicts()
+
+ if _setup_stop_after == "commandline":
+ return dist
+
+ # And finally, run all the commands found on the command line.
+ if ok:  # commands run only when parse_command_line() returned a true value
+ try:
+ dist.run_commands()
+ except KeyboardInterrupt:
+ raise SystemExit, "interrupted"
+ except (IOError, os.error), exc:
+ if DEBUG:
+ sys.stderr.write("error: %s\n" % (exc,))
+ raise  # DEBUG: re-raise the original exception
+ else:
+ raise SystemExit, "error: %s" % (exc,)
+
+ except (DistutilsError,
+ CCompilerError), msg:
+ if DEBUG:
+ raise
+ else:
+ raise SystemExit, "error: " + str(msg)
+
+ return dist
+
+
+def run_setup(script_name, script_args=None, stop_after="run"):
+ """Run a setup script in a somewhat controlled environment, and
+ return the Distribution instance that drives things. This is useful
+ if you need to find out the distribution meta-data (passed as
+ keyword args from 'script' to 'setup()', or the contents of the
+ config files or command-line.
+
+ 'script_name' is a file that will be run with 'execfile()';
+ 'sys.argv[0]' will be replaced with 'script' for the duration of the
+ call. 'script_args' is a list of strings; if supplied,
+ 'sys.argv[1:]' will be replaced by 'script_args' for the duration of
+ the call.
+
+ 'stop_after' tells 'setup()' when to stop processing; possible
+ values:
+ init
+ stop after the Distribution instance has been created and
+ populated with the keyword arguments to 'setup()'
+ config
+ stop after config files have been parsed (and their data
+ stored in the Distribution instance)
+ commandline
+ stop after the command-line ('sys.argv[1:]' or 'script_args')
+ have been parsed (and the data stored in the Distribution)
+ run [default]
+ stop after all commands have been run (the same as if 'setup()'
+ had been called in the usual way
+
+ Returns the Distribution instance, which provides all information
+ used to drive the Distutils.
+ """
+ if stop_after not in ('init', 'config', 'commandline', 'run'):
+ raise ValueError, "invalid value for 'stop_after': %r" % (stop_after,)
+
+ global _setup_stop_after, _setup_distribution
+ _setup_stop_after = stop_after
+
+ save_argv = sys.argv
+ g = {'__file__': script_name}
+ l = {}
+ try:
+ try:
+ sys.argv[0] = script_name
+ if script_args is not None:
+ sys.argv[1:] = script_args
+ f = open(script_name)
+ try:
+ exec f.read() in g, l
+ finally:
+ f.close()
+ finally:
+ sys.argv = save_argv
+ _setup_stop_after = None
+ except SystemExit:
+ # Hmm, should we do something if exiting with a non-zero code
+ # (ie. error)?
+ pass
+ except:
+ raise
+
+ if _setup_distribution is None:
+ raise RuntimeError, \
+ ("'distutils.core.setup()' was never called -- "
+ "perhaps '%s' is not a Distutils setup script?") % \
+ script_name
+
+ # I wonder if the setup script's namespace -- g and l -- would be of
+ # any interest to callers?
+ return _setup_distribution
diff --git a/cashew/Lib/distutils/cygwinccompiler.py b/cashew/Lib/distutils/cygwinccompiler.py
new file mode 100644
index 0000000..258e138
--- /dev/null
+++ b/cashew/Lib/distutils/cygwinccompiler.py
@@ -0,0 +1,463 @@
+"""distutils.cygwinccompiler
+
+Provides the CygwinCCompiler class, a subclass of UnixCCompiler that
+handles the Cygwin port of the GNU C compiler to Windows. It also contains
+the Mingw32CCompiler class which handles the mingw32 port of GCC (same as
+cygwin in no-cygwin mode).
+"""
+
+# problems:
+#
+# * if you use a msvc compiled python version (1.5.2)
+# 1. you have to insert a __GNUC__ section in its config.h
+# 2. you have to generate an import library for its dll
+# - create a def-file for python??.dll
+# - create an import library using
+# dlltool --dllname python15.dll --def python15.def \
+# --output-lib libpython15.a
+#
+# see also http://starship.python.net/crew/kernr/mingw32/Notes.html
+#
+# * We put export_symbols in a def-file, and don't use
+#   --export-all-symbols because it didn't work reliably in some
+#   tested configurations. And because other Windows compilers also
+#   need their symbols specified, this is no serious problem.
+#
+# tested configurations:
+#
+# * cygwin gcc 2.91.57/ld 2.9.4/dllwrap 0.2.4 works
+# (after patching python's config.h and for C++ some other include files)
+# see also http://starship.python.net/crew/kernr/mingw32/Notes.html
+# * mingw32 gcc 2.95.2/ld 2.9.4/dllwrap 0.2.4 works
+# (ld doesn't support -shared, so we use dllwrap)
+# * cygwin gcc 2.95.2/ld 2.10.90/dllwrap 2.10.90 works now
+# - its dllwrap doesn't work, there is a bug in binutils 2.10.90
+# see also http://sources.redhat.com/ml/cygwin/2000-06/msg01274.html
+#   - using gcc -mdll instead of dllwrap doesn't work without -static because
+#     it tries to link against dlls instead of their import libraries. (If
+#     it finds the dll first.)
+# By specifying -static we force ld to link against the import libraries,
+# this is windows standard and there are normally not the necessary symbols
+# in the dlls.
+# *** only the version of June 2000 shows these problems
+# * cygwin gcc 3.2/ld 2.13.90 works
+# (ld supports -shared)
+# * mingw gcc 3.2/ld 2.13 works
+# (ld supports -shared)
+
+# This module should be kept compatible with Python 2.1.
+
+__revision__ = "$Id$"
+
+import os,sys,copy
+from distutils.ccompiler import gen_preprocess_options, gen_lib_options
+from distutils.unixccompiler import UnixCCompiler
+from distutils.file_util import write_file
+from distutils.errors import DistutilsExecError, CompileError, UnknownFileError
+from distutils import log
+
+def get_msvcr():
+ """Include the appropriate MSVC runtime library if Python was built
+ with MSVC 7.0 or later.
+ """
+ msc_pos = sys.version.find('MSC v.')
+ if msc_pos != -1:
+ msc_ver = sys.version[msc_pos+6:msc_pos+10]
+ if msc_ver == '1300':
+ # MSVC 7.0
+ return ['msvcr70']
+ elif msc_ver == '1310':
+ # MSVC 7.1
+ return ['msvcr71']
+ elif msc_ver == '1400':
+ # VS2005 / MSVC 8.0
+ return ['msvcr80']
+ elif msc_ver == '1500':
+ # VS2008 / MSVC 9.0
+ return ['msvcr90']
+ else:
+ raise ValueError("Unknown MS Compiler version %s " % msc_ver)
+
+
+class CygwinCCompiler (UnixCCompiler):
+
+ compiler_type = 'cygwin'
+ obj_extension = ".o"
+ static_lib_extension = ".a"
+ shared_lib_extension = ".dll"
+ static_lib_format = "lib%s%s"
+ shared_lib_format = "%s%s"
+ exe_extension = ".exe"
+
+ def __init__ (self, verbose=0, dry_run=0, force=0):
+
+ UnixCCompiler.__init__ (self, verbose, dry_run, force)
+
+ (status, details) = check_config_h()
+ self.debug_print("Python's GCC status: %s (details: %s)" %
+ (status, details))
+ if status is not CONFIG_H_OK:
+ self.warn(
+ "Python's pyconfig.h doesn't seem to support your compiler. "
+ "Reason: %s. "
+ "Compiling may fail because of undefined preprocessor macros."
+ % details)
+
+ self.gcc_version, self.ld_version, self.dllwrap_version = \
+ get_versions()
+ self.debug_print(self.compiler_type + ": gcc %s, ld %s, dllwrap %s\n" %
+ (self.gcc_version,
+ self.ld_version,
+ self.dllwrap_version) )
+
+ # ld_version >= "2.10.90" and < "2.13" should also be able to use
+ # gcc -mdll instead of dllwrap
+ # Older dllwraps had own version numbers, newer ones use the
+ # same as the rest of binutils ( also ld )
+ # dllwrap 2.10.90 is buggy
+ if self.ld_version >= "2.10.90":
+ self.linker_dll = "gcc"
+ else:
+ self.linker_dll = "dllwrap"
+
+ # ld_version >= "2.13" support -shared so use it instead of
+ # -mdll -static
+ if self.ld_version >= "2.13":
+ shared_option = "-shared"
+ else:
+ shared_option = "-mdll -static"
+
+ # Hard-code GCC because that's what this is all about.
+ # XXX optimization, warnings etc. should be customizable.
+ self.set_executables(compiler='gcc -mcygwin -O -Wall',
+ compiler_so='gcc -mcygwin -mdll -O -Wall',
+ compiler_cxx='g++ -mcygwin -O -Wall',
+ linker_exe='gcc -mcygwin',
+ linker_so=('%s -mcygwin %s' %
+ (self.linker_dll, shared_option)))
+
+ # cygwin and mingw32 need different sets of libraries
+ if self.gcc_version == "2.91.57":
+ # cygwin shouldn't need msvcrt, but without the dlls will crash
+ # (gcc version 2.91.57) -- perhaps something about initialization
+ self.dll_libraries=["msvcrt"]
+ self.warn(
+ "Consider upgrading to a newer version of gcc")
+ else:
+ # Include the appropriate MSVC runtime library if Python was built
+ # with MSVC 7.0 or later.
+ self.dll_libraries = get_msvcr()
+
+ # __init__ ()
+
+
+ def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
+ if ext == '.rc' or ext == '.res':
+ # gcc needs '.res' and '.rc' compiled to object files !!!
+ try:
+ self.spawn(["windres", "-i", src, "-o", obj])
+ except DistutilsExecError, msg:
+ raise CompileError, msg
+ else: # for other files use the C-compiler
+ try:
+ self.spawn(self.compiler_so + cc_args + [src, '-o', obj] +
+ extra_postargs)
+ except DistutilsExecError, msg:
+ raise CompileError, msg
+
+ def link (self,
+ target_desc,
+ objects,
+ output_filename,
+ output_dir=None,
+ libraries=None,
+ library_dirs=None,
+ runtime_library_dirs=None,
+ export_symbols=None,
+ debug=0,
+ extra_preargs=None,
+ extra_postargs=None,
+ build_temp=None,
+ target_lang=None):
+
+ # use separate copies, so we can modify the lists
+ extra_preargs = copy.copy(extra_preargs or [])
+ libraries = copy.copy(libraries or [])
+ objects = copy.copy(objects or [])
+
+ # Additional libraries
+ libraries.extend(self.dll_libraries)
+
+ # handle export symbols by creating a def-file
+ # with executables this only works with gcc/ld as linker
+ if ((export_symbols is not None) and
+ (target_desc != self.EXECUTABLE or self.linker_dll == "gcc")):
+ # (The linker doesn't do anything if output is up-to-date.
+ # So it would probably better to check if we really need this,
+ # but for this we had to insert some unchanged parts of
+ # UnixCCompiler, and this is not what we want.)
+
+ # we want to put some files in the same directory as the
+ # object files are, build_temp doesn't help much
+ # where are the object files
+ temp_dir = os.path.dirname(objects[0])
+ # name of dll to give the helper files the same base name
+ (dll_name, dll_extension) = os.path.splitext(
+ os.path.basename(output_filename))
+
+ # generate the filenames for these files
+ def_file = os.path.join(temp_dir, dll_name + ".def")
+ lib_file = os.path.join(temp_dir, 'lib' + dll_name + ".a")
+
+ # Generate .def file
+ contents = [
+ "LIBRARY %s" % os.path.basename(output_filename),
+ "EXPORTS"]
+ for sym in export_symbols:
+ contents.append(sym)
+ self.execute(write_file, (def_file, contents),
+ "writing %s" % def_file)
+
+ # next add options for def-file and to creating import libraries
+
+ # dllwrap uses different options than gcc/ld
+ if self.linker_dll == "dllwrap":
+ extra_preargs.extend(["--output-lib", lib_file])
+ # for dllwrap we have to use a special option
+ extra_preargs.extend(["--def", def_file])
+ # we use gcc/ld here and can be sure ld is >= 2.9.10
+ else:
+ # doesn't work: bfd_close build\...\libfoo.a: Invalid operation
+ #extra_preargs.extend(["-Wl,--out-implib,%s" % lib_file])
+ # for gcc/ld the def-file is specified as any object files
+ objects.append(def_file)
+
+ #end: if ((export_symbols is not None) and
+ # (target_desc != self.EXECUTABLE or self.linker_dll == "gcc")):
+
+ # who wants symbols and a many times larger output file
+ # should explicitly switch the debug mode on
+ # otherwise we let dllwrap/ld strip the output file
+ # (On my machine: 10KB < stripped_file < ??100KB
+ # unstripped_file = stripped_file + XXX KB
+ # ( XXX=254 for a typical python extension))
+ if not debug:
+ extra_preargs.append("-s")
+
+ UnixCCompiler.link(self,
+ target_desc,
+ objects,
+ output_filename,
+ output_dir,
+ libraries,
+ library_dirs,
+ runtime_library_dirs,
+ None, # export_symbols, we do this in our def-file
+ debug,
+ extra_preargs,
+ extra_postargs,
+ build_temp,
+ target_lang)
+
+ # link ()
+
+ # -- Miscellaneous methods -----------------------------------------
+
+ # overwrite the one from CCompiler to support rc and res-files
+ def object_filenames (self,
+ source_filenames,
+ strip_dir=0,
+ output_dir=''):
+ if output_dir is None: output_dir = ''
+ obj_names = []
+ for src_name in source_filenames:
+ # use normcase to make sure '.rc' is really '.rc' and not '.RC'
+ (base, ext) = os.path.splitext (os.path.normcase(src_name))
+ if ext not in (self.src_extensions + ['.rc','.res']):
+ raise UnknownFileError, \
+ "unknown file type '%s' (from '%s')" % \
+ (ext, src_name)
+ if strip_dir:
+ base = os.path.basename (base)
+ if ext == '.res' or ext == '.rc':
+ # these need to be compiled to object files
+ obj_names.append (os.path.join (output_dir,
+ base + ext + self.obj_extension))
+ else:
+ obj_names.append (os.path.join (output_dir,
+ base + self.obj_extension))
+ return obj_names
+
+ # object_filenames ()
+
+# class CygwinCCompiler
+
+
+# the same as cygwin plus some additional parameters
+class Mingw32CCompiler (CygwinCCompiler):
+
+ compiler_type = 'mingw32'
+
+ def __init__ (self,
+ verbose=0,
+ dry_run=0,
+ force=0):
+
+ CygwinCCompiler.__init__ (self, verbose, dry_run, force)
+
+ # ld_version >= "2.13" support -shared so use it instead of
+ # -mdll -static
+ if self.ld_version >= "2.13":
+ shared_option = "-shared"
+ else:
+ shared_option = "-mdll -static"
+
+ # A real mingw32 doesn't need to specify a different entry point,
+ # but cygwin 2.91.57 in no-cygwin-mode needs it.
+ if self.gcc_version <= "2.91.57":
+ entry_point = '--entry _DllMain@12'
+ else:
+ entry_point = ''
+
+ if self.gcc_version < '4' or is_cygwingcc():
+ no_cygwin = ' -mno-cygwin'
+ else:
+ no_cygwin = ''
+
+ self.set_executables(compiler='gcc%s -O -Wall' % no_cygwin,
+ compiler_so='gcc%s -mdll -O -Wall' % no_cygwin,
+ compiler_cxx='g++%s -O -Wall' % no_cygwin,
+ linker_exe='gcc%s' % no_cygwin,
+ linker_so='%s%s %s %s'
+ % (self.linker_dll, no_cygwin,
+ shared_option, entry_point))
+ # Maybe we should also append -mthreads, but then the finished
+ # dlls need another dll (mingwm10.dll see Mingw32 docs)
+ # (-mthreads: Support thread-safe exception handling on `Mingw32')
+
+ # no additional libraries needed
+ self.dll_libraries=[]
+
+ # Include the appropriate MSVC runtime library if Python was built
+ # with MSVC 7.0 or later.
+ self.dll_libraries = get_msvcr()
+
+ # __init__ ()
+
+# class Mingw32CCompiler
+
+# Because these compilers aren't configured in Python's pyconfig.h file by
+# default, we should at least warn the user if he is using an unmodified
+# version.
+
+# Status values returned as the first element of check_config_h()'s result.
+CONFIG_H_OK = "ok"
+CONFIG_H_NOTOK = "not ok"
+CONFIG_H_UNCERTAIN = "uncertain"
+
+def check_config_h():
+
+ """Check if the current Python installation (specifically, pyconfig.h)
+ appears amenable to building extensions with GCC. Returns a tuple
+ (status, details), where 'status' is one of the following constants:
+ CONFIG_H_OK
+ all is well, go ahead and compile
+ CONFIG_H_NOTOK
+ doesn't look good
+ CONFIG_H_UNCERTAIN
+ not sure -- unable to read pyconfig.h
+ 'details' is a human-readable string explaining the situation.
+
+ Note there are two ways to conclude "OK": either 'sys.version' contains
+ the string "GCC" (implying that this Python was built with GCC), or the
+ installed "pyconfig.h" contains the string "__GNUC__".
+ """
+
+ # XXX since this function also checks sys.version, it's not strictly a
+ # "pyconfig.h" check -- should probably be renamed...
+
+ from distutils import sysconfig
+ import string
+ # if sys.version contains GCC then python was compiled with
+ # GCC, and the pyconfig.h file should be OK
+ if string.find(sys.version,"GCC") >= 0:
+ return (CONFIG_H_OK, "sys.version mentions 'GCC'")
+
+ fn = sysconfig.get_config_h_filename()
+ try:
+ # It would probably better to read single lines to search.
+ # But we do this only once, and it is fast enough
+ f = open(fn)
+ try:
+ s = f.read()
+ finally:
+ f.close()
+
+ except IOError, exc:
+ # if we can't read this file, we cannot say it is wrong
+ # the compiler will complain later about this file as missing
+ return (CONFIG_H_UNCERTAIN,
+ "couldn't read '%s': %s" % (fn, exc.strerror))
+
+ else:
+ # "pyconfig.h" contains an "#ifdef __GNUC__" or something similar
+ if string.find(s,"__GNUC__") >= 0:
+ return (CONFIG_H_OK, "'%s' mentions '__GNUC__'" % fn)
+ else:
+ return (CONFIG_H_NOTOK, "'%s' does not mention '__GNUC__'" % fn)
+
+
+
+def get_versions():
+ """ Try to find out the versions of gcc, ld and dllwrap.
+ If not possible it returns None for it.
+ """
+ from distutils.version import LooseVersion
+ from distutils.spawn import find_executable
+ import re
+
+ gcc_exe = find_executable('gcc')
+ if gcc_exe:
+ out = os.popen(gcc_exe + ' -dumpversion','r')
+ out_string = out.read()
+ out.close()
+ result = re.search('(\d+\.\d+(\.\d+)*)',out_string)
+ if result:
+ gcc_version = LooseVersion(result.group(1))
+ else:
+ gcc_version = None
+ else:
+ gcc_version = None
+ ld_exe = find_executable('ld')
+ if ld_exe:
+ out = os.popen(ld_exe + ' -v','r')
+ out_string = out.read()
+ out.close()
+ result = re.search('(\d+\.\d+(\.\d+)*)',out_string)
+ if result:
+ ld_version = LooseVersion(result.group(1))
+ else:
+ ld_version = None
+ else:
+ ld_version = None
+ dllwrap_exe = find_executable('dllwrap')
+ if dllwrap_exe:
+ out = os.popen(dllwrap_exe + ' --version','r')
+ out_string = out.read()
+ out.close()
+ result = re.search(' (\d+\.\d+(\.\d+)*)',out_string)
+ if result:
+ dllwrap_version = LooseVersion(result.group(1))
+ else:
+ dllwrap_version = None
+ else:
+ dllwrap_version = None
+ return (gcc_version, ld_version, dllwrap_version)
+
+def is_cygwingcc():
+ '''Try to determine if the gcc that would be used is from cygwin.'''
+ out = os.popen('gcc -dumpmachine', 'r')
+ out_string = out.read()
+ out.close()
+ # out_string is the target triplet cpu-vendor-os
+ # Cygwin's gcc sets the os to 'cygwin'
+ return out_string.strip().endswith('cygwin')
diff --git a/cashew/Lib/distutils/debug.py b/cashew/Lib/distutils/debug.py
new file mode 100644
index 0000000..2886744
--- /dev/null
+++ b/cashew/Lib/distutils/debug.py
@@ -0,0 +1,7 @@
+import os
+
+__revision__ = "$Id$"
+
+# If DISTUTILS_DEBUG is anything other than the empty string, we run in
+# debug mode.
+DEBUG = os.environ.get('DISTUTILS_DEBUG')
diff --git a/cashew/Lib/distutils/dep_util.py b/cashew/Lib/distutils/dep_util.py
new file mode 100644
index 0000000..2b75905
--- /dev/null
+++ b/cashew/Lib/distutils/dep_util.py
@@ -0,0 +1,89 @@
+"""distutils.dep_util
+
+Utility functions for simple, timestamp-based dependency of files
+and groups of files; also, function based entirely on such
+timestamp dependency analysis."""
+
+__revision__ = "$Id$"
+
+import os
+from stat import ST_MTIME
+from distutils.errors import DistutilsFileError
+
+def newer(source, target):
+ """Tells if the target is newer than the source.
+
+ Return true if 'source' exists and is more recently modified than
+ 'target', or if 'source' exists and 'target' doesn't.
+
+ Return false if both exist and 'target' is the same age or younger
+ than 'source'. Raise DistutilsFileError if 'source' does not exist.
+
+ Note that this test is not very accurate: files created in the same second
+ will have the same "age".
+ """
+ if not os.path.exists(source):
+ raise DistutilsFileError("file '%s' does not exist" %
+ os.path.abspath(source))
+ if not os.path.exists(target):
+ return True
+
+ return os.stat(source)[ST_MTIME] > os.stat(target)[ST_MTIME]
+
+def newer_pairwise(sources, targets):
+ """Walk two filename lists in parallel, testing if each source is newer
+ than its corresponding target. Return a pair of lists (sources,
+ targets) where source is newer than target, according to the semantics
+ of 'newer()'.
+ """
+ if len(sources) != len(targets):
+ raise ValueError, "'sources' and 'targets' must be same length"
+
+ # build a pair of lists (sources, targets) where source is newer
+ n_sources = []
+ n_targets = []
+ for source, target in zip(sources, targets):
+ if newer(source, target):
+ n_sources.append(source)
+ n_targets.append(target)
+
+ return n_sources, n_targets
+
+def newer_group(sources, target, missing='error'):
+    """Return true if 'target' is out-of-date with respect to 'sources'.
+
+    'target' is out-of-date if it does not exist, or if any file in
+    'sources' has a later modification time; otherwise return false.
+
+    'missing' says what to do about a source file that does not exist:
+      "error" (the default)
+        do nothing special, so the following 'stat()' call blows up with
+        an OSError
+      "ignore"
+        silently skip the missing source
+      "newer"
+        treat 'target' as out-of-date (handy in "dry-run" mode, where
+        commands that would fail on the missing input are never actually
+        run)
+    """
+    # If the target doesn't even exist, then it's definitely out-of-date.
+    if not os.path.exists(target):
+        return True
+
+    # Any single source more recent than the target is enough to make the
+    # target out-of-date; only if none is do we report it as up-to-date.
+    cutoff = os.stat(target)[ST_MTIME]
+
+    for candidate in sources:
+        if not os.path.exists(candidate):
+            if missing == 'ignore':
+                # drop it from target's dependency list
+                continue
+            if missing == 'newer':
+                # a missing source means target is out-of-date
+                return True
+            # otherwise fall through and let stat() raise
+
+        if os.stat(candidate)[ST_MTIME] > cutoff:
+            return True
+
+    return False
diff --git a/cashew/Lib/distutils/dir_util.py b/cashew/Lib/distutils/dir_util.py
new file mode 100644
index 0000000..f90318e
--- /dev/null
+++ b/cashew/Lib/distutils/dir_util.py
@@ -0,0 +1,214 @@
+"""distutils.dir_util
+
+Utility functions for manipulating directories and directory trees."""
+
+__revision__ = "$Id$"
+
+import os
+import errno
+from distutils.errors import DistutilsFileError, DistutilsInternalError
+from distutils import log
+
+# Cache used by mkpath(): its keys are the absolute paths of directories
+# mkpath() has already handled.  In addition to cheapening redundant calls,
+# it eliminates redundant "creating /foo/bar/baz" messages in dry-run mode.
+# remove_tree() deletes the entries for paths it removes.
+_path_created = {}
+
+# I don't use os.makedirs because a) it's new to Python 1.5.2, and
+# b) it blows up if the directory already exists (I want to silently
+# succeed in that case).
+def mkpath(name, mode=0777, verbose=1, dry_run=0):
+ """Create a directory and any missing ancestor directories.
+
+ If the directory already exists (or if 'name' is the empty string, which
+ means the current directory, which of course exists), then do nothing.
+ Raise DistutilsFileError if unable to create some directory along the way
+ (eg. some sub-path exists, but is a file rather than a directory).
+ If 'verbose' is true, print a one-line summary of each mkdir to stdout.
+ Return the list of directories actually created.
+ """
+
+ global _path_created
+
+ # Detect a common bug -- name is None
+ if not isinstance(name, basestring):
+ raise DistutilsInternalError, \
+ "mkpath: 'name' must be a string (got %r)" % (name,)
+
+ # XXX what's the better way to handle verbosity? print as we create
+ # each directory in the path (the current behaviour), or only announce
+ # the creation of the whole path? (quite easy to do the latter since
+ # we're not using a recursive algorithm)
+
+ name = os.path.normpath(name)
+ created_dirs = []
+ if os.path.isdir(name) or name == '':
+ return created_dirs
+ if _path_created.get(os.path.abspath(name)):
+ return created_dirs
+
+ (head, tail) = os.path.split(name)
+ tails = [tail] # stack of lone dirs to create
+
+ while head and tail and not os.path.isdir(head):
+ (head, tail) = os.path.split(head)
+ tails.insert(0, tail) # push next higher dir onto stack
+
+ # now 'head' contains the deepest directory that already exists
+ # (that is, the child of 'head' in 'name' is the highest directory
+ # that does *not* exist)
+ for d in tails:
+ #print "head = %s, d = %s: " % (head, d),
+ head = os.path.join(head, d)
+ abs_head = os.path.abspath(head)
+
+ if _path_created.get(abs_head):
+ continue
+
+ if verbose >= 1:
+ log.info("creating %s", head)
+
+ if not dry_run:
+ try:
+ os.mkdir(head, mode)
+ except OSError, exc:
+ if not (exc.errno == errno.EEXIST and os.path.isdir(head)):
+ raise DistutilsFileError(
+ "could not create '%s': %s" % (head, exc.args[-1]))
+ created_dirs.append(head)
+
+ _path_created[abs_head] = 1
+ return created_dirs
+
+def create_tree(base_dir, files, mode=0777, verbose=1, dry_run=0):
+ """Create all the empty directories under 'base_dir' needed to put 'files'
+ there.
+
+ 'base_dir' is just the name of a directory which doesn't necessarily
+ exist yet; 'files' is a list of filenames to be interpreted relative to
+ 'base_dir'. 'base_dir' + the directory portion of every file in 'files'
+ will be created if it doesn't already exist. 'mode', 'verbose' and
+ 'dry_run' flags are as for 'mkpath()'.
+ """
+ # First get the list of directories to create
+ need_dir = {}
+ for file in files:
+ need_dir[os.path.join(base_dir, os.path.dirname(file))] = 1
+ need_dirs = need_dir.keys()
+ need_dirs.sort()
+
+ # Now create them
+ for dir in need_dirs:
+ mkpath(dir, mode, verbose=verbose, dry_run=dry_run)
+
+def copy_tree(src, dst, preserve_mode=1, preserve_times=1,
+              preserve_symlinks=0, update=0, verbose=1, dry_run=0):
+    """Copy an entire directory tree 'src' to a new location 'dst'.
+
+    Both 'src' and 'dst' must be directory names.  If 'src' is not a
+    directory, raise DistutilsFileError (this check is skipped when
+    'dry_run' is true).  If 'dst' does not exist, it is created with
+    'mkpath()'.  The end result of the copy is that every file in 'src'
+    is copied to 'dst', and directories under 'src' are recursively
+    copied to 'dst'.  Entries whose name starts with '.nfs' are skipped.
+
+    Return the list of files that were copied or might have been copied,
+    using their output name.  The return value is unaffected by 'update'
+    or 'dry_run': it is simply the list of all files under 'src', with
+    the names changed to be under 'dst'.
+
+    'preserve_mode' and 'preserve_times' are the same as for
+    'copy_file'; note that they only apply to regular files, not to
+    directories.  If 'preserve_symlinks' is true, symlinks will be
+    copied as symlinks (on platforms that support them!); otherwise
+    (the default), the destination of the symlink will be copied.
+    'update' and 'verbose' are the same as for 'copy_file'.
+
+    Raise DistutilsFileError if 'src' cannot be listed (unless 'dry_run'
+    is true, in which case it is treated as empty).
+    """
+    from distutils.file_util import copy_file
+
+    if not dry_run and not os.path.isdir(src):
+        raise DistutilsFileError, \
+              "cannot copy tree '%s': not a directory" % src
+    try:
+        names = os.listdir(src)
+    # NOTE: the exception is unpacked into two local names; 'errno' here
+    # is a local variable, not the errno module imported by this file.
+    except os.error, (errno, errstr):
+        if dry_run:
+            # in dry-run mode 'src' may legitimately not exist yet
+            names = []
+        else:
+            raise DistutilsFileError, \
+                  "error listing files in '%s': %s" % (src, errstr)
+
+    if not dry_run:
+        mkpath(dst, verbose=verbose)
+
+    outputs = []
+
+    for n in names:
+        src_name = os.path.join(src, n)
+        dst_name = os.path.join(dst, n)
+
+        if n.startswith('.nfs'):
+            # skip NFS rename files
+            continue
+
+        if preserve_symlinks and os.path.islink(src_name):
+            # recreate the link itself rather than copying what it points to
+            link_dest = os.readlink(src_name)
+            if verbose >= 1:
+                log.info("linking %s -> %s", dst_name, link_dest)
+            if not dry_run:
+                os.symlink(link_dest, dst_name)
+            outputs.append(dst_name)
+
+        elif os.path.isdir(src_name):
+            # recurse; the sub-tree's outputs are appended to ours
+            outputs.extend(
+                copy_tree(src_name, dst_name, preserve_mode,
+                          preserve_times, preserve_symlinks, update,
+                          verbose=verbose, dry_run=dry_run))
+        else:
+            copy_file(src_name, dst_name, preserve_mode,
+                      preserve_times, update, verbose=verbose,
+                      dry_run=dry_run)
+            outputs.append(dst_name)
+
+    return outputs
+
+def _build_cmdtuple(path, cmdtuples):
+ """Helper for remove_tree()."""
+ for f in os.listdir(path):
+ real_f = os.path.join(path,f)
+ if os.path.isdir(real_f) and not os.path.islink(real_f):
+ _build_cmdtuple(real_f, cmdtuples)
+ else:
+ cmdtuples.append((os.remove, real_f))
+ cmdtuples.append((os.rmdir, path))
+
+def remove_tree(directory, verbose=1, dry_run=0):
+ """Recursively remove an entire directory tree.
+
+ Any errors are ignored (apart from being reported to stdout if 'verbose'
+ is true).
+ """
+ global _path_created
+
+ if verbose >= 1:
+ log.info("removing '%s' (and everything under it)", directory)
+ if dry_run:
+ return
+ cmdtuples = []
+ _build_cmdtuple(directory, cmdtuples)
+ for cmd in cmdtuples:
+ try:
+ cmd[0](cmd[1])
+ # remove dir from cache if it's already there
+ abspath = os.path.abspath(cmd[1])
+ if abspath in _path_created:
+ del _path_created[abspath]
+ except (IOError, OSError), exc:
+ log.warn("error removing %s: %s", directory, exc)
+
+def ensure_relative(path):
+ """Take the full path 'path', and make it a relative path.
+
+ This is useful to make 'path' the second argument to os.path.join().
+ """
+ drive, path = os.path.splitdrive(path)
+ if path[0:1] == os.sep:
+ path = drive + path[1:]
+ return path
diff --git a/cashew/Lib/distutils/dist.py b/cashew/Lib/distutils/dist.py
new file mode 100644
index 0000000..e025313
--- /dev/null
+++ b/cashew/Lib/distutils/dist.py
@@ -0,0 +1,1249 @@
+"""distutils.dist
+
+Provides the Distribution class, which represents the module distribution
+being built/installed/distributed.
+"""
+
+__revision__ = "$Id$"
+
+import sys, os, re
+from email import message_from_file
+
+try:
+ import warnings
+except ImportError:
+ warnings = None
+
+from distutils.errors import (DistutilsOptionError, DistutilsArgError,
+ DistutilsModuleError, DistutilsClassError)
+from distutils.fancy_getopt import FancyGetopt, translate_longopt
+from distutils.util import check_environ, strtobool, rfc822_escape
+from distutils import log
+from distutils.debug import DEBUG
+
+# Encoding used for the PKG-INFO files
+PKG_INFO_ENCODING = 'utf-8'
+
+# Regex to define acceptable Distutils command names. This is not *quite*
+# the same as a Python NAME -- I don't allow leading underscores. The fact
+# that they're very similar is no coincidence; the default naming scheme is
+# to look for a Python module named after the command.
+# The pattern accepts one ASCII letter followed by any number of ASCII
+# letters, digits or underscores, so a leading digit is rejected as well.
+command_re = re.compile (r'^[a-zA-Z]([a-zA-Z0-9_]*)$')
+
+
+class Distribution:
+ """The core of the Distutils. Most of the work hiding behind 'setup'
+ is really done within a Distribution instance, which farms the work out
+ to the Distutils commands specified on the command line.
+
+ Setup scripts will almost never instantiate Distribution directly,
+ unless the 'setup()' function is totally inadequate to their needs.
+ However, it is conceivable that a setup script might wish to subclass
+ Distribution for some specialized purpose, and then pass the subclass
+ to 'setup()' as the 'distclass' keyword argument. If so, it is
+ necessary to respect the expectations that 'setup' has of Distribution.
+ See the code for 'setup()', in core.py, for details.
+ """
+
+
+ # 'global_options' describes the command-line options that may be
+ # supplied to the setup script prior to any actual commands.
+ # Eg. "./setup.py -n" or "./setup.py --quiet" both take advantage of
+ # these global options. This list should be kept to a bare minimum,
+ # since every global option is also valid as a command option -- and we
+ # don't want to pollute the commands with too many options that they
+ # have minimal control over.
+ # Each entry is (long name, short name, help text); the fourth entry
+ # for verbose means that it can be repeated.
+ global_options = [
+     ('verbose', 'v', "run verbosely (default)", 1),
+     ('quiet', 'q', "run quietly (turns verbosity off)"),
+     ('dry-run', 'n', "don't actually do anything"),
+     ('help', 'h', "show detailed help message"),
+     ('no-user-cfg', None,
+      'ignore pydistutils.cfg in your home directory'),
+     ]
+
+ # 'common_usage' is a short (2-3 line) string describing the common
+ # usage of the setup script. The two command lines form an aligned
+ # two-column table.
+ common_usage = """\
+Common commands: (see '--help-commands' for more)
+
+  setup.py build      will build the package underneath 'build/'
+  setup.py install    will install the package
+"""
+
+ # options that are not propagated to the commands
+ display_options = [
+     ('help-commands', None,
+      "list all available commands"),
+     ('name', None,
+      "print package name"),
+     ('version', 'V',
+      "print package version"),
+     # The placeholders in this help text had been lost, leaving only
+     # "print -".
+     ('fullname', None,
+      "print <package name>-<version>"),
+     ('author', None,
+      "print the author's name"),
+     ('author-email', None,
+      "print the author's email address"),
+     ('maintainer', None,
+      "print the maintainer's name"),
+     ('maintainer-email', None,
+      "print the maintainer's email address"),
+     ('contact', None,
+      "print the maintainer's name if known, else the author's"),
+     ('contact-email', None,
+      "print the maintainer's email address if known, else the author's"),
+     ('url', None,
+      "print the URL for this package"),
+     ('license', None,
+      "print the license of the package"),
+     ('licence', None,
+      "alias for --license"),
+     ('description', None,
+      "print the package description"),
+     ('long-description', None,
+      "print the long package description"),
+     ('platforms', None,
+      "print the list of platforms"),
+     ('classifiers', None,
+      "print the list of classifiers"),
+     ('keywords', None,
+      "print the list of keywords"),
+     ('provides', None,
+      "print the list of packages/modules provided"),
+     ('requires', None,
+      "print the list of packages/modules required"),
+     ('obsoletes', None,
+      "print the list of packages/modules made obsolete")
+     ]
+ # Attribute names derived from the long option names above; __init__
+ # creates one instance attribute (initially 0) for each of them.
+ display_option_names = map(lambda x: translate_longopt(x[0]),
+                            display_options)
+
+ # negative options are options that exclude other options
+ negative_opt = {'quiet': 'verbose'}
+
+
+ # -- Creation/initialization methods -------------------------------
+
+ def __init__ (self, attrs=None):
+ """Construct a new Distribution instance: initialize all the
+ attributes of a Distribution, and then use 'attrs' (a dictionary
+ mapping attribute names to values) to assign some of those
+ attributes their "real" values. (Any attributes not mentioned in
+ 'attrs' will be assigned to some null value: 0, None, an empty list
+ or dictionary, etc.) Most importantly, initialize the
+ 'command_obj' attribute to the empty dictionary; this will be
+ filled in with real command objects by 'parse_command_line()'.
+
+ Note that 'attrs' is modified in place: its 'options' key is removed
+ after being copied into 'command_options', and a 'licence' key is
+ renamed to 'license'. Unknown keys only produce a warning.
+ """
+
+ # Default values for our command-line options
+ self.verbose = 1
+ self.dry_run = 0
+ self.help = 0
+ for attr in self.display_option_names:
+ setattr(self, attr, 0)
+
+ # Store the distribution meta-data (name, version, author, and so
+ # forth) in a separate object -- we're getting to have enough
+ # information here (and enough command-line options) that it's
+ # worth it. Also delegate 'get_XXX()' methods to the 'metadata'
+ # object in a sneaky and underhanded (but efficient!) way.
+ self.metadata = DistributionMetadata()
+ for basename in self.metadata._METHOD_BASENAMES:
+ method_name = "get_" + basename
+ setattr(self, method_name, getattr(self.metadata, method_name))
+
+ # 'cmdclass' maps command names to class objects, so we
+ # can 1) quickly figure out which class to instantiate when
+ # we need to create a new command object, and 2) have a way
+ # for the setup script to override command classes
+ self.cmdclass = {}
+
+ # 'command_packages' is a list of packages in which commands
+ # are searched for. The factory for command 'foo' is expected
+ # to be named 'foo' in the module 'foo' in one of the packages
+ # named here. This list is searched from the left; an error
+ # is raised if no named package provides the command being
+ # searched for. (Always access using get_command_packages().)
+ self.command_packages = None
+
+ # 'script_name' and 'script_args' are usually set to sys.argv[0]
+ # and sys.argv[1:], but they can be overridden when the caller is
+ # not necessarily a setup script run from the command-line.
+ self.script_name = None
+ self.script_args = None
+
+ # 'command_options' is where we store command options between
+ # parsing them (from config files, the command-line, etc.) and when
+ # they are actually needed -- ie. when the command in question is
+ # instantiated. It is a dictionary of dictionaries of 2-tuples:
+ # command_options = { command_name : { option : (source, value) } }
+ self.command_options = {}
+
+ # 'dist_files' is the list of (command, pyversion, file) that
+ # have been created by any dist commands run so far. This is
+ # filled regardless of whether the run is dry or not. pyversion
+ # gives sysconfig.get_python_version() if the dist file is
+ # specific to a Python version, 'any' if it is good for all
+ # Python versions on the target platform, and '' for a source
+ # file. pyversion should not be used to specify minimum or
+ # maximum required Python versions; use the metainfo for that
+ # instead.
+ self.dist_files = []
+
+ # These options are really the business of various commands, rather
+ # than of the Distribution itself. We provide aliases for them in
+ # Distribution as a convenience to the developer.
+ self.packages = None
+ self.package_data = {}
+ self.package_dir = None
+ self.py_modules = None
+ self.libraries = None
+ self.headers = None
+ self.ext_modules = None
+ self.ext_package = None
+ self.include_dirs = None
+ self.extra_path = None
+ self.scripts = None
+ self.data_files = None
+ self.password = ''
+
+ # And now initialize bookkeeping stuff that can't be supplied by
+ # the caller at all. 'command_obj' maps command names to
+ # Command instances -- that's how we enforce that every command
+ # class is a singleton.
+ self.command_obj = {}
+
+ # 'have_run' maps command names to boolean values; it keeps track
+ # of whether we have actually run a particular command, to make it
+ # cheap to "run" a command whenever we think we might need to -- if
+ # it's already been done, no need for expensive filesystem
+ # operations, we just check the 'have_run' dictionary and carry on.
+ # It's only safe to query 'have_run' for a command class that has
+ # been instantiated -- a false value will be inserted when the
+ # command object is created, and replaced with a true value when
+ # the command is successfully run. Thus it's probably best to use
+ # '.get()' rather than a straight lookup.
+ self.have_run = {}
+
+ # Now we'll use the attrs dictionary (ultimately, keyword args from
+ # the setup script) to possibly override any or all of these
+ # distribution options.
+
+ if attrs:
+ # Pull out the set of command options and work on them
+ # specifically. Note that this order guarantees that aliased
+ # command options will override any supplied redundantly
+ # through the general options dictionary.
+ options = attrs.get('options')
+ if options is not None:
+ del attrs['options']
+ for (command, cmd_options) in options.items():
+ opt_dict = self.get_option_dict(command)
+ for (opt, val) in cmd_options.items():
+ opt_dict[opt] = ("setup script", val)
+
+ # Accept the British spelling, but tell the user it is deprecated.
+ if 'licence' in attrs:
+ attrs['license'] = attrs['licence']
+ del attrs['licence']
+ msg = "'licence' distribution option is deprecated; use 'license'"
+ if warnings is not None:
+ warnings.warn(msg)
+ else:
+ sys.stderr.write(msg + "\n")
+
+ # Now work on the rest of the attributes. Any attribute that's
+ # not already defined is invalid!
+ # Lookup order: a 'set_<key>' method on the metadata object, then
+ # an attribute of the metadata object, then an attribute of self.
+ for (key, val) in attrs.items():
+ if hasattr(self.metadata, "set_" + key):
+ getattr(self.metadata, "set_" + key)(val)
+ elif hasattr(self.metadata, key):
+ setattr(self.metadata, key, val)
+ elif hasattr(self, key):
+ setattr(self, key, val)
+ else:
+ msg = "Unknown distribution option: %s" % repr(key)
+ if warnings is not None:
+ warnings.warn(msg)
+ else:
+ sys.stderr.write(msg + "\n")
+
+ # no-user-cfg is handled before other command line args
+ # because other args override the config files, and this
+ # one is needed before we can load the config files.
+ # If attrs['script_args'] wasn't passed, assume false.
+ #
+ # This also makes sure we only look at the global options: the scan
+ # stops at the first argument that does not start with '-'.
+ self.want_user_cfg = True
+
+ if self.script_args is not None:
+ for arg in self.script_args:
+ if not arg.startswith('-'):
+ break
+ if arg == '--no-user-cfg':
+ self.want_user_cfg = False
+ break
+
+ self.finalize_options()
+
+ def get_option_dict(self, command):
+     """Return the option dictionary of 'command', creating it on demand.
+
+     The returned dictionary is the one stored in 'self.command_options',
+     so entries added by the caller are seen by later lookups.
+     """
+     options = self.command_options.get(command)
+     if options is not None:
+         return options
+     options = self.command_options[command] = {}
+     return options
+
+ def dump_option_dicts(self, header=None, commands=None, indent=""):
+     """Announce the stored option dictionaries, one line per message.
+
+     'commands' selects the command names to report; by default every
+     key of 'self.command_options' is reported in sorted order. If
+     'header' is given it is announced first and the remaining lines are
+     indented one step further. Each dictionary is rendered with
+     pprint.pformat().
+     """
+     from pprint import pformat
+
+     if commands is None:
+         # No explicit selection: report every known command.
+         commands = sorted(self.command_options.keys())
+
+     if header is not None:
+         self.announce(indent + header)
+         indent = indent + " "
+
+     if not commands:
+         self.announce(indent + "no commands known yet")
+         return
+
+     for name in commands:
+         options = self.command_options.get(name)
+         if options is None:
+             self.announce(indent +
+                           "no option dict for '%s' command" % name)
+             continue
+         self.announce(indent +
+                       "option dict for '%s' command:" % name)
+         for text in pformat(options).split('\n'):
+             self.announce(indent + " " + text)
+
+ # -- Config file finding/parsing methods ---------------------------
+
+ def find_config_files(self):
+     """Return the existing configuration files, in parsing order.
+
+     Up to three files are considered, in this order:
+
+       * distutils.cfg in the directory of the installed distutils
+         package (the directory holding its top-level __init__.py);
+       * the per-user file in the home directory, named .pydistutils.cfg
+         when os.name is 'posix' and pydistutils.cfg otherwise; it is
+         skipped when 'self.want_user_cfg' is false (--no-user-cfg);
+       * setup.cfg in the current directory.
+
+     Only candidates for which os.path.isfile() is true are returned, so
+     the result may be shorter than three entries (or empty).
+     """
+     check_environ()
+
+     # System-wide file, located next to the distutils package itself.
+     package_dir = os.path.dirname(sys.modules['distutils'].__file__)
+     candidates = [os.path.join(package_dir, "distutils.cfg")]
+
+     # Per-user file, unless it was disabled.
+     if self.want_user_cfg:
+         if os.name == 'posix':
+             user_filename = ".pydistutils.cfg"
+         else:
+             user_filename = "pydistutils.cfg"
+         candidates.append(
+             os.path.join(os.path.expanduser('~'), user_filename))
+
+     # Project-local file, supported on all platforms.
+     candidates.append("setup.cfg")
+
+     files = [name for name in candidates if os.path.isfile(name)]
+
+     if DEBUG:
+         self.announce("using config files: %s" % ', '.join(files))
+
+     return files
+
+ def parse_config_files(self, filenames=None):
+ """Read configuration files and record their options.
+
+ 'filenames' defaults to the result of find_config_files(). Every
+ option found is stored in the option dictionary of the section it
+ appears in as a (filename, value) pair, with '-' in the option name
+ replaced by '_'. Options of the 'global' section are additionally
+ applied as attributes of this Distribution instance.
+
+ Raises DistutilsOptionError when a boolean 'global' option has a
+ value that strtobool() rejects.
+ """
+ from ConfigParser import ConfigParser
+
+ if filenames is None:
+ filenames = self.find_config_files()
+
+ if DEBUG:
+ self.announce("Distribution.parse_config_files():")
+
+ parser = ConfigParser()
+ for filename in filenames:
+ if DEBUG:
+ self.announce(" reading %s" % filename)
+ parser.read(filename)
+ for section in parser.sections():
+ options = parser.options(section)
+ opt_dict = self.get_option_dict(section)
+
+ for opt in options:
+ # '__name__' is skipped: it is not treated as a user option.
+ if opt != '__name__':
+ val = parser.get(section,opt)
+ opt = opt.replace('-', '_')
+ opt_dict[opt] = (filename, val)
+
+ # Make the ConfigParser forget everything (so we retain
+ # the original filenames that options come from)
+ parser.__init__()
+
+ # If there was a "global" section in the config file, use it
+ # to set Distribution options.
+
+ if 'global' in self.command_options:
+ for (opt, (src, val)) in self.command_options['global'].items():
+ # A negative option (e.g. 'quiet') sets its positive alias to
+ # the inverted boolean value.
+ alias = self.negative_opt.get(opt)
+ try:
+ if alias:
+ setattr(self, alias, not strtobool(val))
+ elif opt in ('verbose', 'dry_run'): # ugh!
+ setattr(self, opt, strtobool(val))
+ else:
+ setattr(self, opt, val)
+ except ValueError, msg:
+ raise DistutilsOptionError, msg
+
+ # -- Command-line parsing methods ----------------------------------
+
+ def parse_command_line(self):
+ """Parse the setup script's command line, taken from the
+ 'script_args' instance attribute (which defaults to 'sys.argv[1:]'
+ -- see 'setup()' in core.py). This list is first processed for
+ "global options" -- options that set attributes of the Distribution
+ instance. Then, it is alternately scanned for Distutils commands
+ and options for that command. Each new command terminates the
+ options for the previous command. The allowed options for a
+ command are determined by the 'user_options' attribute of the
+ command class -- thus, we have to be able to load command classes
+ in order to parse the command line. Any error in that 'options'
+ attribute raises DistutilsGetoptError; any error on the
+ command-line raises DistutilsArgError. If no Distutils commands
+ were found on the command line, raises DistutilsArgError. Return
+ true if command-line was successfully parsed and we should carry
+ on with executing commands; false if no errors but we shouldn't
+ execute commands (currently, this only happens if user asks for
+ help).
+
+ Concretely, the "true" result is 1 and the "false" result is None
+ (a bare return); the latter is also used when a display-only option
+ was handled.
+ """
+ #
+ # We now have enough information to show the Macintosh dialog
+ # that allows the user to interactively specify the "command line".
+ #
+ # NOTE(review): no dialog code follows this comment; it looks like a
+ # leftover -- confirm before relying on it.
+ toplevel_options = self._get_toplevel_options()
+
+ # We have to parse the command line a bit at a time -- global
+ # options, then the first command, then its options, and so on --
+ # because each command will be handled by a different class, and
+ # the options that are valid for a particular class aren't known
+ # until we have loaded the command class, which doesn't happen
+ # until we know what the command is.
+
+ self.commands = []
+ parser = FancyGetopt(toplevel_options + self.display_options)
+ parser.set_negative_aliases(self.negative_opt)
+ parser.set_aliases({'licence': 'license'})
+ # Parsed option values are stored as attributes of this instance
+ # (object=self); 'args' is what remains after the global options.
+ args = parser.getopt(args=self.script_args, object=self)
+ option_order = parser.get_option_order()
+ log.set_verbosity(self.verbose)
+
+ # for display options we return immediately
+ if self.handle_display_options(option_order):
+ return
+ # Each pass consumes one command and its options; the command name is
+ # appended to self.commands by _parse_command_opts().
+ while args:
+ args = self._parse_command_opts(parser, args)
+ if args is None: # user asked for help (and got it)
+ return
+
+ # Handle the cases of --help as a "global" option, ie.
+ # "setup.py --help" and "setup.py --help command ...". For the
+ # former, we show global options (--verbose, --dry-run, etc.)
+ # and display-only options (--name, --version, etc.); for the
+ # latter, we omit the display-only options and show help for
+ # each command listed on the command line.
+ if self.help:
+ self._show_help(parser,
+ display_options=len(self.commands) == 0,
+ commands=self.commands)
+ return
+
+ # Oops, no commands found -- an end-user error
+ if not self.commands:
+ raise DistutilsArgError, "no commands supplied"
+
+ # All is well: return true
+ return 1
+
+ def _get_toplevel_options(self):
+     """Return the option table used at the top level, minus display options.
+
+     The result is a new list: the shared 'global_options' entries
+     followed by the '--command-packages' option, which is added here and
+     is not part of 'global_options'.
+     """
+     command_packages_option = (
+         "command-packages=", None,
+         "list of packages that provide distutils commands")
+     return self.global_options + [command_packages_option]
+
+ def _parse_command_opts(self, parser, args):
+ """Parse the command-line options for a single command.
+ 'parser' must be a FancyGetopt instance; 'args' must be the list
+ of arguments, starting with the current command (whose options
+ we are about to parse). Returns a new version of 'args' with
+ the next command at the front of the list; will be the empty
+ list if there are no more commands on the command line. Returns
+ None if the user asked for help on this command.
+
+ Side effects: the command name is appended to 'self.commands' and
+ the parsed options are stored in its option dictionary with the
+ source "command line".
+
+ Raises SystemExit for a syntactically invalid command name,
+ DistutilsArgError when the command class cannot be loaded, and
+ DistutilsClassError when the command class is malformed.
+ """
+ # late import because of mutual dependence between these modules
+ from distutils.cmd import Command
+
+ # Pull the current command from the head of the command line
+ command = args[0]
+ if not command_re.match(command):
+ raise SystemExit, "invalid command name '%s'" % command
+ self.commands.append(command)
+
+ # Dig up the command class that implements this command, so we
+ # 1) know that it's a valid command, and 2) know which options
+ # it takes.
+ try:
+ cmd_class = self.get_command_class(command)
+ except DistutilsModuleError, msg:
+ raise DistutilsArgError, msg
+
+ # Require that the command class be derived from Command -- want
+ # to be sure that the basic "command" interface is implemented.
+ if not issubclass(cmd_class, Command):
+ raise DistutilsClassError, \
+ "command class %s must subclass Command" % cmd_class
+
+ # Also make sure that the command object provides a list of its
+ # known options.
+ if not (hasattr(cmd_class, 'user_options') and
+ isinstance(cmd_class.user_options, list)):
+ raise DistutilsClassError, \
+ ("command class %s must provide " +
+ "'user_options' attribute (a list of tuples)") % \
+ cmd_class
+
+ # If the command class has a list of negative alias options,
+ # merge it in with the global negative aliases.
+ # (The copy keeps the class-level 'negative_opt' dict untouched.)
+ negative_opt = self.negative_opt
+ if hasattr(cmd_class, 'negative_opt'):
+ negative_opt = negative_opt.copy()
+ negative_opt.update(cmd_class.negative_opt)
+
+ # Check for help_options in command class. They have a different
+ # format (tuple of four) so we need to preprocess them here.
+ if (hasattr(cmd_class, 'help_options') and
+ isinstance(cmd_class.help_options, list)):
+ help_options = fix_help_options(cmd_class.help_options)
+ else:
+ help_options = []
+
+
+ # All commands support the global options too, just by adding
+ # in 'global_options'.
+ parser.set_option_table(self.global_options +
+ cmd_class.user_options +
+ help_options)
+ parser.set_negative_aliases(negative_opt)
+ # args[1:] skips the command name itself; 'opts' carries the parsed
+ # option values as attributes.
+ (args, opts) = parser.getopt(args[1:])
+ if hasattr(opts, 'help') and opts.help:
+ self._show_help(parser, display_options=0, commands=[cmd_class])
+ return
+
+ # Command-specific help options: call the function attached to every
+ # help option that was given, then stop parsing.
+ if (hasattr(cmd_class, 'help_options') and
+ isinstance(cmd_class.help_options, list)):
+ help_option_found=0
+ for (help_option, short, desc, func) in cmd_class.help_options:
+ if hasattr(opts, parser.get_attr_name(help_option)):
+ help_option_found=1
+ if hasattr(func, '__call__'):
+ func()
+ else:
+ raise DistutilsClassError(
+ "invalid help function %r for help option '%s': "
+ "must be a callable object (function, etc.)"
+ % (func, help_option))
+
+ if help_option_found:
+ return
+
+ # Put the options from the command-line into their official
+ # holding pen, the 'command_options' dictionary.
+ opt_dict = self.get_option_dict(command)
+ for (name, value) in vars(opts).items():
+ opt_dict[name] = ("command line", value)
+
+ return args
+
+ def finalize_options(self):
+     """Normalize option values once all of them have been set.
+
+     This is the Distribution counterpart of Command.finalize_options().
+     The 'keywords' and 'platforms' metadata fields are split on commas
+     into lists of stripped strings when they were given as a single
+     string; any other value (including None) is left untouched.
+     """
+     for name in ('keywords', 'platforms'):
+         current = getattr(self.metadata, name)
+         if isinstance(current, str):
+             pieces = [piece.strip() for piece in current.split(',')]
+             setattr(self.metadata, name, pieces)
+
+ def _show_help(self, parser, global_options=1, display_options=1,
+                commands=None):
+     """Show help for the setup script command-line in the form of
+     several lists of command-line options. 'parser' should be a
+     FancyGetopt instance; do not expect it to be returned in the
+     same state, as its option table will be reset to make it
+     generate the correct help text.
+
+     If 'global_options' is true, lists the global options:
+     --verbose, --dry-run, etc. If 'display_options' is true, lists
+     the "display-only" options: --name, --version, etc. Finally,
+     lists per-command help for every command name or command class
+     in 'commands' (default: no commands).
+     """
+     # late import because of mutual dependence between these modules
+     from distutils.core import gen_usage
+     from distutils.cmd import Command
+
+     if commands is None:
+         commands = []
+
+     if global_options:
+         if display_options:
+             options = self._get_toplevel_options()
+         else:
+             options = self.global_options
+         parser.set_option_table(options)
+         parser.print_help(self.common_usage + "\nGlobal options:")
+         print('')
+
+     if display_options:
+         parser.set_option_table(self.display_options)
+         parser.print_help(
+             "Information display options (just display " +
+             "information, ignore any commands)")
+         print('')
+
+     # Honour the 'commands' argument; 'self.commands' used to be
+     # iterated here, which silently ignored what the caller passed.
+     for command in commands:
+         # issubclass() raises TypeError when 'command' is a name rather
+         # than a class; unlike an isinstance(command, type) test this
+         # also recognizes old-style command classes.
+         try:
+             is_command_class = issubclass(command, Command)
+         except TypeError:
+             is_command_class = False
+         if is_command_class:
+             klass = command
+         else:
+             klass = self.get_command_class(command)
+         if (hasattr(klass, 'help_options') and
+                 isinstance(klass.help_options, list)):
+             parser.set_option_table(klass.user_options +
+                                     fix_help_options(klass.help_options))
+         else:
+             parser.set_option_table(klass.user_options)
+         parser.print_help("Options for '%s' command:" % klass.__name__)
+         print('')
+
+     print(gen_usage(self.script_name))
+
+ def handle_display_options(self, option_order):
+     """Process the "display-only" options found on the command line.
+
+     With --help-commands the command list and the usage text are printed
+     and 1 is returned at once. Otherwise every metadata display option
+     in 'option_order' (a sequence of (option, value) pairs) that has a
+     true value is printed, in the order given. Returns a true value if
+     anything was displayed, else 0.
+     """
+     from distutils.core import gen_usage
+
+     # User just wants a list of commands -- print it and stop processing
+     # (ie. for "setup --help-commands foo bar" we ignore "foo bar").
+     if self.help_commands:
+         self.print_commands()
+         print('')
+         print(gen_usage(self.script_name))
+         return 1
+
+     display_names = set([entry[0] for entry in self.display_options])
+     comma_joined = ['keywords', 'platforms']
+     line_joined = ('classifiers', 'provides', 'requires', 'obsoletes')
+
+     shown = 0
+     for (opt, val) in option_order:
+         if not (val and opt in display_names):
+             continue
+         opt = translate_longopt(opt)
+         value = getattr(self.metadata, "get_" + opt)()
+         if opt in comma_joined:
+             print(','.join(value))
+         elif opt in line_joined:
+             print('\n'.join(value))
+         else:
+             print(value)
+         shown = 1
+
+     return shown
+
+ def print_command_list(self, commands, header, max_length):
+     """Print 'header' followed by one line per command in 'commands'.
+
+     Used by 'print_commands()'. Each line shows the command name, padded
+     to 'max_length' characters, and the 'description' attribute of its
+     command class (or a placeholder when the class has none).
+     """
+     print(header + ":")
+
+     for name in commands:
+         # Prefer an explicitly registered class, else load the command.
+         klass = self.cmdclass.get(name) or self.get_command_class(name)
+         description = getattr(klass, 'description',
+                               "(no description available)")
+         print(" %-*s %s" % (max_length, name, description))
+
+ def print_commands(self):
+     """Print out a help message listing all available commands with a
+     description of each. The list is divided into "standard commands"
+     (listed in distutils.command.__all__) and "extra commands"
+     (mentioned in self.cmdclass, but not a standard command). The
+     descriptions come from the command class attribute
+     'description'.
+     """
+     import distutils.command
+     std_commands = distutils.command.__all__
+     is_std = {}
+     for cmd in std_commands:
+         is_std[cmd] = 1
+
+     extra_commands = []
+     for cmd in self.cmdclass.keys():
+         if not is_std.get(cmd):
+             extra_commands.append(cmd)
+
+     # Width of the name column: the longest command name of either group.
+     max_length = 0
+     for cmd in (std_commands + extra_commands):
+         if len(cmd) > max_length:
+             max_length = len(cmd)
+
+     self.print_command_list(std_commands,
+                             "Standard commands",
+                             max_length)
+     if extra_commands:
+         # Blank separator line. Written as a call, like every other
+         # blank-line print in this class; a bare 'print' does nothing
+         # where print is a function.
+         print('')
+         self.print_command_list(extra_commands,
+                                 "Extra commands",
+                                 max_length)
+
+ def get_command_list(self):
+     """Return a list of (command, description) tuples.
+
+     The "standard commands" (those named in distutils.command.__all__)
+     come first, followed by the "extra commands" (keys of self.cmdclass
+     that are not standard commands). Each description is the
+     'description' attribute of the command class, or a placeholder when
+     the class does not define one.
+     """
+     # Currently this is only used on Mac OS, for the Mac-only GUI
+     # Distutils interface (by Jack Jansen)
+
+     import distutils.command
+     standard = distutils.command.__all__
+     known = set(standard)
+     extras = [name for name in self.cmdclass.keys() if name not in known]
+
+     result = []
+     for name in (standard + extras):
+         klass = self.cmdclass.get(name) or self.get_command_class(name)
+         description = getattr(klass, 'description',
+                               "(no description available)")
+         result.append((name, description))
+     return result
+
+ # -- Command class/object methods ----------------------------------
+
+ def get_command_packages(self):
+     """Return the list of packages from which commands are loaded.
+
+     If 'self.command_packages' is already a list it is returned as is.
+     Otherwise it is treated as a comma-separated string (None meaning
+     empty), split into stripped names, given "distutils.command" as
+     first entry unless that name is already present, and stored back on
+     the instance before being returned.
+     """
+     packages = self.command_packages
+     if isinstance(packages, list):
+         return packages
+
+     raw = '' if packages is None else packages
+     packages = [item.strip() for item in raw.split(',') if item != '']
+     if "distutils.command" not in packages:
+         packages.insert(0, "distutils.command")
+     self.command_packages = packages
+     return packages
+
+ def get_command_class(self, command):
+     """Return the class that implements the Distutils command 'command'.
+
+     A true entry for 'command' in the 'cmdclass' dictionary is returned
+     directly. Otherwise each package from get_command_packages() is
+     tried in order: the module "<package>.<command>" is imported and the
+     attribute named 'command' is fetched from it. The class found is
+     stored in 'cmdclass' to speed up future calls.
+
+     Raises DistutilsModuleError if no package provides the module, or if
+     the first importable module does not define the expected class.
+     """
+     cached = self.cmdclass.get(command)
+     if cached:
+         return cached
+
+     for package in self.get_command_packages():
+         module_name = "%s.%s" % (package, command)
+
+         try:
+             __import__(module_name)
+             module = sys.modules[module_name]
+         except ImportError:
+             # Not provided by this package; try the next one.
+             continue
+
+         try:
+             found = getattr(module, command)
+         except AttributeError:
+             raise DistutilsModuleError(
+                 "invalid command '%s' (no class '%s' in module '%s')"
+                 % (command, command, module_name))
+
+         self.cmdclass[command] = found
+         return found
+
+     raise DistutilsModuleError("invalid command '%s'" % command)
+
+
+ def get_command_obj(self, command, create=1):
+     """Return the command object for 'command'.
+
+     A true object already cached in 'self.command_obj' is returned as
+     is. When there is none, the cached (false) value is returned if
+     'create' is false; otherwise the command class is instantiated,
+     cached, marked as not yet run in 'self.have_run', and given any
+     options recorded for it in 'self.command_options'.
+     """
+     existing = self.command_obj.get(command)
+     if existing or not create:
+         return existing
+
+     if DEBUG:
+         self.announce("Distribution.get_command_obj(): "
+                       "creating '%s' command object" % command)
+
+     klass = self.get_command_class(command)
+     created = self.command_obj[command] = klass(self)
+     self.have_run[command] = 0
+
+     # Set any options that were supplied in config files or on the
+     # command line. (NB. support for error reporting is lame here:
+     # any errors aren't reported until 'finalize_options()' is called,
+     # which means we won't report the source of the error.)
+     options = self.command_options.get(command)
+     if options:
+         self._set_command_options(created, options)
+
+     return created
+
+ def _set_command_options(self, command_obj, option_dict=None):
+ """Set the options for 'command_obj' from 'option_dict'. Basically
+ this means copying elements of a dictionary ('option_dict') to
+ attributes of an instance ('command').
+
+ 'command_obj' must be a Command instance. If 'option_dict' is not
+ supplied, uses the standard option dictionary for this command
+ (from 'self.command_options').
+
+ Raises DistutilsOptionError if the command has no attribute named
+ after an option, or if a string value for a boolean option is
+ rejected by strtobool().
+ """
+ command_name = command_obj.get_command_name()
+ if option_dict is None:
+ option_dict = self.get_option_dict(command_name)
+
+ if DEBUG:
+ self.announce(" setting options for '%s' command:" % command_name)
+ for (option, (source, value)) in option_dict.items():
+ if DEBUG:
+ self.announce(" %s = %s (from %s)" % (option, value,
+ source))
+ # 'boolean_options' and 'negative_opt' are optional attributes of a
+ # command; a missing one is treated as empty.
+ try:
+ bool_opts = map(translate_longopt, command_obj.boolean_options)
+ except AttributeError:
+ bool_opts = []
+ try:
+ neg_opt = command_obj.negative_opt
+ except AttributeError:
+ neg_opt = {}
+
+ try:
+ # Only string values are converted with strtobool(); any other
+ # value is assigned unchanged.
+ is_string = isinstance(value, str)
+ if option in neg_opt and is_string:
+ setattr(command_obj, neg_opt[option], not strtobool(value))
+ elif option in bool_opts and is_string:
+ setattr(command_obj, option, strtobool(value))
+ elif hasattr(command_obj, option):
+ setattr(command_obj, option, value)
+ else:
+ raise DistutilsOptionError, \
+ ("error in %s: command '%s' has no such option '%s'"
+ % (source, command_name, option))
+ except ValueError, msg:
+ raise DistutilsOptionError, msg
+
+ def reinitialize_command(self, command, reinit_subcommands=0):
+     """Put a command back into its initialized-but-not-finalized state.
+
+     That is the state it was in when first returned by
+     'get_command_obj()'. It gives the caller the opportunity to sneak
+     option values in programmatically, overriding or supplementing
+     user-supplied values from the config files and command line. The
+     command has to be finalized again (with 'finalize_options()' or
+     'ensure_finalized()') before it is used for real.
+
+     'command' is a command name (string) or a command object. A command
+     that has not been finalized yet is returned untouched. If
+     'reinit_subcommands' is true, the sub-commands reported by the
+     command's 'get_sub_commands()' are reinitialized as well.
+
+     Returns the reinitialized command object.
+     """
+     from distutils.cmd import Command
+     if isinstance(command, Command):
+         command_name = command.get_command_name()
+     else:
+         command_name = command
+         command = self.get_command_obj(command_name)
+
+     if command.finalized:
+         command.initialize_options()
+         command.finalized = 0
+         self.have_run[command_name] = 0
+         # Re-apply the options recorded for this command.
+         self._set_command_options(command)
+
+         if reinit_subcommands:
+             for sub in command.get_sub_commands():
+                 self.reinitialize_command(sub, reinit_subcommands)
+
+     return command
+
+ # -- Methods that operate on the Distribution ----------------------
+
+    def announce(self, msg, level=log.INFO):
+        """Log 'msg' through the 'log' module at 'level' (default log.INFO)."""
+        log.log(level, msg)
+
+    def run_commands(self):
+        """Run every command listed in 'self.commands', in order.
+
+        These are the commands that were seen on the setup script command
+        line; each name is handed to 'run_command()'.
+        """
+        for name in self.commands:
+            self.run_command(name)
+
+ # -- Methods that operate on its Commands --------------------------
+
+    def run_command(self, command):
+        """Run the command named 'command' unless it has already been run.
+
+        If 'self.have_run' holds a true value for 'command', return
+        silently without doing anything.  Otherwise obtain the command
+        object from 'get_command_obj()', finalize it, invoke its 'run()'
+        method and record the command in 'self.have_run'.
+        """
+        already_run = self.have_run.get(command)
+        if not already_run:
+            log.info("running %s", command)
+            cmd_obj = self.get_command_obj(command)
+            cmd_obj.ensure_finalized()
+            cmd_obj.run()
+            self.have_run[command] = 1
+
+
+ # -- Distribution query methods ------------------------------------
+
+    def has_pure_modules(self):
+        """Return True if 'packages' or 'py_modules' is non-empty."""
+        pure_sources = self.packages or self.py_modules or []
+        return len(pure_sources) > 0
+
+    def has_ext_modules(self):
+        """Return a true value if 'ext_modules' is set and non-empty.
+
+        A false 'ext_modules' attribute is returned as is.
+        """
+        ext_modules = self.ext_modules
+        return ext_modules and len(ext_modules) > 0
+
+    def has_c_libraries(self):
+        """Return a true value if 'libraries' is set and non-empty.
+
+        A false 'libraries' attribute is returned as is.
+        """
+        libraries = self.libraries
+        return libraries and len(libraries) > 0
+
+    def has_modules(self):
+        """Return a true value if there are pure or extension modules."""
+        pure = self.has_pure_modules()
+        if pure:
+            return pure
+        return self.has_ext_modules()
+
+    def has_headers(self):
+        """Return a true value if 'headers' is set and non-empty.
+
+        A false 'headers' attribute is returned as is.
+        """
+        headers = self.headers
+        return headers and len(headers) > 0
+
+    def has_scripts(self):
+        """Return a true value if 'scripts' is set and non-empty.
+
+        A false 'scripts' attribute is returned as is.
+        """
+        scripts = self.scripts
+        return scripts and len(scripts) > 0
+
+    def has_data_files(self):
+        """Return a true value if 'data_files' is set and non-empty.
+
+        A false 'data_files' attribute is returned as is.
+        """
+        data_files = self.data_files
+        return data_files and len(data_files) > 0
+
+    def is_pure(self):
+        """Return a true value if the distribution has pure Python modules
+        and neither extension modules nor C libraries.
+        """
+        pure = self.has_pure_modules()
+        if not pure:
+            return pure
+        if self.has_ext_modules():
+            return False
+        return not self.has_c_libraries()
+
+ # -- Metadata query methods ----------------------------------------
+
+ # If you're looking for 'get_name()', 'get_version()', and so forth,
+ # they are defined in a sneaky way: the constructor binds self.get_XXX
+ # to self.metadata.get_XXX. The actual code is in the
+ # DistributionMetadata class, below.
+
+class DistributionMetadata:
+    """Hold the distribution meta-data: name, version, author, and so
+    forth.
+
+    Every field is a plain attribute.  The 'get_XXX()' methods return the
+    attribute, or a placeholder default when the attribute is unset.
+    """
+
+    # Base names of the 'get_XXX()' query methods defined below.
+    _METHOD_BASENAMES = ("name", "version", "author", "author_email",
+                         "maintainer", "maintainer_email", "url",
+                         "license", "description", "long_description",
+                         "keywords", "platforms", "fullname", "contact",
+                         "contact_email", "classifiers",
+                         "download_url",
+                         # PEP 314
+                         "provides", "requires", "obsoletes",
+                         )
+
+    def __init__(self, path=None):
+        """Initialize the metadata fields.
+
+        If 'path' is given, the fields are loaded from that file with
+        'read_pkg_file()'; otherwise every field starts out as None.
+        """
+        if path is not None:
+            # Close the file explicitly instead of leaking the handle.
+            pkg_file = open(path)
+            try:
+                self.read_pkg_file(pkg_file)
+            finally:
+                pkg_file.close()
+        else:
+            self.name = None
+            self.version = None
+            self.author = None
+            self.author_email = None
+            self.maintainer = None
+            self.maintainer_email = None
+            self.url = None
+            self.license = None
+            self.description = None
+            self.long_description = None
+            self.keywords = None
+            self.platforms = None
+            self.classifiers = None
+            self.download_url = None
+            # PEP 314
+            self.provides = None
+            self.requires = None
+            self.obsoletes = None
+
+    def read_pkg_file(self, file):
+        """Read the metadata values from the file object 'file'.
+
+        The file is parsed with 'message_from_file()'.  A field whose
+        value is the literal 'UNKNOWN' is stored as None.  Only the author
+        fields are filled in; the maintainer fields are reset to None.
+        """
+        msg = message_from_file(file)
+
+        def _read_field(name):
+            value = msg[name]
+            if value == 'UNKNOWN':
+                return None
+            return value
+
+        def _read_list(name):
+            values = msg.get_all(name, None)
+            if values == []:
+                return None
+            return values
+
+        metadata_version = msg['metadata-version']
+        self.name = _read_field('name')
+        self.version = _read_field('version')
+        self.description = _read_field('summary')
+        # we are filling author only.
+        self.author = _read_field('author')
+        self.maintainer = None
+        self.author_email = _read_field('author-email')
+        self.maintainer_email = None
+        self.url = _read_field('home-page')
+        self.license = _read_field('license')
+
+        if 'download-url' in msg:
+            self.download_url = _read_field('download-url')
+        else:
+            self.download_url = None
+
+        self.long_description = _read_field('description')
+
+        # Always define 'keywords' so get_keywords() works on an instance
+        # loaded from a file, and do not call split() on None when the
+        # header is absent or holds 'UNKNOWN'.
+        keywords = _read_field('keywords')
+        if keywords is None:
+            self.keywords = None
+        else:
+            self.keywords = keywords.split(',')
+
+        self.platforms = _read_list('platform')
+        self.classifiers = _read_list('classifier')
+
+        # PEP 314 - these fields only exist in 1.1
+        if metadata_version == '1.1':
+            self.requires = _read_list('requires')
+            self.provides = _read_list('provides')
+            self.obsoletes = _read_list('obsoletes')
+        else:
+            self.requires = None
+            self.provides = None
+            self.obsoletes = None
+
+    def write_pkg_info(self, base_dir):
+        """Write the PKG-INFO file into the release tree 'base_dir'.
+        """
+        pkg_info = open(os.path.join(base_dir, 'PKG-INFO'), 'w')
+        try:
+            self.write_pkg_file(pkg_info)
+        finally:
+            pkg_info.close()
+
+    def write_pkg_file(self, file):
+        """Write the PKG-INFO format data to the file object 'file'.
+
+        The metadata version written is '1.1' as soon as any of provides,
+        requires, obsoletes, classifiers or download_url is set, and
+        '1.0' otherwise.
+        """
+        version = '1.0'
+        if (self.provides or self.requires or self.obsoletes or
+            self.classifiers or self.download_url):
+            version = '1.1'
+
+        self._write_field(file, 'Metadata-Version', version)
+        self._write_field(file, 'Name', self.get_name())
+        self._write_field(file, 'Version', self.get_version())
+        self._write_field(file, 'Summary', self.get_description())
+        self._write_field(file, 'Home-page', self.get_url())
+        self._write_field(file, 'Author', self.get_contact())
+        self._write_field(file, 'Author-email', self.get_contact_email())
+        self._write_field(file, 'License', self.get_license())
+        if self.download_url:
+            self._write_field(file, 'Download-URL', self.download_url)
+
+        long_desc = rfc822_escape(self.get_long_description())
+        self._write_field(file, 'Description', long_desc)
+
+        keywords = ','.join(self.get_keywords())
+        if keywords:
+            self._write_field(file, 'Keywords', keywords)
+
+        self._write_list(file, 'Platform', self.get_platforms())
+        self._write_list(file, 'Classifier', self.get_classifiers())
+
+        # PEP 314
+        self._write_list(file, 'Requires', self.get_requires())
+        self._write_list(file, 'Provides', self.get_provides())
+        self._write_list(file, 'Obsoletes', self.get_obsoletes())
+
+    def _write_field(self, file, name, value):
+        """Write one 'name: value' line, encoding 'value' first."""
+        file.write('%s: %s\n' % (name, self._encode_field(value)))
+
+    def _write_list(self, file, name, values):
+        """Write one 'name: value' line per item of 'values'."""
+        for value in values:
+            self._write_field(file, name, value)
+
+    def _encode_field(self, value):
+        """Return 'value' in the form written to PKG-INFO.
+
+        None stays None, unicode objects are encoded with
+        PKG_INFO_ENCODING, anything else is converted with str().
+        """
+        if value is None:
+            return None
+        if isinstance(value, unicode):
+            return value.encode(PKG_INFO_ENCODING)
+        return str(value)
+
+    # -- Metadata query methods ----------------------------------------
+
+    def get_name(self):
+        return self.name or "UNKNOWN"
+
+    def get_version(self):
+        return self.version or "0.0.0"
+
+    def get_fullname(self):
+        return "%s-%s" % (self.get_name(), self.get_version())
+
+    def get_author(self):
+        return self._encode_field(self.author) or "UNKNOWN"
+
+    def get_author_email(self):
+        return self.author_email or "UNKNOWN"
+
+    def get_maintainer(self):
+        return self._encode_field(self.maintainer) or "UNKNOWN"
+
+    def get_maintainer_email(self):
+        return self.maintainer_email or "UNKNOWN"
+
+    def get_contact(self):
+        # The maintainer takes precedence over the author.
+        return (self._encode_field(self.maintainer) or
+                self._encode_field(self.author) or "UNKNOWN")
+
+    def get_contact_email(self):
+        return self.maintainer_email or self.author_email or "UNKNOWN"
+
+    def get_url(self):
+        return self.url or "UNKNOWN"
+
+    def get_license(self):
+        return self.license or "UNKNOWN"
+    get_licence = get_license
+
+    def get_description(self):
+        return self._encode_field(self.description) or "UNKNOWN"
+
+    def get_long_description(self):
+        return self._encode_field(self.long_description) or "UNKNOWN"
+
+    def get_keywords(self):
+        return self.keywords or []
+
+    def get_platforms(self):
+        return self.platforms or ["UNKNOWN"]
+
+    def get_classifiers(self):
+        return self.classifiers or []
+
+    def get_download_url(self):
+        return self.download_url or "UNKNOWN"
+
+    # PEP 314
+    def get_requires(self):
+        return self.requires or []
+
+    def set_requires(self, value):
+        """Store 'value' as the requires list.
+
+        Every item is first passed to VersionPredicate, so whatever that
+        constructor raises for an item propagates to the caller.
+        """
+        import distutils.versionpredicate
+        for v in value:
+            distutils.versionpredicate.VersionPredicate(v)
+        self.requires = value
+
+    def get_provides(self):
+        return self.provides or []
+
+    def set_provides(self, value):
+        """Store the whitespace-stripped items of 'value' as the provides
+        list, after passing each one to split_provision().
+        """
+        import distutils.versionpredicate
+        value = [v.strip() for v in value]
+        for v in value:
+            distutils.versionpredicate.split_provision(v)
+        self.provides = value
+
+    def get_obsoletes(self):
+        return self.obsoletes or []
+
+    def set_obsoletes(self, value):
+        """Store 'value' as the obsoletes list.
+
+        Every item is first passed to VersionPredicate, so whatever that
+        constructor raises for an item propagates to the caller.
+        """
+        import distutils.versionpredicate
+        for v in value:
+            distutils.versionpredicate.VersionPredicate(v)
+        self.obsoletes = value
+
+def fix_help_options(options):
+    """Reduce a 'help_options' list of 4-tuples, as found in various
+    command classes, to the 3-tuple form required by FancyGetopt.
+
+    Returns a new list holding the first three items of every entry.
+    """
+    return [help_tuple[0:3] for help_tuple in options]
diff --git a/cashew/Lib/distutils/emxccompiler.py b/cashew/Lib/distutils/emxccompiler.py
new file mode 100644
index 0000000..a017205
--- /dev/null
+++ b/cashew/Lib/distutils/emxccompiler.py
@@ -0,0 +1,319 @@
+"""distutils.emxccompiler
+
+Provides the EMXCCompiler class, a subclass of UnixCCompiler that
+handles the EMX port of the GNU C compiler to OS/2.
+"""
+
+# issues:
+#
+# * OS/2 insists that DLLs can have names no longer than 8 characters
+# We put export_symbols in a def-file, as though the DLL can have
+# an arbitrary length name, but truncate the output filename.
+#
+# * only use OMF objects and use LINK386 as the linker (-Zomf)
+#
+# * always build for multithreading (-Zmt) as the accompanying OS/2 port
+# of Python is only distributed with threads enabled.
+#
+# tested configurations:
+#
+# * EMX gcc 2.81/EMX 0.9d fix03
+
+__revision__ = "$Id$"
+
+import os,sys,copy
+from distutils.ccompiler import gen_preprocess_options, gen_lib_options
+from distutils.unixccompiler import UnixCCompiler
+from distutils.file_util import write_file
+from distutils.errors import DistutilsExecError, CompileError, UnknownFileError
+from distutils import log
+
+class EMXCCompiler (UnixCCompiler):
+    """UnixCCompiler subclass for the EMX port of the GNU C compiler to
+    OS/2.
+
+    Sources are compiled with 'gcc -Zomf -Zmt'; '.rc' resource scripts are
+    compiled with 'rc -r'.  Export symbols are handed to the linker
+    through a generated module definition (.def) file.
+    """
+
+    compiler_type = 'emx'
+    obj_extension = ".obj"
+    static_lib_extension = ".lib"
+    shared_lib_extension = ".dll"
+    static_lib_format = "%s%s"
+    shared_lib_format = "%s%s"
+    res_extension = ".res"      # compiled resource file
+    exe_extension = ".exe"
+
+    def __init__ (self,
+                  verbose=0,
+                  dry_run=0,
+                  force=0):
+        """Set up the compiler and configure the gcc command lines.
+
+        Warns if check_config_h() does not report CONFIG_H_OK, and records
+        the tool versions returned by get_versions() in
+        'self.gcc_version' and 'self.ld_version'.
+        """
+        UnixCCompiler.__init__ (self, verbose, dry_run, force)
+
+        (status, details) = check_config_h()
+        self.debug_print("Python's GCC status: %s (details: %s)" %
+                         (status, details))
+        if status is not CONFIG_H_OK:
+            # Each fragment ends with a space so the sentences do not run
+            # together in the emitted warning.
+            self.warn(
+                "Python's pyconfig.h doesn't seem to support your compiler. " +
+                ("Reason: %s. " % details) +
+                "Compiling may fail because of undefined preprocessor macros.")
+
+        (self.gcc_version, self.ld_version) = \
+            get_versions()
+        self.debug_print(self.compiler_type + ": gcc %s, ld %s\n" %
+                         (self.gcc_version,
+                          self.ld_version) )
+
+        # Hard-code GCC because that's what this is all about.
+        # XXX optimization, warnings etc. should be customizable.
+        self.set_executables(compiler='gcc -Zomf -Zmt -O3 -fomit-frame-pointer -mprobe -Wall',
+                             compiler_so='gcc -Zomf -Zmt -O3 -fomit-frame-pointer -mprobe -Wall',
+                             linker_exe='gcc -Zomf -Zmt -Zcrtdll',
+                             linker_so='gcc -Zomf -Zmt -Zcrtdll -Zdll')
+
+        # want the gcc library statically linked (so that we don't have
+        # to distribute a version dependent on the compiler we have)
+        self.dll_libraries=["gcc"]
+
+    # __init__ ()
+
+    def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
+        """Compile 'src': '.rc' files with 'rc -r', all others with gcc.
+
+        Raises CompileError if the spawned program fails.
+        """
+        if ext == '.rc':
+            # gcc requires '.rc' compiled to binary ('.res') files !!!
+            command = ["rc", "-r", src]
+        else:
+            # for other files use the C-compiler
+            command = (self.compiler_so + cc_args + [src, '-o', obj] +
+                       extra_postargs)
+        try:
+            self.spawn(command)
+        except DistutilsExecError as msg:
+            raise CompileError(msg)
+
+    def link (self,
+              target_desc,
+              objects,
+              output_filename,
+              output_dir=None,
+              libraries=None,
+              library_dirs=None,
+              runtime_library_dirs=None,
+              export_symbols=None,
+              debug=0,
+              extra_preargs=None,
+              extra_postargs=None,
+              build_temp=None,
+              target_lang=None):
+        """Link 'objects' by delegating to UnixCCompiler.link().
+
+        Before delegating, 'self.dll_libraries' is appended to the
+        libraries, '-s' is added to the pre-arguments unless 'debug' is
+        true, and, for non-executable targets with 'export_symbols', a
+        .def file listing the symbols is written next to the first object
+        file and appended to the objects.
+        """
+        # use separate copies, so we can modify the lists
+        extra_preargs = copy.copy(extra_preargs or [])
+        libraries = copy.copy(libraries or [])
+        objects = copy.copy(objects or [])
+
+        # Additional libraries
+        libraries.extend(self.dll_libraries)
+
+        # handle export symbols by creating a def-file
+        # with executables this only works with gcc/ld as linker
+        if ((export_symbols is not None) and
+            (target_desc != self.EXECUTABLE)):
+            # (The linker doesn't do anything if output is up-to-date.
+            # So it would probably better to check if we really need this,
+            # but for this we had to insert some unchanged parts of
+            # UnixCCompiler, and this is not what we want.)
+
+            # we want to put some files in the same directory as the
+            # object files are, build_temp doesn't help much
+            # where are the object files
+            temp_dir = os.path.dirname(objects[0])
+            # name of dll to give the helper files the same base name
+            dll_name = os.path.splitext(
+                os.path.basename(output_filename))[0]
+
+            # generate the filenames for these files
+            def_file = os.path.join(temp_dir, dll_name + ".def")
+
+            # Generate .def file
+            contents = [
+                "LIBRARY %s INITINSTANCE TERMINSTANCE" % dll_name,
+                "DATA MULTIPLE NONSHARED",
+                "EXPORTS"]
+            for sym in export_symbols:
+                contents.append(' "%s"' % sym)
+            self.execute(write_file, (def_file, contents),
+                         "writing %s" % def_file)
+
+            # next add options for def-file and to creating import libraries
+            # for gcc/ld the def-file is specified as any other object files
+            objects.append(def_file)
+
+        # who wants symbols and a many times larger output file
+        # should explicitly switch the debug mode on
+        # otherwise we let dllwrap/ld strip the output file
+        # (On my machine: 10KB < stripped_file < ??100KB
+        #   unstripped_file = stripped_file + XXX KB
+        #  ( XXX=254 for a typical python extension))
+        if not debug:
+            extra_preargs.append("-s")
+
+        UnixCCompiler.link(self,
+                           target_desc,
+                           objects,
+                           output_filename,
+                           output_dir,
+                           libraries,
+                           library_dirs,
+                           runtime_library_dirs,
+                           None, # export_symbols, we do this in our def-file
+                           debug,
+                           extra_preargs,
+                           extra_postargs,
+                           build_temp,
+                           target_lang)
+
+    # link ()
+
+    # -- Miscellaneous methods -----------------------------------------
+
+    # override the object_filenames method from CCompiler to
+    # support rc and res-files
+    def object_filenames (self,
+                          source_filenames,
+                          strip_dir=0,
+                          output_dir=''):
+        """Return the object file names for 'source_filenames'.
+
+        '.rc' sources map to 'self.res_extension', all other known
+        sources to 'self.obj_extension'.  If 'strip_dir' is true only the
+        base name of each source is kept.  Raises UnknownFileError for an
+        extension that is neither in 'self.src_extensions' nor '.rc'.
+        """
+        if output_dir is None: output_dir = ''
+        known_extensions = self.src_extensions + ['.rc']
+        obj_names = []
+        for src_name in source_filenames:
+            # use normcase to make sure '.rc' is really '.rc' and not '.RC'
+            (base, ext) = os.path.splitext (os.path.normcase(src_name))
+            if ext not in known_extensions:
+                raise UnknownFileError(
+                      "unknown file type '%s' (from '%s')" %
+                      (ext, src_name))
+            if strip_dir:
+                base = os.path.basename (base)
+            if ext == '.rc':
+                # these need to be compiled to object files
+                obj_names.append (os.path.join (output_dir,
+                                                base + self.res_extension))
+            else:
+                obj_names.append (os.path.join (output_dir,
+                                                base + self.obj_extension))
+        return obj_names
+
+    # object_filenames ()
+
+    # override the find_library_file method from UnixCCompiler
+    # to deal with file naming/searching differences
+    def find_library_file(self, dirs, lib, debug=0):
+        """Return the path of library 'lib', or None if it is not found.
+
+        Searches 'dirs' followed by the ';'-separated directories of the
+        LIBRARY_PATH environment variable, trying '<lib>.lib' before
+        'lib<lib>.lib' in each directory.
+        """
+        shortlib = '%s.lib' % lib
+        longlib = 'lib%s.lib' % lib    # this form very rare
+
+        # get EMX's default library directory search path
+        library_path = os.environ.get('LIBRARY_PATH')
+        if library_path is None:
+            emx_dirs = []
+        else:
+            emx_dirs = library_path.split(';')
+
+        for lib_dir in dirs + emx_dirs:
+            shortlibp = os.path.join(lib_dir, shortlib)
+            longlibp = os.path.join(lib_dir, longlib)
+            if os.path.exists(shortlibp):
+                return shortlibp
+            elif os.path.exists(longlibp):
+                return longlibp
+
+        # Oops, didn't find it in *any* of 'dirs'
+        return None
+
+# class EMXCCompiler
+
+
+# Because these compilers aren't configured in Python's pyconfig.h file by
+# default, we should at least warn the user if they are using an unmodified
+# version.
+
+# Status values returned as the first item of check_config_h()'s result.
+CONFIG_H_OK = "ok"
+CONFIG_H_NOTOK = "not ok"
+CONFIG_H_UNCERTAIN = "uncertain"
+
+def check_config_h():
+    """Check whether the current Python installation (specifically,
+    pyconfig.h) looks suitable for building extensions with GCC.
+
+    Returns a tuple (status, details).  'status' is one of:
+      CONFIG_H_OK
+        all is well, go ahead and compile
+      CONFIG_H_NOTOK
+        doesn't look good
+      CONFIG_H_UNCERTAIN
+        not sure -- unable to read pyconfig.h
+    'details' is a human-readable string explaining the situation.
+
+    There are two ways to conclude "OK": either 'sys.version' contains
+    the string "GCC" (implying that this Python was built with GCC), or
+    the installed "pyconfig.h" contains the string "__GNUC__".
+    """
+
+    # XXX since this function also checks sys.version, it's not strictly a
+    # "pyconfig.h" check -- should probably be renamed...
+
+    from distutils import sysconfig
+
+    # if sys.version contains GCC then python was compiled with
+    # GCC, and the pyconfig.h file should be OK
+    if "GCC" in sys.version:
+        return (CONFIG_H_OK, "sys.version mentions 'GCC'")
+
+    fn = sysconfig.get_config_h_filename()
+    try:
+        # Reading the whole file at once is fine: this is done only once
+        # and it is fast enough.
+        config_h = open(fn)
+        try:
+            text = config_h.read()
+        finally:
+            config_h.close()
+    except IOError as exc:
+        # if we can't read this file, we cannot say it is wrong
+        # the compiler will complain later about this file as missing
+        return (CONFIG_H_UNCERTAIN,
+                "couldn't read '%s': %s" % (fn, exc.strerror))
+
+    # "pyconfig.h" contains an "#ifdef __GNUC__" or something similar
+    if "__GNUC__" in text:
+        return (CONFIG_H_OK, "'%s' mentions '__GNUC__'" % fn)
+    return (CONFIG_H_NOTOK, "'%s' does not mention '__GNUC__'" % fn)
+
+
+def get_versions():
+    """Try to find out the versions of gcc and ld.
+
+    Returns the tuple (gcc_version, ld_version).  'gcc_version' is a
+    StrictVersion built from the output of 'gcc -dumpversion', or None
+    if gcc is not found or its output holds no 'N.N.N' version.
+    'ld_version' is always None.
+    """
+    from distutils.version import StrictVersion
+    from distutils.spawn import find_executable
+    import re
+
+    gcc_version = None
+    gcc_exe = find_executable('gcc')
+    if gcc_exe:
+        pipe = os.popen(gcc_exe + ' -dumpversion','r')
+        try:
+            output = pipe.read()
+        finally:
+            pipe.close()
+        match = re.search(r'(\d+\.\d+\.\d+)', output)
+        if match:
+            gcc_version = StrictVersion(match.group(1))
+    # EMX ld has no way of reporting version number, and we use GCC
+    # anyway - so we can link OMF DLLs
+    ld_version = None
+    return (gcc_version, ld_version)
diff --git a/cashew/Lib/distutils/errors.py b/cashew/Lib/distutils/errors.py
new file mode 100644
index 0000000..d9c47c7
--- /dev/null
+++ b/cashew/Lib/distutils/errors.py
@@ -0,0 +1,88 @@
+"""distutils.errors
+
+Provides exceptions used by the Distutils modules. Note that Distutils
+modules may raise standard exceptions; in particular, SystemExit is
+usually raised for errors that are obviously the end-user's fault
+(eg. bad command-line arguments).
+
+This module is safe to use in "from ... import *" mode; it only exports
+symbols whose names start with "Distutils" and end with "Error"."""
+
+__revision__ = "$Id$"
+
+class DistutilsError(Exception):
+    """Root of the Distutils exception hierarchy (the root of all
+    Distutils evil)."""
+
+class DistutilsModuleError(DistutilsError):
+    """An expected module could not be loaded, or an expected class was
+    not found within some module (in particular, command modules and
+    classes)."""
+
+class DistutilsClassError(DistutilsError):
+    """Some command class (or possibly distribution class, if anyone
+    feels a need to subclass Distribution) is found not to be holding
+    up its end of the bargain, ie. implementing some part of the
+    "command" interface."""
+
+class DistutilsGetoptError(DistutilsError):
+    """The option table handed to 'fancy_getopt()' is bogus."""
+
+class DistutilsArgError(DistutilsError):
+    """An error in the command line usage; raised by fancy_getopt in
+    response to getopt.error."""
+
+class DistutilsFileError(DistutilsError):
+    """Any problem in the filesystem: expected file not found, etc.
+    Typically used for problems that are detected before IOError or
+    OSError could be raised."""
+
+class DistutilsOptionError(DistutilsError):
+    """Syntactic or semantic errors in command options, such as mutually
+    conflicting options, inconsistent options, badly-spelled values,
+    etc.  No distinction is made between option values originating in
+    the setup script, the command line, config files, or what-have-you
+    -- but if we *know* something originated in the setup script,
+    DistutilsSetupError is raised instead."""
+
+class DistutilsSetupError(DistutilsError):
+    """An error that can definitely be blamed on the setup script, such
+    as invalid keyword arguments to 'setup()'."""
+
+class DistutilsPlatformError(DistutilsError):
+    """Something cannot be done on the current platform (although we do
+    know how to do it on some platform) -- eg. trying to compile C files
+    on a platform not supported by a CCompiler subclass."""
+
+class DistutilsExecError(DistutilsError):
+    """Any problem executing an external program (such as the C
+    compiler, when compiling C files)."""
+
+class DistutilsInternalError(DistutilsError):
+    """Internal inconsistencies or impossibilities (obviously, this
+    should never be seen if the code is working!)."""
+
+class DistutilsTemplateError(DistutilsError):
+    """A syntax error in a file list template."""
+
+class DistutilsByteCompileError(DistutilsError):
+    """An error during byte compilation."""
+
+# Exception classes used by the CCompiler implementation classes
+class CCompilerError(Exception):
+    """Some compile or link operation failed."""
+
+class PreprocessError(CCompilerError):
+    """One or more C/C++ files could not be preprocessed."""
+
+class CompileError(CCompilerError):
+    """One or more C/C++ source files could not be compiled."""
+
+class LibError(CCompilerError):
+    """A static library could not be created from one or more C/C++
+    object files."""
+
+class LinkError(CCompilerError):
+    """One or more C/C++ object files could not be linked into an
+    executable or shared library file."""
+
+class UnknownFileError(CCompilerError):
+    """An attempt was made to process an unknown file type."""
diff --git a/cashew/Lib/distutils/extension.py b/cashew/Lib/distutils/extension.py
new file mode 100644
index 0000000..9a67ca8
--- /dev/null
+++ b/cashew/Lib/distutils/extension.py
@@ -0,0 +1,255 @@
+"""distutils.extension
+
+Provides the Extension class, used to describe C/C++ extension
+modules in setup scripts."""
+
+__revision__ = "$Id$"
+
+import os, string, sys
+from types import *
+
+try:
+ import warnings
+except ImportError:
+ warnings = None
+
+# This class is really only used by the "build_ext" command, so it might
+# make sense to put it in distutils.command.build_ext. However, that
+# module is already big enough, and I want to make this class a bit more
+# complex to simplify some common cases ("foo" module in "foo.c") and do
+# better error-checking ("foo.c" actually exists).
+#
+# Also, putting this in build_ext.py means every setup script would have to
+# import that large-ish module (indirectly, through distutils.core) in
+# order to do anything.
+
+class Extension:
+    """Just a collection of attributes that describes an extension
+    module and everything needed to build it (hopefully in a portable
+    way, but there are hooks that let you be as unportable as you need).
+
+    Instance attributes:
+      name : string
+        the full name of the extension, including any packages -- ie.
+        *not* a filename or pathname, but Python dotted name
+      sources : [string]
+        list of source filenames, relative to the distribution root
+        (where the setup script lives), in Unix form (slash-separated)
+        for portability.  Source files may be C, C++, SWIG (.i),
+        platform-specific resource files, or whatever else is recognized
+        by the "build_ext" command as source for a Python extension.
+      include_dirs : [string]
+        list of directories to search for C/C++ header files (in Unix
+        form for portability)
+      define_macros : [(name : string, value : string|None)]
+        list of macros to define; each macro is defined using a 2-tuple,
+        where 'value' is either the string to define it to or None to
+        define it without a particular value (equivalent of "#define
+        FOO" in source or -DFOO on Unix C compiler command line)
+      undef_macros : [string]
+        list of macros to undefine explicitly
+      library_dirs : [string]
+        list of directories to search for C/C++ libraries at link time
+      libraries : [string]
+        list of library names (not filenames or paths) to link against
+      runtime_library_dirs : [string]
+        list of directories to search for C/C++ libraries at run time
+        (for shared extensions, this is when the extension is loaded)
+      extra_objects : [string]
+        list of extra files to link with (eg. object files not implied
+        by 'sources', static library that must be explicitly specified,
+        binary resource files, etc.)
+      extra_compile_args : [string]
+        any extra platform- and compiler-specific information to use
+        when compiling the source files in 'sources'.  For platforms and
+        compilers where "command line" makes sense, this is typically a
+        list of command-line arguments, but for other platforms it could
+        be anything.
+      extra_link_args : [string]
+        any extra platform- and compiler-specific information to use
+        when linking object files together to create the extension (or
+        to create a new static Python interpreter).  Similar
+        interpretation as for 'extra_compile_args'.
+      export_symbols : [string]
+        list of symbols to be exported from a shared extension.  Not
+        used on all platforms, and not generally necessary for Python
+        extensions, which typically export exactly one symbol: "init" +
+        extension_name.
+      swig_opts : [string]
+        any extra options to pass to SWIG if a source file has the .i
+        extension.
+      depends : [string]
+        list of files that the extension depends on
+      language : string
+        extension language (i.e. "c", "c++", "objc"). Will be detected
+        from the source extensions if not provided.
+    """
+
+    # When adding arguments to this constructor, be sure to update
+    # setup_keywords in core.py.
+    def __init__ (self, name, sources,
+                  include_dirs=None,
+                  define_macros=None,
+                  undef_macros=None,
+                  library_dirs=None,
+                  libraries=None,
+                  runtime_library_dirs=None,
+                  extra_objects=None,
+                  extra_compile_args=None,
+                  extra_link_args=None,
+                  export_symbols=None,
+                  swig_opts = None,
+                  depends=None,
+                  language=None,
+                  **kw                      # To catch unknown keywords
+                 ):
+        """Store the description of one extension module.
+
+        'name' must be a string and 'sources' a list of strings;
+        otherwise AssertionError is raised.  Every list-valued option
+        left as None (or otherwise false) becomes a new empty list.
+        Unknown keyword arguments are not an error: they are reported
+        with a warning, or on stderr when the 'warnings' module is
+        unavailable.
+        """
+        # Raise explicitly rather than use 'assert' so that the checks
+        # are not stripped when Python runs with -O.
+        if not isinstance(name, str):
+            raise AssertionError("'name' must be a string")
+        if not (isinstance(sources, list) and
+                all(isinstance(source, str) for source in sources)):
+            raise AssertionError("'sources' must be a list of strings")
+
+        self.name = name
+        self.sources = sources
+        self.include_dirs = include_dirs or []
+        self.define_macros = define_macros or []
+        self.undef_macros = undef_macros or []
+        self.library_dirs = library_dirs or []
+        self.libraries = libraries or []
+        self.runtime_library_dirs = runtime_library_dirs or []
+        self.extra_objects = extra_objects or []
+        self.extra_compile_args = extra_compile_args or []
+        self.extra_link_args = extra_link_args or []
+        self.export_symbols = export_symbols or []
+        self.swig_opts = swig_opts or []
+        self.depends = depends or []
+        self.language = language
+
+        # If there are unknown keyword options, warn about them
+        if kw:
+            options = ', '.join([repr(option) for option in sorted(kw)])
+            msg = "Unknown Extension options: " + options
+            if warnings is not None:
+                warnings.warn(msg)
+            else:
+                sys.stderr.write(msg + '\n')
+# class Extension
+
+
+def read_setup_file (filename):
+    """Parse the makesetup-style file 'filename' into Extension objects.
+
+    "VAR = VALUE" assignments are gathered first with parse_makefile()
+    and expanded into the remaining lines.  Every other line must start
+    with the extension's module name, followed by source files,
+    compiler/linker switches and extra object or library files.  Lines
+    that both start and end with "*" are not handled and only produce a
+    warning, as does any unrecognized word.
+
+    Returns the list of Extension instances, one per extension line.
+    """
+    from distutils.sysconfig import \
+         parse_makefile, expand_makefile_vars, _variable_rx
+    from distutils.text_file import TextFile
+    from distutils.util import split_quoted
+
+    # First pass over the file to gather "VAR = VALUE" assignments.
+    makefile_vars = parse_makefile(filename)
+
+    # Second pass to gobble up the real content: lines of the form
+    #   <module> ... [<sourcefile> ...] [<cpparg> ...] [<library> ...]
+    setup_file = TextFile(filename,
+                          strip_comments=1, skip_blanks=1, join_lines=1,
+                          lstrip_ws=1, rstrip_ws=1)
+    extensions = []
+    try:
+        while 1:
+            line = setup_file.readline()
+            if line is None:                # eof
+                break
+            if _variable_rx.match(line):    # VAR=VALUE, handled in first pass
+                continue
+
+            if line[0] == line[-1] == "*":
+                setup_file.warn("'%s' lines not handled yet" % line)
+                continue
+
+            line = expand_makefile_vars(line, makefile_vars)
+            words = split_quoted(line)
+
+            # NB. this parses a slightly different syntax than the old
+            # makesetup script: here, there must be exactly one extension per
+            # line, and it must be the first word of the line.  I have no idea
+            # why the old syntax supported multiple extensions per line, as
+            # they all wind up being the same.
+
+            module = words[0]
+            ext = Extension(module, [])
+            # When set, the next word is an argument of the previous
+            # switch and is appended to this list.
+            append_next_word = None
+
+            for word in words[1:]:
+                if append_next_word is not None:
+                    append_next_word.append(word)
+                    append_next_word = None
+                    continue
+
+                suffix = os.path.splitext(word)[1]
+                switch = word[0:2] ; value = word[2:]
+
+                if suffix in (".c", ".cc", ".cpp", ".cxx", ".c++", ".m", ".mm"):
+                    # hmm, should we do something about C vs. C++ sources?
+                    # or leave it up to the CCompiler implementation to
+                    # worry about?
+                    ext.sources.append(word)
+                elif switch == "-I":
+                    ext.include_dirs.append(value)
+                elif switch == "-D":
+                    equals = value.find("=")
+                    if equals == -1:        # bare "-DFOO" -- no value
+                        ext.define_macros.append((value, None))
+                    else:                   # "-DFOO=blah"
+                        # The macro value starts right after the "=".
+                        ext.define_macros.append((value[0:equals],
+                                                  value[equals+1:]))
+                elif switch == "-U":
+                    ext.undef_macros.append(value)
+                elif switch == "-C":        # only here 'cause makesetup has it!
+                    ext.extra_compile_args.append(word)
+                elif switch == "-l":
+                    ext.libraries.append(value)
+                elif switch == "-L":
+                    ext.library_dirs.append(value)
+                elif switch == "-R":
+                    ext.runtime_library_dirs.append(value)
+                elif word == "-rpath":
+                    append_next_word = ext.runtime_library_dirs
+                elif word == "-Xlinker":
+                    append_next_word = ext.extra_link_args
+                elif word == "-Xcompiler":
+                    append_next_word = ext.extra_compile_args
+                elif switch == "-u":
+                    ext.extra_link_args.append(word)
+                    if not value:
+                        append_next_word = ext.extra_link_args
+                elif suffix in (".a", ".so", ".sl", ".o", ".dylib"):
+                    # NB. a really faithful emulation of makesetup would
+                    # append a .o file to extra_objects only if it
+                    # had a slash in it; otherwise, it would s/.o/.c/
+                    # and append it to sources.  Hmmmm.
+                    ext.extra_objects.append(word)
+                else:
+                    setup_file.warn("unrecognized argument '%s'" % word)
+
+            extensions.append(ext)
+    finally:
+        setup_file.close()
+
+    return extensions
+
+# read_setup_file ()
diff --git a/cashew/Lib/distutils/fancy_getopt.py b/cashew/Lib/distutils/fancy_getopt.py
new file mode 100644
index 0000000..2dea948
--- /dev/null
+++ b/cashew/Lib/distutils/fancy_getopt.py
@@ -0,0 +1,484 @@
+"""distutils.fancy_getopt
+
+Wrapper around the standard getopt module that provides the following
+additional features:
+ * short and long options are tied together
+ * options have help strings, so fancy_getopt could potentially
+ create a complete usage summary
+ * options set attributes of a passed-in object
+"""
+
+__revision__ = "$Id$"
+
+import sys
+import string
+import re
+import getopt
+from distutils.errors import DistutilsGetoptError, DistutilsArgError
+
+# Much like command_re in distutils.core, this is close to but not quite
+# the same as a Python NAME -- except, in the spirit of most GNU
+# utilities, we use '-' in place of '_'. (The spirit of LISP lives on!)
+# The similarities to NAME are again not a coincidence...
+# A long option is one letter followed by any number of letters, digits
+# or hyphens; longopt_re anchors that pattern to the whole string.
+longopt_pat = r'[a-zA-Z](?:[a-zA-Z0-9-]*)'
+longopt_re = re.compile(r'^%s$' % longopt_pat)
+
+# For recognizing "negative alias" options, eg. "quiet=!verbose"
+neg_alias_re = re.compile("^(%s)=!(%s)$" % (longopt_pat, longopt_pat))
+
+# This is used to translate long options to legitimate Python identifiers
+# (for use as attributes of some object).
+longopt_xlate = string.maketrans('-', '_')
+
+class FancyGetopt:
+ """Wrapper around the standard 'getopt()' module that provides some
+ handy extra functionality:
+ * short and long options are tied together
+ * options have help strings, and help text can be assembled
+ from them
+ * options set attributes of a passed-in object
+ * boolean options can have "negative aliases" -- eg. if
+ --quiet is the "negative alias" of --verbose, then "--quiet"
+ on the command line sets 'verbose' to false
+ """
+
+ def __init__ (self, option_table=None):
+     """Set up a parser around 'option_table'.
+
+     The option table is (currently) a list of tuples of 3 or 4 values:
+       (long_option, short_option, help_string [, repeatable])
+     If an option takes an argument, its long_option has '=' appended;
+     short_option is a single character (never with ':'), or None when
+     the option has no short form.  Every tuple must have a long option.
+     """
+     # Filled up in 'getopt()': the original order of options (and their
+     # values) on the command line, with short options expanded and
+     # aliases converted.
+     self.option_order = []
+
+     # {'foo': 'bar'} means --foo is an alias for --bar
+     self.alias = {}
+
+     # options that are the boolean opposite of some other option
+     self.negative_alias = {}
+
+     # Bookkeeping derived from the option table.  It is not populated
+     # until we are ready to parse a command line, because the table
+     # passed in here isn't necessarily the final word.
+     self.takes_arg = {}
+     self.attr_name = {}
+     self.short2long = {}
+     self.long_opts = []
+     self.short_opts = []
+
+     # 'option_index' maps long option names to their entries in the
+     # option table.
+     self.option_table = option_table
+     self.option_index = {}
+     if option_table:
+         self._build_index()
+
+ # __init__ ()
+
+
+ def _build_index (self):
+     """Rebuild 'option_index' in place so that it maps the first item
+     (the long option name) of every 'option_table' entry to that entry."""
+     index = self.option_index
+     index.clear()
+     index.update([(entry[0], entry) for entry in self.option_table])
+
+ def set_option_table (self, option_table):
+ """Replace the option table with 'option_table' and rebuild the
+ long-option index from it."""
+ self.option_table = option_table
+ self._build_index()
+
+ def add_option (self, long_option, short_option=None, help_string=None):
+     """Append (long_option, short_option, help_string) to the option
+     table and index it under 'long_option'.
+
+     Raises DistutilsGetoptError if 'long_option' is already indexed.
+     """
+     if long_option in self.option_index:
+         raise DistutilsGetoptError(
+             "option conflict: already an option '%s'" % long_option)
+
+     # The constructor leaves 'option_table' as None when no table was
+     # supplied; start an empty one so the first add_option() on a
+     # fresh parser does not fail on None.append().
+     if self.option_table is None:
+         self.option_table = []
+
+     option = (long_option, short_option, help_string)
+     self.option_table.append(option)
+     self.option_index[long_option] = option
+
+
+ def has_option (self, long_option):
+     """Tell whether 'long_option' is a key of this parser's option
+     index, i.e. whether the option table has an option of that name."""
+     known_options = self.option_index
+     return long_option in known_options
+
+ def get_attr_name (self, long_option):
+     """Return 'long_option' in the form it takes as an attribute name
+     of some object: hyphens are translated to underscores."""
+     attr_name = string.translate(long_option, longopt_xlate)
+     return attr_name
+
+
+ def _check_alias_dict (self, aliases, what):
+     """Check that every key and every value of the dictionary
+     'aliases' is a key of the option index; raise DistutilsGetoptError
+     for the first one that is not.  'what' describes the kind of alias
+     in the error message."""
+     assert isinstance(aliases, dict)
+     known = self.option_index
+     for (alias_name, target) in aliases.items():
+         if alias_name not in known:
+             raise DistutilsGetoptError(
+                 "invalid %s '%s': option '%s' not defined"
+                 % (what, alias_name, alias_name))
+         if target not in known:
+             raise DistutilsGetoptError(
+                 "invalid %s '%s': aliased option '%s' not defined"
+                 % (what, alias_name, target))
+
+ def set_aliases (self, alias):
+ """Set the aliases for this option parser.
+ 'alias' should be a dictionary mapping option names to option
+ names; it is validated with '_check_alias_dict()' before being
+ stored."""
+ self._check_alias_dict(alias, "alias")
+ self.alias = alias
+
+ def set_negative_aliases (self, negative_alias):
+ """Set the negative aliases for this option parser.
+ 'negative_alias' should be a dictionary mapping option names to
+ option names, both the key and value must already be defined
+ in the option table. The dictionary is validated with
+ '_check_alias_dict()' before being stored."""
+ self._check_alias_dict(negative_alias, "negative alias")
+ self.negative_alias = negative_alias
+
+
+ def _grok_option_table (self):
+ """Populate the various data structures that keep tabs on the
+ option table. Called by 'getopt()' before it can do anything
+ worthwhile.
+
+ Raises ValueError for an option tuple that does not have 3 or 4
+ items, and DistutilsGetoptError for an invalid option name or an
+ alias that is inconsistent with the option it points to.
+ """
+ # Only these four structures are reset here; 'takes_arg' and
+ # 'attr_name' keep whatever earlier calls stored in them.
+ self.long_opts = []
+ self.short_opts = []
+ self.short2long.clear()
+ self.repeat = {}
+
+ for option in self.option_table:
+ if len(option) == 3:
+ long, short, help = option
+ repeat = 0
+ elif len(option) == 4:
+ long, short, help, repeat = option
+ else:
+ # the option table is part of the code, so simply
+ # assert that it is correct
+ raise ValueError, "invalid option tuple: %r" % (option,)
+
+ # Type- and value-check the option names
+ if not isinstance(long, str) or len(long) < 2:
+ raise DistutilsGetoptError, \
+ ("invalid long option '%s': "
+ "must be a string of length >= 2") % long
+
+ if (not ((short is None) or
+ (isinstance(short, str) and len(short) == 1))):
+ raise DistutilsGetoptError, \
+ ("invalid short option '%s': "
+ "must a single character or None") % short
+
+ # Both are recorded under the name exactly as written in the table,
+ # i.e. still including any trailing '='.
+ self.repeat[long] = repeat
+ self.long_opts.append(long)
+
+ if long[-1] == '=': # option takes an argument?
+ if short: short = short + ':'
+ long = long[0:-1]
+ self.takes_arg[long] = 1
+ else:
+
+ # Is this option a "negative alias" for some other option (eg.
+ # "quiet" == "!verbose")?
+ alias_to = self.negative_alias.get(long)
+ if alias_to is not None:
+ # NOTE(review): this lookup needs 'alias_to' to be in 'takes_arg'
+ # already (processed earlier in this loop or in an earlier call).
+ if self.takes_arg[alias_to]:
+ raise DistutilsGetoptError, \
+ ("invalid negative alias '%s': "
+ "aliased option '%s' takes a value") % \
+ (long, alias_to)
+
+ self.long_opts[-1] = long # XXX redundant?!
+ self.takes_arg[long] = 0
+
+ else:
+ self.takes_arg[long] = 0
+
+ # If this is an alias option, make sure its "takes arg" flag is
+ # the same as the option it's aliased to.
+ alias_to = self.alias.get(long)
+ if alias_to is not None:
+ if self.takes_arg[long] != self.takes_arg[alias_to]:
+ raise DistutilsGetoptError, \
+ ("invalid alias '%s': inconsistent with "
+ "aliased option '%s' (one of them takes a value, "
+ "the other doesn't") % (long, alias_to)
+
+
+ # Now enforce some bondage on the long option name, so we can
+ # later translate it to an attribute name on some object. Have
+ # to do this a bit late to make sure we've removed any trailing
+ # '='.
+ if not longopt_re.match(long):
+ raise DistutilsGetoptError, \
+ ("invalid long option name '%s' " +
+ "(must be letters, numbers, hyphens only") % long
+
+ self.attr_name[long] = self.get_attr_name(long)
+ if short:
+ self.short_opts.append(short)
+ # 'short' may carry a trailing ':' by now, so index on its first
+ # character only.
+ self.short2long[short[0]] = long
+
+ # for option_table
+
+ # _grok_option_table()
+
+
+ def getopt (self, args=None, object=None):
+ """Parse command-line options in args. Store as attributes on object.
+
+ If 'args' is None or not supplied, uses 'sys.argv[1:]'. If
+ 'object' is None or not supplied, creates a new OptionDummy
+ object, stores option values there, and returns a tuple (args,
+ object). If 'object' is supplied, it is modified in place and
+ 'getopt()' just returns 'args'; in both cases, the returned
+ 'args' is a modified copy of the passed-in 'args' list, which
+ is left untouched.
+
+ Raises DistutilsArgError when the underlying 'getopt.getopt()'
+ reports an error.
+ """
+ if args is None:
+ args = sys.argv[1:]
+ if object is None:
+ object = OptionDummy()
+ created_object = 1
+ else:
+ created_object = 0
+
+ self._grok_option_table()
+
+ # NOTE(review): string.join() without a separator argument is
+ # documented to join with a single space, so 'short_opts' would
+ # contain spaces between the option letters -- confirm this is
+ # harmless for getopt.getopt().
+ short_opts = string.join(self.short_opts)
+ try:
+ opts, args = getopt.getopt(args, short_opts, self.long_opts)
+ except getopt.error, msg:
+ raise DistutilsArgError, msg
+
+ for opt, val in opts:
+ # Normalise 'opt' to the long option name without leading dashes.
+ if len(opt) == 2 and opt[0] == '-': # it's a short option
+ opt = self.short2long[opt[1]]
+ else:
+ assert len(opt) > 2 and opt[:2] == '--'
+ opt = opt[2:]
+
+ alias = self.alias.get(opt)
+ if alias:
+ opt = alias
+
+ if not self.takes_arg[opt]: # boolean option?
+ assert val == '', "boolean option can't have value"
+ # A negative alias stores 0 on the option it negates; any other
+ # boolean option stores 1.
+ alias = self.negative_alias.get(opt)
+ if alias:
+ opt = alias
+ val = 0
+ else:
+ val = 1
+
+ attr = self.attr_name[opt]
+ # The only repeating option at the moment is 'verbose'.
+ # It has a negative option -q quiet, which should set verbose = 0.
+ # NOTE(review): 'repeat' is keyed by the long option as written in
+ # the option table, while this lookup uses the attribute name, and
+ # it tests for presence rather than for a true 'repeat' flag --
+ # confirm this is the intended condition.
+ if val and self.repeat.get(attr) is not None:
+ val = getattr(object, attr, 0) + 1
+ setattr(object, attr, val)
+ # 'option_order' is appended to, not reset, by this method.
+ self.option_order.append((opt, val))
+
+ # for opts
+ if created_object:
+ return args, object
+ else:
+ return args
+
+ # getopt()
+
+
+ def get_option_order (self):
+     """Return the list of (option, value) tuples recorded in
+     'option_order' by 'getopt()'.  Raises RuntimeError if
+     'option_order' is None.
+     """
+     order = self.option_order
+     if order is not None:
+         return order
+     raise RuntimeError("'getopt()' hasn't been called yet")
+
+
+ def generate_help (self, header=None):
+     """Generate help text (a list of strings, one per suggested line of
+     output) from the option table for this FancyGetopt object.
+
+     'header', if true, is used as the first line; otherwise the first
+     line is 'Option summary:'.
+     """
+     # Blithely assume the option table is good: probably wouldn't call
+     # 'generate_help()' unless you've already called 'getopt()'.
+
+     # First pass: determine the maximum width of the option column,
+     # i.e. the long name (without any trailing '=') plus " (-x)" when
+     # the option has a short name.
+     max_opt = 0
+     for option in self.option_table:
+         long_name = option[0]
+         short_name = option[1]
+         name_len = len(long_name)
+         if long_name[-1] == '=':
+             name_len = name_len - 1
+         if short_name is not None:
+             name_len = name_len + 5     # " (-x)" where short == 'x'
+         if name_len > max_opt:
+             max_opt = name_len
+
+     opt_width = max_opt + 2 + 2 + 2     # room for indent + dashes + gutter
+
+     # Typical help block looks like this:
+     #   --foo       controls foonabulation
+     # Help block for longest option looks like this:
+     #   --flimflam  set the flim-flam level
+     # and with wrapped text:
+     #   --flimflam  set the flim-flam level (must be between
+     #               0 and 100, except on Tuesdays)
+     # Options with short names have the short name shown right after
+     # the long one (and it counts towards the column width):
+     #   --foo (-f)  controls foonabulation
+     # Important parameters:
+     #   - 2 spaces before option block start lines
+     #   - 2 dashes for each long option name
+     #   - min. 2 spaces between option and explanation (gutter)
+     #   - 5 characters (incl. space) for short option name
+
+     # Now generate lines of help text.  (If 80 columns were good enough
+     # for Jesus, then 78 columns are good enough for me!)
+     line_width = 78
+     text_width = line_width - opt_width
+     big_indent = ' ' * opt_width
+     if header:
+         lines = [header]
+     else:
+         lines = ['Option summary:']
+
+     for option in self.option_table:
+         long_name, short_name, help_string = option[:3]
+         text = wrap_text(help_string, text_width)
+         if long_name[-1] == '=':
+             long_name = long_name[0:-1]
+
+         # Case 1: no short option at all (makes life easy)
+         if short_name is None:
+             if text:
+                 lines.append("  --%-*s  %s" % (max_opt, long_name, text[0]))
+             else:
+                 lines.append("  --%-*s  " % (max_opt, long_name))
+
+         # Case 2: we have a short option, so we have to include it
+         # just after the long option
+         else:
+             opt_names = "%s (-%s)" % (long_name, short_name)
+             if text:
+                 lines.append("  --%-*s  %s" %
+                              (max_opt, opt_names, text[0]))
+             else:
+                 # '%-*s' consumes two values, the width and the string;
+                 # both must be supplied.
+                 lines.append("  --%-*s" % (max_opt, opt_names))
+
+         # continuation lines of wrapped help text line up with the
+         # first line's explanation column
+         for extra in text[1:]:
+             lines.append(big_indent + extra)
+
+     # for self.option_table
+
+     return lines
+
+ # generate_help ()
+
+ def print_help (self, header=None, file=None):
+     """Write the lines produced by 'generate_help(header)' to 'file'
+     (sys.stdout when 'file' is None), each followed by a newline."""
+     out = file
+     if out is None:
+         out = sys.stdout
+     for text in self.generate_help(header):
+         out.write(text + "\n")
+
+# class FancyGetopt
+
+
+def fancy_getopt (options, negative_opt, object, args):
+    """Build a FancyGetopt for 'options', install 'negative_opt' as its
+    negative aliases, and return the result of parsing 'args' onto
+    'object'."""
+    option_parser = FancyGetopt(options)
+    option_parser.set_negative_aliases(negative_opt)
+    return option_parser.getopt(args, object)
+
+
+WS_TRANS = string.maketrans(string.whitespace, ' ' * len(string.whitespace))
+
+def wrap_text (text, width):
+ """wrap_text(text : string, width : int) -> [string]
+
+ Split 'text' into multiple lines of no more than 'width' characters
+ each, and return the list of strings that results.
+
+ Returns [] when 'text' is None, and [text] unchanged when it already
+ fits in 'width' characters.
+ """
+
+ if text is None:
+ return []
+ if len(text) <= width:
+ return [text]
+
+ # Normalise all whitespace to plain spaces, then split into chunks
+ # that are runs of spaces, runs of hyphens, or the text in between
+ # (the capturing group keeps the separators in the result).
+ text = string.expandtabs(text)
+ text = string.translate(text, WS_TRANS)
+ chunks = re.split(r'( +|-+)', text)
+ chunks = filter(None, chunks) # ' - ' results in empty strings
+ lines = []
+
+ while chunks:
+
+ cur_line = [] # list of chunks (to-be-joined)
+ cur_len = 0 # length of current line
+
+ while chunks:
+ l = len(chunks[0])
+ if cur_len + l <= width: # can squeeze (at least) this chunk in
+ cur_line.append(chunks[0])
+ del chunks[0]
+ cur_len = cur_len + l
+ else: # this line is full
+ # drop last chunk if all space
+ if cur_line and cur_line[-1][0] == ' ':
+ del cur_line[-1]
+ break
+
+ if chunks: # any chunks left to process?
+
+ # if the current line is still empty, then we had a single
+ # chunk that's too big too fit on a line -- so we break
+ # down and break it up at the line width
+ if cur_len == 0:
+ cur_line.append(chunks[0][0:width])
+ chunks[0] = chunks[0][width:]
+
+ # all-whitespace chunks at the end of a line can be discarded
+ # (and we know from the re.split above that if a chunk has
+ # *any* whitespace, it is *all* whitespace)
+ if chunks[0][0] == ' ':
+ del chunks[0]
+
+ # and store this line in the list-of-all-lines -- as a single
+ # string, of course!
+ lines.append(string.join(cur_line, ''))
+
+ # while chunks
+
+ return lines
+
+
+def translate_longopt(opt):
+    """Return the long option name 'opt' turned into a valid Python
+    identifier: every "-" becomes "_".
+    """
+    identifier = string.translate(opt, longopt_xlate)
+    return identifier
+
+
+class OptionDummy:
+    """Empty holder object: command-line option values are stored on it
+    as instance attributes."""
+
+    def __init__ (self, options=[]):
+        """Create a new OptionDummy instance with one attribute, set to
+        None, for every name listed in 'options'."""
+        for attr_name in options:
+            setattr(self, attr_name, None)
diff --git a/cashew/Lib/distutils/file_util.py b/cashew/Lib/distutils/file_util.py
new file mode 100644
index 0000000..3b236e1
--- /dev/null
+++ b/cashew/Lib/distutils/file_util.py
@@ -0,0 +1,239 @@
+"""distutils.file_util
+
+Utility functions for operating on single files.
+"""
+
+__revision__ = "$Id$"
+
+import os
+from distutils.errors import DistutilsFileError
+from distutils import log
+
+# for generating verbose output in 'copy_file()'
+_copy_action = {None: 'copying',
+ 'hard': 'hard linking',
+ 'sym': 'symbolically linking'}
+
+
+def _copy_file_contents(src, dst, buffer_size=16*1024):
+    """Copy the file 'src' to 'dst'.
+
+    Both must be filenames.  Any error opening either file, reading from
+    'src', or writing to 'dst', raises DistutilsFileError.  Data is
+    read/written in chunks of 'buffer_size' bytes (default 16k).  No attempt
+    is made to handle anything apart from regular files.  An existing
+    'dst' is deleted before the new file is created.
+    """
+    # Stolen from shutil module in the standard library, but with
+    # custom error-handling added.
+    #
+    # Every handler catches EnvironmentError rather than os.error: the
+    # file operations open()/read()/write() report failures as IOError,
+    # which os.error (OSError) does not cover, so those failures would
+    # otherwise escape without being converted to DistutilsFileError.
+    fsrc = None
+    fdst = None
+    try:
+        try:
+            fsrc = open(src, 'rb')
+        except EnvironmentError as e:
+            raise DistutilsFileError(
+                "could not open '%s': %s" % (src, e.strerror))
+
+        if os.path.exists(dst):
+            try:
+                os.unlink(dst)
+            except EnvironmentError as e:
+                raise DistutilsFileError(
+                    "could not delete '%s': %s" % (dst, e.strerror))
+
+        try:
+            fdst = open(dst, 'wb')
+        except EnvironmentError as e:
+            raise DistutilsFileError(
+                "could not create '%s': %s" % (dst, e.strerror))
+
+        while 1:
+            try:
+                buf = fsrc.read(buffer_size)
+            except EnvironmentError as e:
+                raise DistutilsFileError(
+                    "could not read from '%s': %s" % (src, e.strerror))
+
+            if not buf:
+                break
+
+            try:
+                fdst.write(buf)
+            except EnvironmentError as e:
+                raise DistutilsFileError(
+                    "could not write to '%s': %s" % (dst, e.strerror))
+
+    finally:
+        # close whatever was successfully opened, even on error
+        if fdst:
+            fdst.close()
+        if fsrc:
+            fsrc.close()
+
+def copy_file(src, dst, preserve_mode=1, preserve_times=1, update=0,
+ link=None, verbose=1, dry_run=0):
+ """Copy a file 'src' to 'dst'.
+
+ If 'dst' is a directory, then 'src' is copied there with the same name;
+ otherwise, it must be a filename. (If the file exists, it will be
+ ruthlessly clobbered.) If 'preserve_mode' is true (the default),
+ the file's mode (type and permission bits, or whatever is analogous on
+ the current platform) is copied. If 'preserve_times' is true (the
+ default), the last-modified and last-access times are copied as well.
+ If 'update' is true, 'src' will only be copied if 'dst' does not exist,
+ or if 'dst' does exist but is older than 'src'.
+
+ 'link' allows you to make hard links (os.link) or symbolic links
+ (os.symlink) instead of copying: set it to "hard" or "sym"; if it is
+ None (the default), files are copied. Don't set 'link' on systems that
+ don't support it: 'copy_file()' doesn't check if hard or symbolic
+ linking is available. If hardlink fails, falls back to
+ _copy_file_contents().
+
+ File contents are copied with '_copy_file_contents()'.
+
+ Raises DistutilsFileError if 'src' is not a regular file, and
+ ValueError if 'link' is not None, "hard" or "sym".
+
+ Return a tuple (dest_name, copied): 'dest_name' is the actual name of
+ the output file, and 'copied' is true if the file was copied (or would
+ have been copied, if 'dry_run' true).
+ """
+ # XXX if the destination file already exists, we clobber it if
+ # copying, but blow up if linking. Hmmm. And I don't know what
+ # macostools.copyfile() does. Should definitely be consistent, and
+ # should probably blow up if destination exists and we would be
+ # changing it (ie. it's not already a hard/soft link to src OR
+ # (not update) and (src newer than dst).
+
+ from distutils.dep_util import newer
+ from stat import ST_ATIME, ST_MTIME, ST_MODE, S_IMODE
+
+ if not os.path.isfile(src):
+ raise DistutilsFileError(
+ "can't copy '%s': doesn't exist or not a regular file" % src)
+
+ # 'dir' is only used for the log message below.
+ if os.path.isdir(dst):
+ dir = dst
+ dst = os.path.join(dst, os.path.basename(src))
+ else:
+ dir = os.path.dirname(dst)
+
+ if update and not newer(src, dst):
+ if verbose >= 1:
+ log.debug("not copying %s (output up-to-date)", src)
+ return dst, 0
+
+ # Looking up the action text doubles as validation of 'link'.
+ try:
+ action = _copy_action[link]
+ except KeyError:
+ raise ValueError("invalid value '%s' for 'link' argument" % link)
+
+ if verbose >= 1:
+ if os.path.basename(dst) == os.path.basename(src):
+ log.info("%s %s -> %s", action, src, dir)
+ else:
+ log.info("%s %s -> %s", action, src, dst)
+
+ if dry_run:
+ return (dst, 1)
+
+ # If linking (hard or symbolic), use the appropriate system call
+ # (Unix only, of course, but that's the caller's responsibility)
+ # When 'dst' already exists and is the same file as 'src', no link is
+ # made and control falls through to the copy below.
+ if link == 'hard':
+ if not (os.path.exists(dst) and os.path.samefile(src, dst)):
+ try:
+ os.link(src, dst)
+ return (dst, 1)
+ except OSError:
+ # If hard linking fails, fall back on copying file
+ # (some special filesystems don't support hard linking
+ # even under Unix, see issue #8876).
+ pass
+ elif link == 'sym':
+ if not (os.path.exists(dst) and os.path.samefile(src, dst)):
+ os.symlink(src, dst)
+ return (dst, 1)
+
+ # Otherwise (non-Mac, not linking), copy the file contents and
+ # (optionally) copy the times and mode.
+ _copy_file_contents(src, dst)
+ if preserve_mode or preserve_times:
+ st = os.stat(src)
+
+ # According to David Ascher, utime() should be done
+ # before chmod() (at least under NT).
+ if preserve_times:
+ os.utime(dst, (st[ST_ATIME], st[ST_MTIME]))
+ if preserve_mode:
+ os.chmod(dst, S_IMODE(st[ST_MODE]))
+
+ return (dst, 1)
+
+# XXX I suspect this is Unix-specific -- need porting help!
+def move_file (src, dst, verbose=1, dry_run=0):
+    """Move a file 'src' to 'dst'.
+
+    If 'dst' is a directory, the file will be moved into it with the same
+    name; otherwise, 'src' is just renamed to 'dst'.  Return the new
+    full name of the file.
+
+    A rename that fails with EXDEV (cross-device) is carried out as
+    'copy_file()' followed by deleting 'src'.  Any other failure raises
+    DistutilsFileError.
+    """
+    from os.path import exists, isfile, isdir, basename, dirname
+    import errno
+
+    if verbose >= 1:
+        log.info("moving %s -> %s", src, dst)
+
+    if dry_run:
+        return dst
+
+    if not isfile(src):
+        raise DistutilsFileError("can't move '%s': not a regular file" % src)
+
+    if isdir(dst):
+        dst = os.path.join(dst, basename(src))
+    elif exists(dst):
+        raise DistutilsFileError(
+            "can't move '%s': destination '%s' already exists" %
+            (src, dst))
+
+    if not isdir(dirname(dst)):
+        raise DistutilsFileError(
+            "can't move '%s': destination '%s' not a valid path" %
+            (src, dst))
+
+    try:
+        os.rename(src, dst)
+    except os.error, (code, reason):
+        if code != errno.EXDEV:
+            raise DistutilsFileError(
+                "couldn't move '%s' to '%s': %s" % (src, dst, reason))
+    else:
+        # plain rename worked, nothing more to do
+        return dst
+
+    # Cross-device move: copy, then delete the source.  If the source
+    # cannot be deleted, try to remove the copy again before giving up.
+    copy_file(src, dst, verbose=verbose)
+    try:
+        os.unlink(src)
+    except os.error, (code, reason):
+        try:
+            os.unlink(dst)
+        except os.error:
+            pass
+        raise DistutilsFileError(
+            ("couldn't move '%s' to '%s' by copy/delete: " +
+             "delete '%s' failed: %s") %
+            (src, dst, src, reason))
+    return dst
+
+
+def write_file (filename, contents):
+    """Open 'filename' for writing and write every string of the
+    sequence 'contents' to it, appending a line terminator to each.
+    The file is closed even if writing fails.
+    """
+    out = open(filename, "w")
+    try:
+        for text in contents:
+            out.write(text + "\n")
+    finally:
+        out.close()
diff --git a/cashew/Lib/distutils/filelist.py b/cashew/Lib/distutils/filelist.py
new file mode 100644
index 0000000..2f1c457
--- /dev/null
+++ b/cashew/Lib/distutils/filelist.py
@@ -0,0 +1,343 @@
+"""distutils.filelist
+
+Provides the FileList class, used for poking about the filesystem
+and building lists of files.
+"""
+
+__revision__ = "$Id$"
+
+import os, re
+import fnmatch
+from distutils.util import convert_path
+from distutils.errors import DistutilsTemplateError, DistutilsInternalError
+from distutils import log
+
+class FileList:
+ """A list of files built by on exploring the filesystem and filtered by
+ applying various patterns to what we find there.
+
+ Instance attributes:
+ dir
+ directory from which files will be taken -- only used if
+ 'allfiles' not supplied to constructor
+ files
+ list of filenames currently being built/filtered/manipulated
+ allfiles
+ complete list of files under consideration (ie. without any
+ filtering applied)
+ """
+
+ def __init__(self, warn=None, debug_print=None):
+     """Start with an empty 'files' list and no 'allfiles' list.
+
+     'warn' and 'debug_print' are accepted only for backwards
+     compatibility; both are ignored.
+     """
+     self.files = []
+     self.allfiles = None
+
+ def set_allfiles(self, allfiles):
+ """Use 'allfiles' as the complete list of files under consideration."""
+ self.allfiles = allfiles
+
+ def findall(self, dir=os.curdir):
+ """Set 'allfiles' to the result of the module-level 'findall(dir)'."""
+ self.allfiles = findall(dir)
+
+ def debug_print(self, msg):
+     """Print 'msg' to stdout, but only when the DEBUG flag imported
+     from distutils.debug (taken from the DISTUTILS_DEBUG environment
+     variable) is true.
+     """
+     from distutils.debug import DEBUG
+     if not DEBUG:
+         return
+     print msg
+
+ # -- List-like methods ---------------------------------------------
+
+ def append(self, item):
+ """Append 'item' to the 'files' list."""
+ self.files.append(item)
+
+ def extend(self, items):
+ """Extend the 'files' list with 'items'."""
+ self.files.extend(items)
+
+ def sort(self):
+     """Sort 'files' by their (directory, basename) pairs."""
+     # Not a strict lexical sort!
+     split_names = [os.path.split(name) for name in self.files]
+     split_names.sort()
+     self.files = [os.path.join(*parts) for parts in split_names]
+
+
+ # -- Other miscellaneous utility methods ---------------------------
+
+ def remove_duplicates(self):
+     """Delete, in place, every entry of 'files' that equals the entry
+     just before it."""
+     # Assumes list has been sorted!
+     names = self.files
+     pos = len(names) - 1
+     # walk backwards so deletions never shift the entries still to visit
+     while pos > 0:
+         if names[pos] == names[pos - 1]:
+             del names[pos]
+         pos = pos - 1
+
+
+ # -- "File template" methods ---------------------------------------
+
+ def _parse_template_line(self, line):
+     """Split one template line into its action and arguments.
+
+     Returns the tuple (action, patterns, dir, dir_pattern); the items
+     that do not apply to 'action' are None.  Patterns and directories
+     are passed through 'convert_path()'.
+
+     Raises DistutilsTemplateError for an unknown action or when the
+     number of words on the line does not fit the action.
+     """
+     words = line.split()
+     action = words[0]
+
+     patterns = dir = dir_pattern = None
+
+     if action in ('include', 'exclude',
+                   'global-include', 'global-exclude'):
+         # action followed by one or more patterns
+         if len(words) < 2:
+             raise DistutilsTemplateError(
+                 "'%s' expects <pattern1> <pattern2> ..." % action)
+
+         patterns = map(convert_path, words[1:])
+
+     elif action in ('recursive-include', 'recursive-exclude'):
+         # action followed by a directory and one or more patterns
+         if len(words) < 3:
+             raise DistutilsTemplateError(
+                 "'%s' expects <dir> <pattern1> <pattern2> ..." % action)
+
+         dir = convert_path(words[1])
+         patterns = map(convert_path, words[2:])
+
+     elif action in ('graft', 'prune'):
+         # action followed by exactly one directory pattern
+         if len(words) != 2:
+             raise DistutilsTemplateError(
+                 "'%s' expects a single <dir_pattern>" % action)
+
+         dir_pattern = convert_path(words[1])
+
+     else:
+         raise DistutilsTemplateError("unknown action '%s'" % action)
+
+     return (action, patterns, dir, dir_pattern)
+
+ def process_template_line(self, line):
+ """Parse one template line and apply its action to this file list.
+
+ The include-type actions call 'include_pattern()' and the
+ exclude-type actions call 'exclude_pattern()'; a warning is logged
+ for every pattern that matched nothing. Errors raised while
+ parsing the line propagate to the caller.
+ """
+ # Parse the line: split it up, make sure the right number of words
+ # is there, and return the relevant words. 'action' is always
+ # defined: it's the first word of the line. Which of the other
+ # three are defined depends on the action; it'll be either
+ # patterns, (dir and patterns), or (dir_pattern).
+ action, patterns, dir, dir_pattern = self._parse_template_line(line)
+
+ # OK, now we know that the action is valid and we have the
+ # right number of words on the line for that action -- so we
+ # can proceed with minimal error-checking.
+ if action == 'include':
+ self.debug_print("include " + ' '.join(patterns))
+ for pattern in patterns:
+ if not self.include_pattern(pattern, anchor=1):
+ log.warn("warning: no files found matching '%s'",
+ pattern)
+
+ elif action == 'exclude':
+ self.debug_print("exclude " + ' '.join(patterns))
+ for pattern in patterns:
+ if not self.exclude_pattern(pattern, anchor=1):
+ log.warn(("warning: no previously-included files "
+ "found matching '%s'"), pattern)
+
+ # The global-* actions differ from include/exclude only in passing
+ # anchor=0.
+ elif action == 'global-include':
+ self.debug_print("global-include " + ' '.join(patterns))
+ for pattern in patterns:
+ if not self.include_pattern(pattern, anchor=0):
+ log.warn(("warning: no files found matching '%s' " +
+ "anywhere in distribution"), pattern)
+
+ elif action == 'global-exclude':
+ self.debug_print("global-exclude " + ' '.join(patterns))
+ for pattern in patterns:
+ if not self.exclude_pattern(pattern, anchor=0):
+ log.warn(("warning: no previously-included files matching "
+ "'%s' found anywhere in distribution"),
+ pattern)
+
+ # The recursive-* actions pass the directory as 'prefix'.
+ elif action == 'recursive-include':
+ self.debug_print("recursive-include %s %s" %
+ (dir, ' '.join(patterns)))
+ for pattern in patterns:
+ if not self.include_pattern(pattern, prefix=dir):
+ log.warn(("warning: no files found matching '%s' " +
+ "under directory '%s'"),
+ pattern, dir)
+
+ elif action == 'recursive-exclude':
+ self.debug_print("recursive-exclude %s %s" %
+ (dir, ' '.join(patterns)))
+ for pattern in patterns:
+ if not self.exclude_pattern(pattern, prefix=dir):
+ log.warn(("warning: no previously-included files matching "
+ "'%s' found under directory '%s'"),
+ pattern, dir)
+
+ # graft/prune pass no pattern at all, only the directory pattern as
+ # 'prefix'.
+ elif action == 'graft':
+ self.debug_print("graft " + dir_pattern)
+ if not self.include_pattern(None, prefix=dir_pattern):
+ log.warn("warning: no directories found matching '%s'",
+ dir_pattern)
+
+ elif action == 'prune':
+ self.debug_print("prune " + dir_pattern)
+ if not self.exclude_pattern(None, prefix=dir_pattern):
+ log.warn(("no previously-included directories found " +
+ "matching '%s'"), dir_pattern)
+ else:
+ raise DistutilsInternalError, \
+ "this cannot happen: invalid action '%s'" % action
+
+ # -- Filtering/selection methods -----------------------------------
+
+ def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
+ """Select strings (presumably filenames) from 'self.allfiles' that
+ match 'pattern', a Unix-style wildcard (glob) pattern.
+
+ Patterns are not quite the same as implemented by the 'fnmatch'
+ module: '*' and '?' match non-special characters, where "special"
+ is platform-dependent: slash on Unix; colon, slash, and backslash on
+ DOS/Windows; and colon on Mac OS.
+
+ If 'anchor' is true (the default), then the pattern match is more
+ stringent: "*.py" will match "foo.py" but not "foo/bar.py". If
+ 'anchor' is false, both of these will match.
+
+ If 'prefix' is supplied, then only filenames starting with 'prefix'
+ (itself a pattern) and ending with 'pattern', with anything in between
+ them, will match. 'anchor' is ignored in this case.
+
+ If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
+ 'pattern' is assumed to be either a string containing a regex or a
+ regex object -- no translation is done, the regex is just compiled
+ and used as-is.
+
+ Selected strings will be added to self.files.
+
+ Return 1 if files are found, 0 otherwise.
+ """
+ # XXX docstring lying about what the special chars are?
+ files_found = 0
+ pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
+ self.debug_print("include_pattern: applying regex r'%s'" %
+ pattern_re.pattern)
+
+ # delayed loading of allfiles list
+ if self.allfiles is None:
+ self.findall()
+
+ # Matching names are appended without checking whether they are
+ # already present in 'files'.
+ for name in self.allfiles:
+ if pattern_re.search(name):
+ self.debug_print(" adding " + name)
+ self.files.append(name)
+ files_found = 1
+
+ return files_found
+
+
+ def exclude_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
+     """Delete from 'self.files', in place, every string (presumably a
+     filename) that matches 'pattern'.
+
+     The parameters are handed to 'translate_pattern()' exactly as
+     'include_pattern()' does.  Return 1 if anything was removed, 0
+     otherwise.
+     """
+     removed_any = 0
+     matcher = translate_pattern(pattern, anchor, prefix, is_regex)
+     self.debug_print("exclude_pattern: applying regex r'%s'" %
+                      matcher.pattern)
+     # walk backwards so deletions never shift the entries still to visit
+     pos = len(self.files) - 1
+     while pos >= 0:
+         if matcher.search(self.files[pos]):
+             self.debug_print(" removing " + self.files[pos])
+             del self.files[pos]
+             removed_any = 1
+         pos = pos - 1
+
+     return removed_any
+
+
+# ----------------------------------------------------------------------
+# Utility functions
+
+def findall(dir = os.curdir):
+    """Walk the directory tree rooted at 'dir' and return the list of
+    regular files found, each as a filename that includes 'dir' (except
+    when 'dir' is os.curdir, which is left off).
+    """
+    from stat import ST_MODE, S_ISREG, S_ISDIR, S_ISLNK
+
+    found = []
+    pending = [dir]
+
+    while pending:
+        current = pending.pop()
+
+        for entry in os.listdir(current):
+            # avoid the dreaded "./" syndrome
+            if current == os.curdir:
+                path = entry
+            else:
+                path = os.path.join(current, entry)
+
+            # Avoid excess stat calls -- just one will do, thank you!
+            mode = os.stat(path)[ST_MODE]
+            if S_ISREG(mode):
+                found.append(path)
+            elif S_ISDIR(mode) and not S_ISLNK(mode):
+                pending.append(path)
+
+    return found
+
+
+def glob_to_re(pattern):
+ """Translate a shell-like glob pattern to a regular expression.
+
+ Return a string containing the regex. Differs from
+ 'fnmatch.translate()' in that '*' does not match "special characters"
+ (which are platform-specific).
+ """
+ pattern_re = fnmatch.translate(pattern)
+
+ # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
+ # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
+ # and by extension they shouldn't match such "special characters" under
+ # any OS. So change all non-escaped dots in the RE to match any
+ # character except the special characters (currently: just os.sep).
+ sep = os.sep
+ if os.sep == '\\':
+ # we're using a regex to manipulate a regex, so we need
+ # to escape the backslash twice
+ sep = r'\\\\'
+ escaped = r'\1[^%s]' % sep
+ pattern_re = re.sub(r'((?= self.threshold:
+ if args:
+ msg = msg % args
+ if level in (WARN, ERROR, FATAL):
+ stream = sys.stderr
+ else:
+ stream = sys.stdout
+ stream.write('%s\n' % msg)
+ stream.flush()
+
+ def log(self, level, msg, *args):
+ """Hand 'msg' to '_log()' at the given 'level', with the extra
+ positional arguments passed along as one tuple."""
+ self._log(level, msg, args)
+
+ def debug(self, msg, *args):
+ """Hand 'msg' and 'args' to '_log()' at level DEBUG."""
+ self._log(DEBUG, msg, args)
+
+ def info(self, msg, *args):
+ """Hand 'msg' and 'args' to '_log()' at level INFO."""
+ self._log(INFO, msg, args)
+
+ def warn(self, msg, *args):
+ """Hand 'msg' and 'args' to '_log()' at level WARN."""
+ self._log(WARN, msg, args)
+
+ def error(self, msg, *args):
+ """Hand 'msg' and 'args' to '_log()' at level ERROR."""
+ self._log(ERROR, msg, args)
+
+ def fatal(self, msg, *args):
+ """Hand 'msg' and 'args' to '_log()' at level FATAL."""
+ self._log(FATAL, msg, args)
+
+# One shared Log instance; its bound methods are exported under
+# module-level names so callers can use log.info(...), log.warn(...),
+# etc. without creating a Log themselves.
+_global_log = Log()
+log = _global_log.log
+debug = _global_log.debug
+info = _global_log.info
+warn = _global_log.warn
+error = _global_log.error
+fatal = _global_log.fatal
+
+def set_threshold(level):
+    """Set the threshold of the shared log object to 'level' and return
+    the threshold that was in effect before (handy for tests that want
+    to restore it)."""
+    previous = _global_log.threshold
+    _global_log.threshold = level
+    return previous
+
+def set_verbosity(v):
+    """Translate the verbosity count 'v' into a log threshold: WARN for
+    v <= 0, INFO for v == 1, DEBUG for v >= 2.  Any other value leaves
+    the threshold untouched."""
+    if v <= 0:
+        level = WARN
+    elif v == 1:
+        level = INFO
+    elif v >= 2:
+        level = DEBUG
+    else:
+        return
+    set_threshold(level)
diff --git a/cashew/Lib/distutils/msvccompiler.py b/cashew/Lib/distutils/msvccompiler.py
new file mode 100644
index 0000000..0e69fd3
--- /dev/null
+++ b/cashew/Lib/distutils/msvccompiler.py
@@ -0,0 +1,659 @@
+"""distutils.msvccompiler
+
+Contains MSVCCompiler, an implementation of the abstract CCompiler class
+for the Microsoft Visual Studio.
+"""
+
+# Written by Perry Stoll
+# hacked by Robin Becker and Thomas Heller to do a better job of
+# finding DevStudio (through the registry)
+
+__revision__ = "$Id$"
+
+import sys
+import os
+import string
+
+from distutils.errors import (DistutilsExecError, DistutilsPlatformError,
+ CompileError, LibError, LinkError)
+from distutils.ccompiler import CCompiler, gen_lib_options
+from distutils import log
+
+# Select a registry access API.  The _winreg module is tried first and the
+# win32api/win32con pair second; whichever imports provides the Reg* aliases
+# used below.  _can_read_reg stays 0 when neither import succeeds.
+_can_read_reg = 0
+try:
+ import _winreg
+
+ _can_read_reg = 1
+ hkey_mod = _winreg
+
+ RegOpenKeyEx = _winreg.OpenKeyEx
+ RegEnumKey = _winreg.EnumKey
+ RegEnumValue = _winreg.EnumValue
+ RegError = _winreg.error
+
+except ImportError:
+ try:
+ import win32api
+ import win32con
+ _can_read_reg = 1
+ hkey_mod = win32con
+
+ RegOpenKeyEx = win32api.RegOpenKeyEx
+ RegEnumKey = win32api.RegEnumKey
+ RegEnumValue = win32api.RegEnumValue
+ RegError = win32api.error
+
+ except ImportError:
+ # No registry module at all: only report it, the import of this
+ # module itself still succeeds.
+ log.info("Warning: Can't read registry to find the "
+ "necessary compiler setting\n"
+ "Make sure that Python modules _winreg, "
+ "win32api or win32con are installed.")
+ pass
+
+# Registry hives that the lookups in this module walk, in this order.
+# HKEYS is only defined when a registry module could be imported.
+if _can_read_reg:
+ HKEYS = (hkey_mod.HKEY_USERS,
+ hkey_mod.HKEY_CURRENT_USER,
+ hkey_mod.HKEY_LOCAL_MACHINE,
+ hkey_mod.HKEY_CLASSES_ROOT)
+
+def read_keys(base, key):
+ """Return list of registry keys."""
+
+ try:
+ handle = RegOpenKeyEx(base, key)
+ except RegError:
+ return None
+ L = []
+ i = 0
+ while 1:
+ try:
+ k = RegEnumKey(handle, i)
+ except RegError:
+ break
+ L.append(k)
+ i = i + 1
+ return L
+
+def read_values(base, key):
+ """Return dict of registry keys and values.
+
+ All names are converted to lowercase.
+ """
+ try:
+ handle = RegOpenKeyEx(base, key)
+ except RegError:
+ return None
+ d = {}
+ i = 0
+ while 1:
+ try:
+ name, value, type = RegEnumValue(handle, i)
+ except RegError:
+ break
+ name = name.lower()
+ d[convert_mbcs(name)] = convert_mbcs(value)
+ i = i + 1
+ return d
+
+def convert_mbcs(s):
+ enc = getattr(s, "encode", None)
+ if enc is not None:
+ try:
+ s = enc("mbcs")
+ except UnicodeError:
+ pass
+ return s
+
+class MacroExpander:
+
+ def __init__(self, version):
+ self.macros = {}
+ self.load_macros(version)
+
+ def set_macro(self, macro, path, key):
+ for base in HKEYS:
+ d = read_values(base, path)
+ if d:
+ self.macros["$(%s)" % macro] = d[key]
+ break
+
+ def load_macros(self, version):
+ vsbase = r"Software\Microsoft\VisualStudio\%0.1f" % version
+ self.set_macro("VCInstallDir", vsbase + r"\Setup\VC", "productdir")
+ self.set_macro("VSInstallDir", vsbase + r"\Setup\VS", "productdir")
+ net = r"Software\Microsoft\.NETFramework"
+ self.set_macro("FrameworkDir", net, "installroot")
+ try:
+ if version > 7.0:
+ self.set_macro("FrameworkSDKDir", net, "sdkinstallrootv1.1")
+ else:
+ self.set_macro("FrameworkSDKDir", net, "sdkinstallroot")
+ except KeyError:
+ raise DistutilsPlatformError, \
+ ("""Python was built with Visual Studio 2003;
+extensions must be built with a compiler than can generate compatible binaries.
+Visual Studio 2003 was not found on this system. If you have Cygwin installed,
+you can try compiling with MingW32, by passing "-c mingw32" to setup.py.""")
+
+ p = r"Software\Microsoft\NET Framework Setup\Product"
+ for base in HKEYS:
+ try:
+ h = RegOpenKeyEx(base, p)
+ except RegError:
+ continue
+ key = RegEnumKey(h, 0)
+ d = read_values(base, r"%s\%s" % (p, key))
+ self.macros["$(FrameworkVersion)"] = d["version"]
+
+ def sub(self, s):
+ for k, v in self.macros.items():
+ s = string.replace(s, k, v)
+ return s
+
+def get_build_version():
+ """Return the version of MSVC that was used to build Python.
+
+ For Python 2.3 and up, the version number is included in
+ sys.version. For earlier versions, assume the compiler is MSVC 6.
+ """
+
+ prefix = "MSC v."
+ i = string.find(sys.version, prefix)
+ if i == -1:
+ return 6
+ i = i + len(prefix)
+ s, rest = sys.version[i:].split(" ", 1)
+ majorVersion = int(s[:-2]) - 6
+ minorVersion = int(s[2:3]) / 10.0
+ # I don't think paths are affected by minor version in version 6
+ if majorVersion == 6:
+ minorVersion = 0
+ if majorVersion >= 6:
+ return majorVersion + minorVersion
+ # else we don't know what version of the compiler this is
+ return None
+
+def get_build_architecture():
+ """Return the processor architecture.
+
+ Possible results are "Intel", "Itanium", or "AMD64".
+ """
+
+ prefix = " bit ("
+ i = string.find(sys.version, prefix)
+ if i == -1:
+ return "Intel"
+ j = string.find(sys.version, ")", i)
+ return sys.version[i+len(prefix):j]
+
+def normalize_and_reduce_paths(paths):
+ """Return a list of normalized paths with duplicates removed.
+
+ The current order of paths is maintained.
+ """
+ # Paths are normalized so things like: /a and /a/ aren't both preserved.
+ reduced_paths = []
+ for p in paths:
+ np = os.path.normpath(p)
+ # XXX(nnorwitz): O(n**2), if reduced_paths gets long perhaps use a set.
+ if np not in reduced_paths:
+ reduced_paths.append(np)
+ return reduced_paths
+
+
+class MSVCCompiler (CCompiler) :
+ """Concrete class that implements an interface to Microsoft Visual C++,
+ as defined by the CCompiler abstract class."""
+
+ compiler_type = 'msvc'
+
+ # Just set this so CCompiler's constructor doesn't barf. We currently
+ # don't use the 'set_executables()' bureaucracy provided by CCompiler,
+ # as it really isn't necessary for this sort of single-compiler class.
+ # Would be nice to have a consistent interface with UnixCCompiler,
+ # though, so it's worth thinking about.
+ executables = {}
+
+ # Private class data (need to distinguish C from C++ source for compiler)
+ _c_extensions = ['.c']
+ _cpp_extensions = ['.cc', '.cpp', '.cxx']
+ _rc_extensions = ['.rc']
+ _mc_extensions = ['.mc']
+
+ # Needed for the filename generation methods provided by the
+ # base class, CCompiler.
+ src_extensions = (_c_extensions + _cpp_extensions +
+ _rc_extensions + _mc_extensions)
+ res_extension = '.res'
+ obj_extension = '.obj'
+ static_lib_extension = '.lib'
+ shared_lib_extension = '.dll'
+ static_lib_format = shared_lib_format = '%s%s'
+ exe_extension = '.exe'
+
+ def __init__ (self, verbose=0, dry_run=0, force=0):
+ """Record the compiler version and architecture Python was built with.
+
+ Only cheap bookkeeping happens here; locating the tools is left to
+ initialize(), which the worker methods call on first use.
+ """
+ CCompiler.__init__ (self, verbose, dry_run, force)
+ self.__version = get_build_version()
+ self.__arch = get_build_architecture()
+ if self.__arch == "Intel":
+ # x86
+ if self.__version >= 7:
+ self.__root = r"Software\Microsoft\VisualStudio"
+ self.__macros = MacroExpander(self.__version)
+ else:
+ self.__root = r"Software\Microsoft\Devstudio"
+ self.__product = "Visual Studio version %s" % self.__version
+ else:
+ # Win64. Assume this was built with the platform SDK
+ # NOTE(review): this branch sets neither __root nor __macros, which
+ # get_msvc_paths() reads -- presumably Win64 builds are expected to
+ # take the DISTUTILS_USE_SDK route in initialize(); confirm.
+ self.__product = "Microsoft SDK compiler %s" % (self.__version + 6)
+
+ # flipped to True at the end of initialize()
+ self.initialized = False
+
+ def initialize(self):
+ """Locate the MSVC tools and set up the option lists used later.
+
+ Sets self.cc, self.linker, self.lib, self.rc and self.mc, the
+ compile/link option lists, and finally self.initialized.  Raises
+ DistutilsPlatformError when no MSVC "path" directories are found.
+ """
+ self.__paths = []
+ # NOTE(review): find_exe() returns the bare program name when nothing is
+ # found, so the last test below looks like it is always true -- confirm.
+ if "DISTUTILS_USE_SDK" in os.environ and "MSSdk" in os.environ and self.find_exe("cl.exe"):
+ # Assume that the SDK set up everything alright; don't try to be
+ # smarter
+ self.cc = "cl.exe"
+ self.linker = "link.exe"
+ self.lib = "lib.exe"
+ self.rc = "rc.exe"
+ self.mc = "mc.exe"
+ else:
+ self.__paths = self.get_msvc_paths("path")
+
+ if len (self.__paths) == 0:
+ raise DistutilsPlatformError, \
+ ("Python was built with %s, "
+ "and extensions need to be built with the same "
+ "version of the compiler, but it isn't installed." % self.__product)
+
+ # find_exe() searches self.__paths, so it must be filled before this
+ self.cc = self.find_exe("cl.exe")
+ self.linker = self.find_exe("link.exe")
+ self.lib = self.find_exe("lib.exe")
+ self.rc = self.find_exe("rc.exe") # resource compiler
+ self.mc = self.find_exe("mc.exe") # message compiler
+ self.set_path_env_var('lib')
+ self.set_path_env_var('include')
+
+ # extend the MSVC path with the current path
+ try:
+ for p in string.split(os.environ['path'], ';'):
+ self.__paths.append(p)
+ except KeyError:
+ pass
+ self.__paths = normalize_and_reduce_paths(self.__paths)
+ os.environ['path'] = string.join(self.__paths, ';')
+
+ self.preprocess_options = None
+ # the two architectures differ only in '/GX' versus '/GS-'
+ if self.__arch == "Intel":
+ self.compile_options = [ '/nologo', '/Ox', '/MD', '/W3', '/GX' ,
+ '/DNDEBUG']
+ self.compile_options_debug = ['/nologo', '/Od', '/MDd', '/W3', '/GX',
+ '/Z7', '/D_DEBUG']
+ else:
+ # Win64
+ self.compile_options = [ '/nologo', '/Ox', '/MD', '/W3', '/GS-' ,
+ '/DNDEBUG']
+ self.compile_options_debug = ['/nologo', '/Od', '/MDd', '/W3', '/GS-',
+ '/Z7', '/D_DEBUG']
+
+ self.ldflags_shared = ['/DLL', '/nologo', '/INCREMENTAL:NO']
+ # '/pdb:None' is only passed for compiler versions below 7
+ if self.__version >= 7:
+ self.ldflags_shared_debug = [
+ '/DLL', '/nologo', '/INCREMENTAL:no', '/DEBUG'
+ ]
+ else:
+ self.ldflags_shared_debug = [
+ '/DLL', '/nologo', '/INCREMENTAL:no', '/pdb:None', '/DEBUG'
+ ]
+ self.ldflags_static = [ '/nologo']
+
+ self.initialized = True
+
+ # -- Worker methods ------------------------------------------------
+
+ def object_filenames (self,
+ source_filenames,
+ strip_dir=0,
+ output_dir=''):
+ # Copied from ccompiler.py, extended to return .res as 'object'-file
+ # for .rc input file
+ if output_dir is None: output_dir = ''
+ obj_names = []
+ for src_name in source_filenames:
+ (base, ext) = os.path.splitext (src_name)
+ base = os.path.splitdrive(base)[1] # Chop off the drive
+ base = base[os.path.isabs(base):] # If abs, chop off leading /
+ if ext not in self.src_extensions:
+ # Better to raise an exception instead of silently continuing
+ # and later complain about sources and targets having
+ # different lengths
+ raise CompileError ("Don't know how to compile %s" % src_name)
+ if strip_dir:
+ base = os.path.basename (base)
+ if ext in self._rc_extensions:
+ obj_names.append (os.path.join (output_dir,
+ base + self.res_extension))
+ elif ext in self._mc_extensions:
+ obj_names.append (os.path.join (output_dir,
+ base + self.res_extension))
+ else:
+ obj_names.append (os.path.join (output_dir,
+ base + self.obj_extension))
+ return obj_names
+
+ # object_filenames ()
+
+
+ def compile(self, sources,
+ output_dir=None, macros=None, include_dirs=None, debug=0,
+ extra_preargs=None, extra_postargs=None, depends=None):
+
+ if not self.initialized: self.initialize()
+ macros, objects, extra_postargs, pp_opts, build = \
+ self._setup_compile(output_dir, macros, include_dirs, sources,
+ depends, extra_postargs)
+
+ compile_opts = extra_preargs or []
+ compile_opts.append ('/c')
+ if debug:
+ compile_opts.extend(self.compile_options_debug)
+ else:
+ compile_opts.extend(self.compile_options)
+
+ for obj in objects:
+ try:
+ src, ext = build[obj]
+ except KeyError:
+ continue
+ if debug:
+ # pass the full pathname to MSVC in debug mode,
+ # this allows the debugger to find the source file
+ # without asking the user to browse for it
+ src = os.path.abspath(src)
+
+ if ext in self._c_extensions:
+ input_opt = "/Tc" + src
+ elif ext in self._cpp_extensions:
+ input_opt = "/Tp" + src
+ elif ext in self._rc_extensions:
+ # compile .RC to .RES file
+ input_opt = src
+ output_opt = "/fo" + obj
+ try:
+ self.spawn ([self.rc] + pp_opts +
+ [output_opt] + [input_opt])
+ except DistutilsExecError, msg:
+ raise CompileError, msg
+ continue
+ elif ext in self._mc_extensions:
+
+ # Compile .MC to .RC file to .RES file.
+ # * '-h dir' specifies the directory for the
+ # generated include file
+ # * '-r dir' specifies the target directory of the
+ # generated RC file and the binary message resource
+ # it includes
+ #
+ # For now (since there are no options to change this),
+ # we use the source-directory for the include file and
+ # the build directory for the RC file and message
+ # resources. This works at least for win32all.
+
+ h_dir = os.path.dirname (src)
+ rc_dir = os.path.dirname (obj)
+ try:
+ # first compile .MC to .RC and .H file
+ self.spawn ([self.mc] +
+ ['-h', h_dir, '-r', rc_dir] + [src])
+ base, _ = os.path.splitext (os.path.basename (src))
+ rc_file = os.path.join (rc_dir, base + '.rc')
+ # then compile .RC to .RES file
+ self.spawn ([self.rc] +
+ ["/fo" + obj] + [rc_file])
+
+ except DistutilsExecError, msg:
+ raise CompileError, msg
+ continue
+ else:
+ # how to handle this file?
+ raise CompileError (
+ "Don't know how to compile %s to %s" % \
+ (src, obj))
+
+ output_opt = "/Fo" + obj
+ try:
+ self.spawn ([self.cc] + compile_opts + pp_opts +
+ [input_opt, output_opt] +
+ extra_postargs)
+ except DistutilsExecError, msg:
+ raise CompileError, msg
+
+ return objects
+
+ # compile ()
+
+
+ def create_static_lib (self,
+ objects,
+ output_libname,
+ output_dir=None,
+ debug=0,
+ target_lang=None):
+
+ if not self.initialized: self.initialize()
+ (objects, output_dir) = self._fix_object_args (objects, output_dir)
+ output_filename = \
+ self.library_filename (output_libname, output_dir=output_dir)
+
+ if self._need_link (objects, output_filename):
+ lib_args = objects + ['/OUT:' + output_filename]
+ if debug:
+ pass # XXX what goes here?
+ try:
+ self.spawn ([self.lib] + lib_args)
+ except DistutilsExecError, msg:
+ raise LibError, msg
+
+ else:
+ log.debug("skipping %s (up-to-date)", output_filename)
+
+ # create_static_lib ()
+
+ def link (self,
+ target_desc,
+ objects,
+ output_filename,
+ output_dir=None,
+ libraries=None,
+ library_dirs=None,
+ runtime_library_dirs=None,
+ export_symbols=None,
+ debug=0,
+ extra_preargs=None,
+ extra_postargs=None,
+ build_temp=None,
+ target_lang=None):
+ """Link 'objects' into 'output_filename' with the MSVC linker.
+
+ Nothing is run when _need_link() reports the target as up to date.
+ 'runtime_library_dirs' only triggers a warning.  Raises LinkError
+ when the linker fails.
+ """
+
+ if not self.initialized: self.initialize()
+ (objects, output_dir) = self._fix_object_args (objects, output_dir)
+ (libraries, library_dirs, runtime_library_dirs) = \
+ self._fix_lib_args (libraries, library_dirs, runtime_library_dirs)
+
+ if runtime_library_dirs:
+ self.warn ("I don't know what to do with 'runtime_library_dirs': "
+ + str (runtime_library_dirs))
+
+ lib_opts = gen_lib_options (self,
+ library_dirs, runtime_library_dirs,
+ libraries)
+ if output_dir is not None:
+ output_filename = os.path.join (output_dir, output_filename)
+
+ if self._need_link (objects, output_filename):
+
+ # An executable reuses the shared-library flags minus their first
+ # entry, which initialize() sets to '/DLL'.
+ if target_desc == CCompiler.EXECUTABLE:
+ if debug:
+ ldflags = self.ldflags_shared_debug[1:]
+ else:
+ ldflags = self.ldflags_shared[1:]
+ else:
+ if debug:
+ ldflags = self.ldflags_shared_debug
+ else:
+ ldflags = self.ldflags_shared
+
+ export_opts = []
+ for sym in (export_symbols or []):
+ export_opts.append("/EXPORT:" + sym)
+
+ ld_args = (ldflags + lib_opts + export_opts +
+ objects + ['/OUT:' + output_filename])
+
+ # The MSVC linker generates .lib and .exp files, which cannot be
+ # suppressed by any linker switches. The .lib files may even be
+ # needed! Make sure they are generated in the temporary build
+ # directory. Since they have different names for debug and release
+ # builds, they can go into the same directory.
+ # NOTE(review): objects[0] below assumes at least one object file was
+ # passed -- confirm callers never link with an empty list here.
+ if export_symbols is not None:
+ (dll_name, dll_ext) = os.path.splitext(
+ os.path.basename(output_filename))
+ implib_file = os.path.join(
+ os.path.dirname(objects[0]),
+ self.library_filename(dll_name))
+ ld_args.append ('/IMPLIB:' + implib_file)
+
+ # caller-supplied arguments wrap the generated ones
+ if extra_preargs:
+ ld_args[:0] = extra_preargs
+ if extra_postargs:
+ ld_args.extend(extra_postargs)
+
+ self.mkpath (os.path.dirname (output_filename))
+ try:
+ self.spawn ([self.linker] + ld_args)
+ except DistutilsExecError, msg:
+ raise LinkError, msg
+
+ else:
+ log.debug("skipping %s (up-to-date)", output_filename)
+
+ # link ()
+
+
+ # -- Miscellaneous methods -----------------------------------------
+ # These are all used by the 'gen_lib_options()' function, in
+ # ccompiler.py.
+
+ def library_dir_option (self, dir):
+ """Return the linker option that adds 'dir' to the library search path."""
+ return "/LIBPATH:" + dir
+
+ def runtime_library_dir_option (self, dir):
+ """Always raise DistutilsPlatformError: no such option is known for MSVC++."""
+ raise DistutilsPlatformError, \
+ "don't know how to set runtime library search path for MSVC++"
+
+ def library_option (self, lib):
+ """Return the library file name for 'lib', as given by library_filename()."""
+ return self.library_filename (lib)
+
+
+ def find_library_file (self, dirs, lib, debug=0):
+ # Prefer a debugging library if found (and requested), but deal
+ # with it if we don't have one.
+ if debug:
+ try_names = [lib + "_d", lib]
+ else:
+ try_names = [lib]
+ for dir in dirs:
+ for name in try_names:
+ libfile = os.path.join(dir, self.library_filename (name))
+ if os.path.exists(libfile):
+ return libfile
+ else:
+ # Oops, didn't find it in *any* of 'dirs'
+ return None
+
+ # find_library_file ()
+
+ # Helper methods for using the MSVC registry settings
+
+ def find_exe(self, exe):
+ """Return path to an MSVC executable program.
+
+ Tries to find the program in several places: first, one of the
+ MSVC program search paths from the registry; next, the directories
+ in the PATH environment variable. If any of those work, return an
+ absolute path that is known to exist. If none of them work, just
+ return the original program name, 'exe'.
+ """
+
+ for p in self.__paths:
+ fn = os.path.join(os.path.abspath(p), exe)
+ if os.path.isfile(fn):
+ return fn
+
+ # didn't find it; try existing path
+ for p in string.split(os.environ['Path'],';'):
+ fn = os.path.join(os.path.abspath(p),exe)
+ if os.path.isfile(fn):
+ return fn
+
+ return exe
+
+ def get_msvc_paths(self, path, platform='x86'):
+ """Get a list of devstudio directories (include, lib or path).
+
+ Return a list of strings. The list will be empty if unable to
+ access the registry or appropriate registry keys not found.
+ """
+
+ if not _can_read_reg:
+ return []
+
+ path = path + " dirs"
+ if self.__version >= 7:
+ key = (r"%s\%0.1f\VC\VC_OBJECTS_PLATFORM_INFO\Win32\Directories"
+ % (self.__root, self.__version))
+ else:
+ key = (r"%s\6.0\Build System\Components\Platforms"
+ r"\Win32 (%s)\Directories" % (self.__root, platform))
+
+ for base in HKEYS:
+ d = read_values(base, key)
+ if d:
+ if self.__version >= 7:
+ return string.split(self.__macros.sub(d[path]), ";")
+ else:
+ return string.split(d[path], ";")
+ # MSVC 6 seems to create the registry entries we need only when
+ # the GUI is run.
+ if self.__version == 6:
+ for base in HKEYS:
+ if read_values(base, r"%s\6.0" % self.__root) is not None:
+ self.warn("It seems you have Visual Studio 6 installed, "
+ "but the expected registry settings are not present.\n"
+ "You must at least run the Visual Studio GUI once "
+ "so that these entries are created.")
+ break
+ return []
+
+ def set_path_env_var(self, name):
+ """Set environment variable 'name' to an MSVC path type value.
+
+ This is equivalent to a SET command prior to execution of spawned
+ commands.
+ """
+
+ if name == "lib":
+ p = self.get_msvc_paths("library")
+ else:
+ p = self.get_msvc_paths(name)
+ if p:
+ os.environ[name] = string.join(p, ';')
+
+
+# When Python was built with MSVC 8.0 or newer, rebind MSVCCompiler and
+# MacroExpander to the versions from distutils.msvc9compiler; the class
+# defined in this module stays reachable as OldMSVCCompiler.
+if get_build_version() >= 8.0:
+ log.debug("Importing new compiler from distutils.msvc9compiler")
+ OldMSVCCompiler = MSVCCompiler
+ from distutils.msvc9compiler import MSVCCompiler
+ # get_build_architecture not really relevant now we support cross-compile
+ from distutils.msvc9compiler import MacroExpander
diff --git a/cashew/Lib/distutils/spawn.py b/cashew/Lib/distutils/spawn.py
new file mode 100644
index 0000000..321344a
--- /dev/null
+++ b/cashew/Lib/distutils/spawn.py
@@ -0,0 +1,226 @@
+"""distutils.spawn
+
+Provides the 'spawn()' function, a front-end to various platform-
+specific functions for launching another program in a sub-process.
+Also provides the 'find_executable()' to search the path for a given
+executable name.
+"""
+
+__revision__ = "$Id$"
+
+import sys
+import os
+
+from distutils.errors import DistutilsPlatformError, DistutilsExecError
+from distutils.debug import DEBUG
+from distutils import log
+
+def spawn(cmd, search_path=1, verbose=0, dry_run=0):
+ """Run another program, specified as a command list 'cmd', in a new process.
+
+ 'cmd' is just the argument list for the new process, ie.
+ cmd[0] is the program to run and cmd[1:] are the rest of its arguments.
+ There is no way to run a program with a name different from that of its
+ executable.
+
+ If 'search_path' is true (the default), the system's executable
+ search path will be used to find the program; otherwise, cmd[0]
+ must be the exact path to the executable. If 'dry_run' is true,
+ the command will not actually be run.
+
+ Raise DistutilsExecError if running the program fails in any way; just
+ return on success.
+ """
+ # cmd is documented as a list, but just in case some code passes a tuple
+ # in, protect our %-formatting code against horrible death
+ cmd = list(cmd)
+ if os.name == 'posix':
+ _spawn_posix(cmd, search_path, dry_run=dry_run)
+ elif os.name == 'nt':
+ _spawn_nt(cmd, search_path, dry_run=dry_run)
+ elif os.name == 'os2':
+ _spawn_os2(cmd, search_path, dry_run=dry_run)
+ else:
+ raise DistutilsPlatformError, \
+ "don't know how to spawn programs on platform '%s'" % os.name
+
+def _nt_quote_args(args):
+ """Quote command-line arguments for DOS/Windows conventions.
+
+ Just wraps every argument which contains blanks in double quotes, and
+ returns a new argument list.
+ """
+ # XXX this doesn't seem very robust to me -- but if the Windows guys
+ # say it'll work, I guess I'll have to accept it. (What if an arg
+ # contains quotes? What other magic characters, other than spaces,
+ # have to be escaped? Is there an escaping mechanism other than
+ # quoting?)
+ for i, arg in enumerate(args):
+ if ' ' in arg:
+ args[i] = '"%s"' % arg
+ return args
+
+def _spawn_nt(cmd, search_path=1, verbose=0, dry_run=0):
+ executable = cmd[0]
+ cmd = _nt_quote_args(cmd)
+ if search_path:
+ # either we find one or it stays the same
+ executable = find_executable(executable) or executable
+ log.info(' '.join([executable] + cmd[1:]))
+ if not dry_run:
+ # spawn for NT requires a full path to the .exe
+ try:
+ rc = os.spawnv(os.P_WAIT, executable, cmd)
+ except OSError, exc:
+ # this seems to happen when the command isn't found
+ if not DEBUG:
+ cmd = executable
+ raise DistutilsExecError, \
+ "command %r failed: %s" % (cmd, exc[-1])
+ if rc != 0:
+ # and this reflects the command running but failing
+ if not DEBUG:
+ cmd = executable
+ raise DistutilsExecError, \
+ "command %r failed with exit status %d" % (cmd, rc)
+
+def _spawn_os2(cmd, search_path=1, verbose=0, dry_run=0):
+ executable = cmd[0]
+ if search_path:
+ # either we find one or it stays the same
+ executable = find_executable(executable) or executable
+ log.info(' '.join([executable] + cmd[1:]))
+ if not dry_run:
+ # spawnv for OS/2 EMX requires a full path to the .exe
+ try:
+ rc = os.spawnv(os.P_WAIT, executable, cmd)
+ except OSError, exc:
+ # this seems to happen when the command isn't found
+ if not DEBUG:
+ cmd = executable
+ raise DistutilsExecError, \
+ "command %r failed: %s" % (cmd, exc[-1])
+ if rc != 0:
+ # and this reflects the command running but failing
+ if not DEBUG:
+ cmd = executable
+ log.debug("command %r failed with exit status %d" % (cmd, rc))
+ raise DistutilsExecError, \
+ "command %r failed with exit status %d" % (cmd, rc)
+
+# Module-level state used by _spawn_posix() on darwin: the configured
+# MACOSX_DEPLOYMENT_TARGET and its parsed form, both filled in on first use.
+if sys.platform == 'darwin':
+ from distutils import sysconfig
+ _cfg_target = None
+ _cfg_target_split = None
+
+def _spawn_posix(cmd, search_path=1, verbose=0, dry_run=0):
+ """Fork, exec 'cmd' in the child and wait for it in the parent.
+
+ Returns normally when the child exits with status 0.  Raises
+ DistutilsExecError when the child exits with another status, is killed
+ by a signal or cannot be waited for, and DistutilsPlatformError on
+ darwin when $MACOSX_DEPLOYMENT_TARGET is lower than the configured one.
+ """
+ log.info(' '.join(cmd))
+ if dry_run:
+ return
+ executable = cmd[0]
+ # the *p variants look the program up on the search path
+ exec_fn = search_path and os.execvp or os.execv
+ env = None
+ if sys.platform == 'darwin':
+ global _cfg_target, _cfg_target_split
+ # read and parse the configured deployment target only once
+ if _cfg_target is None:
+ _cfg_target = sysconfig.get_config_var(
+ 'MACOSX_DEPLOYMENT_TARGET') or ''
+ if _cfg_target:
+ _cfg_target_split = [int(x) for x in _cfg_target.split('.')]
+ if _cfg_target:
+ # ensure that the deployment target of build process is not less
+ # than that used when the interpreter was built. This ensures
+ # extension modules are built with correct compatibility values
+ cur_target = os.environ.get('MACOSX_DEPLOYMENT_TARGET', _cfg_target)
+ if _cfg_target_split > [int(x) for x in cur_target.split('.')]:
+ my_msg = ('$MACOSX_DEPLOYMENT_TARGET mismatch: '
+ 'now "%s" but "%s" during configure'
+ % (cur_target, _cfg_target))
+ raise DistutilsPlatformError(my_msg)
+ # the child gets an explicit environment, hence the *e exec variants
+ env = dict(os.environ,
+ MACOSX_DEPLOYMENT_TARGET=cur_target)
+ exec_fn = search_path and os.execvpe or os.execve
+ pid = os.fork()
+
+ if pid == 0: # in the child
+ try:
+ if env is None:
+ exec_fn(executable, cmd)
+ else:
+ exec_fn(executable, cmd, env)
+ except OSError, e:
+ if not DEBUG:
+ cmd = executable
+ sys.stderr.write("unable to execute %r: %s\n" %
+ (cmd, e.strerror))
+ os._exit(1)
+
+ # only reached when the exec call returned without raising
+ if not DEBUG:
+ cmd = executable
+ sys.stderr.write("unable to execute %r for unknown reasons" % cmd)
+ os._exit(1)
+ else: # in the parent
+ # Loop until the child either exits or is terminated by a signal
+ # (ie. keep waiting if it's merely stopped)
+ while 1:
+ try:
+ pid, status = os.waitpid(pid, 0)
+ except OSError, exc:
+ import errno
+ # an interrupted wait is simply retried
+ if exc.errno == errno.EINTR:
+ continue
+ if not DEBUG:
+ cmd = executable
+ raise DistutilsExecError, \
+ "command %r failed: %s" % (cmd, exc[-1])
+ if os.WIFSIGNALED(status):
+ if not DEBUG:
+ cmd = executable
+ raise DistutilsExecError, \
+ "command %r terminated by signal %d" % \
+ (cmd, os.WTERMSIG(status))
+
+ elif os.WIFEXITED(status):
+ exit_status = os.WEXITSTATUS(status)
+ if exit_status == 0:
+ return # hey, it succeeded!
+ else:
+ if not DEBUG:
+ cmd = executable
+ raise DistutilsExecError, \
+ "command %r failed with exit status %d" % \
+ (cmd, exit_status)
+
+ elif os.WIFSTOPPED(status):
+ continue
+
+ else:
+ if not DEBUG:
+ cmd = executable
+ raise DistutilsExecError, \
+ "unknown error executing %r: termination status %d" % \
+ (cmd, status)
+
+def find_executable(executable, path=None):
+ """Tries to find 'executable' in the directories listed in 'path'.
+
+ A string listing directories separated by 'os.pathsep'; defaults to
+ os.environ['PATH']. Returns the complete filename or None if not found.
+ """
+ if path is None:
+ path = os.environ['PATH']
+ paths = path.split(os.pathsep)
+ base, ext = os.path.splitext(executable)
+
+ if (sys.platform == 'win32' or os.name == 'os2') and (ext != '.exe'):
+ executable = executable + '.exe'
+
+ if not os.path.isfile(executable):
+ for p in paths:
+ f = os.path.join(p, executable)
+ if os.path.isfile(f):
+ # the file exists, we have a shot at spawn working
+ return f
+ return None
+ else:
+ return executable
diff --git a/cashew/Lib/distutils/sysconfig.py b/cashew/Lib/distutils/sysconfig.py
new file mode 100644
index 0000000..de7da1d
--- /dev/null
+++ b/cashew/Lib/distutils/sysconfig.py
@@ -0,0 +1,483 @@
+"""Provide access to Python's configuration information. The specific
+configuration variables available depend heavily on the platform and
+configuration. The values may be retrieved using
+get_config_var(name), and the list of variables is available via
+get_config_vars().keys(). Additional convenience functions are also
+available.
+
+Written by: Fred L. Drake, Jr.
+Email:
+"""
+
+__revision__ = "$Id$"
+
+import os
+import re
+import string
+import sys
+
+from distutils.errors import DistutilsPlatformError
+
+# Normalized copies of sys.prefix and sys.exec_prefix. They are needed in
+# a couple of spots, so just compute them once.
+PREFIX = os.path.normpath(sys.prefix)
+EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
+
+# Path to the base directory of the project: by default the directory
+# that holds the running interpreter. On Windows the binary may live in
+# project/PCBuild9. If we're dealing with an x64 Windows build, it'll
+# live in project/PCbuild/amd64. Each check below only looks at the tail
+# of the path (lower-cased) and climbs up when it matches.
+project_base = os.path.dirname(os.path.abspath(sys.executable))
+# PCbuild*: the project root is one level up
+if os.name == "nt" and "pcbuild" in project_base[-8:].lower():
+ project_base = os.path.abspath(os.path.join(project_base, os.path.pardir))
+# PC/VS7.1: the project root is two levels up
+if os.name == "nt" and "\\pc\\v" in project_base[-10:].lower():
+ project_base = os.path.abspath(os.path.join(project_base, os.path.pardir,
+ os.path.pardir))
+# PCbuild/amd64: the project root is two levels up
+if os.name == "nt" and "\\pcbuild\\amd64" in project_base[-14:].lower():
+ project_base = os.path.abspath(os.path.join(project_base, os.path.pardir,
+ os.path.pardir))
+
+# set for cross builds: when present, the environment variable overrides
+# whatever was derived from sys.executable above
+if "_PYTHON_PROJECT_BASE" in os.environ:
+ # this is the build directory, at least for posix
+ project_base = os.path.normpath(os.environ["_PYTHON_PROJECT_BASE"])
+
+# python_build: (Boolean) if true, we're either building Python or
+# building an extension with an un-installed Python, so we use
+# different (hard-wired) directories.
+# Setup.local is available for Makefile builds including VPATH builds,
+# Setup.dist is available on Windows
+def _python_build():
+    """Tell whether we are running from a Python source/build tree.
+
+    Returns True when 'Setup.dist' or 'Setup.local' exists as a regular
+    file in the 'Modules' directory below 'project_base', else False.
+    """
+    modules_dir = os.path.join(project_base, "Modules")
+    return any(os.path.isfile(os.path.join(modules_dir, setup_file))
+               for setup_file in ("Setup.dist", "Setup.local"))
+python_build = _python_build()
+
+
+def get_python_version():
+    """Return a string containing the major and minor Python version,
+    leaving off the patchlevel.  Sample return values could be '1.5'
+    or '2.2'.
+    """
+    # Build the string from sys.version_info rather than slicing
+    # sys.version: the slice truncates any version whose "major.minor"
+    # part is longer than three characters (e.g. '3.10' -> '3.1').
+    return "%d.%d" % sys.version_info[:2]
+
+
+def get_python_inc(plat_specific=0, prefix=None):
+    """Return the directory containing installed Python header files.
+
+    With 'plat_specific' false (the default) the result is the location
+    of the platform-independent headers (Python.h and friends); with it
+    true, the location of the platform-specific ones (namely pyconfig.h).
+
+    An explicit 'prefix' replaces sys.prefix / sys.exec_prefix, which
+    means 'plat_specific' no longer influences the prefix choice.
+
+    Raises DistutilsPlatformError on platforms other than posix, nt
+    and os2.
+    """
+    if prefix is None:
+        prefix = EXEC_PREFIX if plat_specific else PREFIX
+
+    platform = os.name
+    if platform == "nt":
+        return os.path.join(prefix, "include")
+    if platform == "os2":
+        return os.path.join(prefix, "Include")
+    if platform != "posix":
+        raise DistutilsPlatformError(
+            "I don't know where Python installs its C header files "
+            "on platform '%s'" % os.name)
+
+    if not python_build:
+        return os.path.join(prefix, "include",
+                            "python" + get_python_version())
+
+    # Running from a build tree: headers are found relative to the
+    # directory holding the interpreter.
+    build_dir = os.path.dirname(sys.executable)
+    if plat_specific:
+        # the platform-specific header is located in the build directory
+        return build_dir
+    # the source directory is given relative to the build directory, and
+    # Include lives inside it
+    source_dir = os.path.abspath(
+        os.path.join(build_dir, get_config_var('srcdir')))
+    return os.path.join(source_dir, "Include")
+
+
+def get_python_lib(plat_specific=0, standard_lib=0, prefix=None):
+    """Return the directory containing the Python library (standard or
+    site additions).
+
+    A true 'plat_specific' selects the directory for platform-specific
+    modules (anything from a non-pure-Python module distribution); a
+    false one selects the platform-shared library directory.  A true
+    'standard_lib' selects the standard library directory itself; a
+    false one selects the directory for site-specific modules.
+
+    An explicit 'prefix' replaces sys.prefix / sys.exec_prefix, which
+    means 'plat_specific' no longer influences the prefix choice.
+
+    Raises DistutilsPlatformError on platforms other than posix, nt
+    and os2.
+    """
+    if prefix is None:
+        prefix = EXEC_PREFIX if plat_specific else PREFIX
+
+    if os.name == "posix":
+        stdlib_dir = os.path.join(prefix,
+                                  "lib", "python" + get_python_version())
+        if standard_lib:
+            return stdlib_dir
+        return os.path.join(stdlib_dir, "site-packages")
+
+    if os.name == "nt":
+        if standard_lib:
+            return os.path.join(prefix, "Lib")
+        if get_python_version() < "2.2":
+            # before 2.2 site modules went straight into the prefix
+            return prefix
+        return os.path.join(prefix, "Lib", "site-packages")
+
+    if os.name == "os2":
+        if standard_lib:
+            return os.path.join(prefix, "Lib")
+        return os.path.join(prefix, "Lib", "site-packages")
+
+    raise DistutilsPlatformError(
+        "I don't know where Python installs its library "
+        "on platform '%s'" % os.name)
+
+
+
+def customize_compiler(compiler):
+ """Do any platform-specific customization of a CCompiler instance.
+
+ Mainly needed on Unix, so we can plug in the information that
+ varies across Unices and is stored in Python's Makefile.
+
+ The command lines start from the CC, CXX, OPT, CFLAGS, CCSHARED,
+ LDSHARED, SO, AR and ARFLAGS configuration variables. The environment
+ variables CC, CXX, LDSHARED, CPP, LDFLAGS, CFLAGS, CPPFLAGS, AR and
+ ARFLAGS, when set, override or extend them. The result is passed to
+ compiler.set_executables(), and compiler.shared_lib_extension is set
+ to the SO value.
+ """
+ if compiler.compiler_type == "unix":
+ if sys.platform == "darwin":
+ # Perform first-time customization of compiler-related
+ # config vars on OS X now that we know we need a compiler.
+ # This is primarily to support Pythons from binary
+ # installers. The kind and paths to build tools on
+ # the user system may vary significantly from the system
+ # that Python itself was built on. Also the user OS
+ # version and build tools may not support the same set
+ # of CPU architectures for universal builds.
+ global _config_vars
+ # Use get_config_var() to ensure _config_vars is initialized.
+ if not get_config_var('CUSTOMIZED_OSX_COMPILER'):
+ import _osx_support
+ _osx_support.customize_compiler(_config_vars)
+ # remember that the customization has been done
+ _config_vars['CUSTOMIZED_OSX_COMPILER'] = 'True'
+
+ (cc, cxx, opt, cflags, ccshared, ldshared, so_ext, ar, ar_flags) = \
+ get_config_vars('CC', 'CXX', 'OPT', 'CFLAGS',
+ 'CCSHARED', 'LDSHARED', 'SO', 'AR',
+ 'ARFLAGS')
+
+ # From here on, environment variables override or extend the
+ # configured values; the order of the checks matters because later
+ # ones build on the strings produced by earlier ones.
+ if 'CC' in os.environ:
+ newcc = os.environ['CC']
+ if (sys.platform == 'darwin'
+ and 'LDSHARED' not in os.environ
+ and ldshared.startswith(cc)):
+ # On OS X, if CC is overridden, use that as the default
+ # command for LDSHARED as well
+ ldshared = newcc + ldshared[len(cc):]
+ cc = newcc
+ if 'CXX' in os.environ:
+ cxx = os.environ['CXX']
+ if 'LDSHARED' in os.environ:
+ ldshared = os.environ['LDSHARED']
+ if 'CPP' in os.environ:
+ cpp = os.environ['CPP']
+ else:
+ cpp = cc + " -E" # not always
+ if 'LDFLAGS' in os.environ:
+ ldshared = ldshared + ' ' + os.environ['LDFLAGS']
+ if 'CFLAGS' in os.environ:
+ # NB: $CFLAGS is appended to OPT, so the configured CFLAGS value
+ # is replaced rather than extended
+ cflags = opt + ' ' + os.environ['CFLAGS']
+ ldshared = ldshared + ' ' + os.environ['CFLAGS']
+ if 'CPPFLAGS' in os.environ:
+ # preprocessor flags go to the preprocessor, compiler and linker
+ cpp = cpp + ' ' + os.environ['CPPFLAGS']
+ cflags = cflags + ' ' + os.environ['CPPFLAGS']
+ ldshared = ldshared + ' ' + os.environ['CPPFLAGS']
+ if 'AR' in os.environ:
+ ar = os.environ['AR']
+ if 'ARFLAGS' in os.environ:
+ # $ARFLAGS replaces the configured archiver flags
+ archiver = ar + ' ' + os.environ['ARFLAGS']
+ else:
+ archiver = ar + ' ' + ar_flags
+
+ cc_cmd = cc + ' ' + cflags
+ compiler.set_executables(
+ preprocessor=cpp,
+ compiler=cc_cmd,
+ compiler_so=cc_cmd + ' ' + ccshared,
+ compiler_cxx=cxx,
+ linker_so=ldshared,
+ linker_exe=cc,
+ archiver=archiver)
+
+ compiler.shared_lib_extension = so_ext
+
+
+def get_config_h_filename():
+    """Return full pathname of installed pyconfig.h file.
+
+    In a build tree the header is looked up in the project base ('PC'
+    below it on NT); otherwise in the platform-specific include
+    directory.
+    """
+    if not python_build:
+        header_dir = get_python_inc(plat_specific=1)
+    elif os.name == "nt":
+        header_dir = os.path.join(project_base, "PC")
+    else:
+        header_dir = project_base
+
+    # The name of the config.h file changed in 2.2
+    if get_python_version() < '2.2':
+        header_name = 'config.h'
+    else:
+        header_name = 'pyconfig.h'
+    return os.path.join(header_dir, header_name)
+
+
+def get_makefile_filename():
+    """Return full pathname of installed Makefile from the Python build.
+
+    In a build tree this is the Makefile in the project base; otherwise
+    the one in the 'config' directory of the platform-specific standard
+    library.
+    """
+    if python_build:
+        makefile_dir = project_base
+    else:
+        stdlib_dir = get_python_lib(plat_specific=1, standard_lib=1)
+        makefile_dir = os.path.join(stdlib_dir, "config")
+    return os.path.join(makefile_dir, "Makefile")
+
+
+def parse_config_h(fp, g=None):
+    """Parse a config.h-style file.
+
+    'fp' is an open file-like object; it is read line by line with
+    'readline()' until an empty result signals end-of-file.
+
+    Returns a dictionary of name/value pairs.  "#define NAME value"
+    lines store the value (as an int when it looks like one, else as
+    the raw string); "/* #undef NAME */" lines store 0.  If a dictionary
+    is passed as the second argument, it is filled in and returned
+    instead of a new one.
+    """
+    if g is None:
+        g = {}
+    define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n")
+    undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n")
+
+    line = fp.readline()
+    while line:
+        defined = define_rx.match(line)
+        if defined:
+            name, value = defined.group(1, 2)
+            try:
+                value = int(value)
+            except ValueError:
+                # not a number: keep the text as it appears in the file
+                pass
+            g[name] = value
+        else:
+            undefined = undef_rx.match(line)
+            if undefined:
+                g[undefined.group(1)] = 0
+        line = fp.readline()
+    return g
+
+
+# Regexes needed for parsing Makefile (and similar syntaxes,
+# like old-style Setup files).  All three are raw strings so that the
+# regex escapes reach the 're' module untouched instead of relying on
+# Python passing unknown string escapes through.
+_variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)")
+_findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)")
+_findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}")
+
+def parse_makefile(fn, g=None):
+    """Parse a Makefile-style file.
+
+    'fn' is the name of the file to read.  A dictionary containing
+    name/value pairs is returned.  If an optional dictionary is passed
+    in as the second argument, it is updated and returned instead of a
+    new dictionary.
+
+    Values that look like integers are stored as ints; all other values
+    are stored as stripped strings with "$(name)" and "${name}"
+    references expanded.  References are resolved from the file itself,
+    then from os.environ, and finally expand to the empty string.
+    Definitions whose references can never be resolved (malformed or
+    circular) are dropped from the result.
+    """
+    from distutils.text_file import TextFile
+
+    if g is None:
+        g = {}
+    done = {}
+    notdone = {}
+
+    fp = TextFile(fn, strip_comments=1, skip_blanks=1, join_lines=1)
+    try:
+        while 1:
+            line = fp.readline()
+            if line is None:  # eof
+                break
+            m = _variable_rx.match(line)
+            if not m:
+                continue
+            n, v = m.group(1, 2)
+            v = v.strip()
+            # `$$' is a literal `$' in make
+            tmpv = v.replace('$$', '')
+
+            if "$" in tmpv:
+                # still contains a variable reference: expand it below
+                notdone[n] = v
+            else:
+                try:
+                    v = int(v)
+                except ValueError:
+                    # insert literal `$'
+                    done[n] = v.replace('$$', '$')
+                else:
+                    done[n] = v
+    finally:
+        # Release the file even if reading or parsing blows up; the
+        # interpolation below works on the collected dictionaries only.
+        fp.close()
+
+    # do variable interpolation here
+    while notdone:
+        progress = False
+        # Walk a snapshot of the names: entries are deleted from
+        # 'notdone' during the pass.
+        for name in list(notdone):
+            value = notdone[name]
+            m = _findvar1_rx.search(value) or _findvar2_rx.search(value)
+            if not m:
+                # bogus variable reference; just drop it since we can't deal
+                del notdone[name]
+                progress = True
+                continue
+
+            n = m.group(1)
+            found = True
+            if n in done:
+                item = str(done[n])
+            elif n in notdone:
+                # get it on a subsequent round
+                found = False
+            elif n in os.environ:
+                # do it like make: fall back to environment
+                item = os.environ[n]
+            else:
+                done[n] = item = ""
+            if found:
+                progress = True
+                after = value[m.end():]
+                value = value[:m.start()] + item + after
+                if "$" in after:
+                    notdone[name] = value
+                else:
+                    try:
+                        value = int(value)
+                    except ValueError:
+                        done[name] = value.strip()
+                    else:
+                        done[name] = value
+                    del notdone[name]
+
+        if not progress:
+            # Nothing changed during a whole pass: every remaining
+            # variable waits for another unresolved one (a reference
+            # cycle), so further passes would loop forever.  Give up on
+            # them, like on any other reference we can't deal with.
+            break
+
+    # strip spurious spaces
+    for k, v in list(done.items()):
+        if isinstance(v, str):
+            done[k] = v.strip()
+
+    # save the results in the global dictionary
+    g.update(done)
+    return g
+
+
+def expand_makefile_vars(s, vars):
+    """Expand Makefile-style variables -- "${foo}" or "$(foo)" -- in
+    's' according to 'vars' (a dictionary mapping variable names to
+    values).  Variables not present in 'vars' are silently expanded to the
+    empty string.  The variable values in 'vars' should not contain further
+    variable expansions; if 'vars' is the output of 'parse_makefile()',
+    you're fine.  Returns a variable-expanded version of 's'.
+    """
+
+    # This algorithm does multiple expansion, so if vars['foo'] contains
+    # "${bar}", it will expand ${foo} to ${bar}, and then expand
+    # ${bar}... and so forth.  This is fine as long as 'vars' comes from
+    # 'parse_makefile()', which takes care of such expansions eagerly,
+    # according to make's variable expansion semantics.
+
+    while 1:
+        m = _findvar1_rx.search(s) or _findvar2_rx.search(s)
+        if not m:
+            break
+        (beg, end) = m.span()
+        value = vars.get(m.group(1))
+        if value is None:
+            # unknown variable: expand to nothing, as documented
+            value = ""
+        # 'parse_makefile()' stores numeric values as ints, so format the
+        # value instead of concatenating it directly.
+        s = s[0:beg] + ("%s" % (value,)) + s[end:]
+    return s
+
+
+# Configuration dictionary; stays None until an _init_* function fills it
+_config_vars = None
+
+def _init_posix():
+    """Initialize the module as appropriate for POSIX systems."""
+    global _config_vars
+    # _sysconfigdata is generated at build time, see the sysconfig module
+    from _sysconfigdata import build_time_vars
+    # start from a private copy of the build-time settings
+    _config_vars = dict(build_time_vars)
+
+
+def _init_nt():
+    """Initialize the module as appropriate for NT"""
+    global _config_vars
+    _config_vars = {
+        # basic install directories
+        'LIBDEST': get_python_lib(plat_specific=0, standard_lib=1),
+        'BINLIBDEST': get_python_lib(plat_specific=1, standard_lib=1),
+
+        # XXX hmmm.. a normal install puts include files here
+        'INCLUDEPY': get_python_inc(plat_specific=0),
+
+        'SO': '.pyd',
+        'EXE': ".exe",
+        # version without the dot, e.g. "27"
+        'VERSION': get_python_version().replace(".", ""),
+        'BINDIR': os.path.dirname(os.path.abspath(sys.executable)),
+    }
+
+
+def _init_os2():
+    """Initialize the module as appropriate for OS/2"""
+    global _config_vars
+    _config_vars = {
+        # basic install directories
+        'LIBDEST': get_python_lib(plat_specific=0, standard_lib=1),
+        'BINLIBDEST': get_python_lib(plat_specific=1, standard_lib=1),
+
+        # XXX hmmm.. a normal install puts include files here
+        'INCLUDEPY': get_python_inc(plat_specific=0),
+
+        'SO': '.pyd',
+        'EXE': ".exe",
+    }
+
+
+def get_config_vars(*args):
+    """With no arguments, return a dictionary of all configuration
+    variables relevant for the current platform.  Generally this includes
+    everything needed to build extensions and install both pure modules and
+    extensions.  On Unix, this means every variable defined in Python's
+    installed Makefile; on Windows and Mac OS it's a much smaller set.
+
+    With arguments, return a list of values that result from looking up
+    each argument in the configuration variable dictionary; names that
+    are not in the dictionary yield None.
+    """
+    global _config_vars
+    if _config_vars is None:
+        # First call: run the platform initializer ("_init_" + os.name)
+        # if this module defines one, else start from an empty dictionary.
+        initializer = globals().get("_init_" + os.name)
+        if initializer:
+            initializer()
+        else:
+            _config_vars = {}
+
+        # Normalized versions of prefix and exec_prefix are handy to have;
+        # in fact, these are the standard versions used most places in the
+        # Distutils.
+        _config_vars['prefix'] = PREFIX
+        _config_vars['exec_prefix'] = EXEC_PREFIX
+
+        # OS X platforms require special customization to handle
+        # multi-architecture, multi-os-version installers
+        if sys.platform == 'darwin':
+            import _osx_support
+            _osx_support.customize_config_vars(_config_vars)
+
+    if not args:
+        return _config_vars
+    return [_config_vars.get(name) for name in args]
+
+def get_config_var(name):
+    """Return the value of a single variable using the dictionary
+    returned by 'get_config_vars()'.  Equivalent to
+    get_config_vars().get(name); unknown names therefore yield None.
+    """
+    config = get_config_vars()
+    return config.get(name)
diff --git a/cashew/Lib/distutils/text_file.py b/cashew/Lib/distutils/text_file.py
new file mode 100644
index 0000000..690cb80
--- /dev/null
+++ b/cashew/Lib/distutils/text_file.py
@@ -0,0 +1,304 @@
+"""text_file
+
+provides the TextFile class, which gives an interface to text files
+that (optionally) takes care of stripping comments, ignoring blank
+lines, and joining lines with backslashes."""
+
+__revision__ = "$Id$"
+
+import sys
+
+
+class TextFile:
+
+    """Provides a file-like object that takes care of all the things you
+    commonly want to do when processing a text file that has some
+    line-by-line syntax: strip comments (as long as "#" is your
+    comment character), skip blank lines, join adjacent lines by
+    escaping the newline (ie. backslash at end of line), strip
+    leading and/or trailing whitespace.  All of these are optional
+    and independently controllable.
+
+    Provides a 'warn()' method so you can generate warning messages that
+    report physical line number, even if the logical line in question
+    spans multiple physical lines.  Also provides 'unreadline()' for
+    implementing line-at-a-time lookahead.
+
+    Constructor is called as:
+
+        TextFile (filename=None, file=None, **options)
+
+    It bombs (RuntimeError) if both 'filename' and 'file' are None;
+    'filename' should be a string, and 'file' a file object (or
+    something that provides 'readline()' and 'close()' methods).  It is
+    recommended that you supply at least 'filename', so that TextFile
+    can include it in warning messages.  If 'file' is not supplied,
+    TextFile creates its own using the 'open()' builtin.
+
+    The options are all boolean, and affect the value returned by
+    'readline()':
+      strip_comments [default: true]
+        strip from "#" to end-of-line, as well as any whitespace
+        leading up to the "#" -- unless it is escaped by a backslash
+      lstrip_ws [default: false]
+        strip leading whitespace from each line before returning it
+      rstrip_ws [default: true]
+        strip trailing whitespace (including line terminator!) from
+        each line before returning it
+      skip_blanks [default: true]
+        skip lines that are empty *after* stripping comments and
+        whitespace.  (If both lstrip_ws and rstrip_ws are false,
+        then some lines may consist of solely whitespace: these will
+        *not* be skipped, even if 'skip_blanks' is true.)
+      join_lines [default: false]
+        if a backslash is the last non-newline character on a line
+        after stripping comments and whitespace, join the following line
+        to it to form one "logical line"; if N consecutive lines end
+        with a backslash, then N+1 physical lines will be joined to
+        form one logical line.
+      collapse_join [default: false]
+        strip leading whitespace from lines that are joined to their
+        predecessor; only matters if (join_lines and not lstrip_ws)
+
+    Note that since 'rstrip_ws' can strip the trailing newline, the
+    semantics of 'readline()' must differ from those of the builtin file
+    object's 'readline()' method!  In particular, 'readline()' returns
+    None for end-of-file: an empty string might just be a blank line (or
+    an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
+    not."""
+
+    default_options = { 'strip_comments': 1,
+                        'skip_blanks':    1,
+                        'lstrip_ws':      0,
+                        'rstrip_ws':      1,
+                        'join_lines':     0,
+                        'collapse_join':  0,
+                      }
+
+    def __init__ (self, filename=None, file=None, **options):
+        """Construct a new TextFile object.  At least one of 'filename'
+        (a string) and 'file' (a file-like object) must be supplied.
+        The keyword argument options are described above and affect
+        the values returned by 'readline()'.
+
+        Raises RuntimeError if neither 'filename' nor 'file' is given,
+        and KeyError for an option name that is not in
+        'default_options'."""
+
+        if filename is None and file is None:
+            raise RuntimeError(
+                "you must supply either or both of 'filename' and 'file'")
+
+        # set values for all options -- either from client option hash
+        # or fallback to default_options
+        for opt in self.default_options.keys():
+            if opt in options:
+                setattr (self, opt, options[opt])
+            else:
+                setattr (self, opt, self.default_options[opt])
+
+        # sanity check client option hash
+        for opt in options.keys():
+            if opt not in self.default_options:
+                raise KeyError("invalid TextFile option '%s'" % opt)
+
+        if file is None:
+            self.open (filename)
+        else:
+            self.filename = filename
+            self.file = file
+            self.current_line = 0       # assuming that file is at BOF!
+
+        # 'linebuf' is a stack of lines that will be emptied before we
+        # actually read from the file; it's only populated by an
+        # 'unreadline()' operation
+        self.linebuf = []
+
+
+    def open (self, filename):
+        """Open a new file named 'filename'.  This overrides both the
+        'filename' and 'file' arguments to the constructor."""
+
+        self.filename = filename
+        self.file = open (self.filename, 'r')
+        self.current_line = 0
+
+
+    def close (self):
+        """Close the current file and forget everything we know about it
+        (filename, current line number).  Calling it again once the file
+        has been closed is a no-op."""
+        file = self.file
+        self.file = None
+        self.filename = None
+        self.current_line = None
+        if file is not None:
+            file.close()
+
+
+    def gen_error (self, msg, line=None):
+        """Return 'msg' prefixed with the file name and line position.
+
+        'line' defaults to the current line; it is either a single line
+        number or a list/tuple holding the first and last physical line
+        of a range.  The file name part is left out when no file name is
+        known (object built from a 'file' only)."""
+        outmsg = []
+        if line is None:
+            line = self.current_line
+        if self.filename is not None:
+            outmsg.append(self.filename + ", ")
+        if isinstance(line, (list, tuple)):
+            outmsg.append("lines %d-%d: " % tuple (line))
+        else:
+            outmsg.append("line %d: " % line)
+        outmsg.append(str(msg))
+        return ''.join(outmsg)
+
+
+    def error (self, msg, line=None):
+        """Raise ValueError carrying 'msg' tied to 'line' (by default the
+        current logical line), formatted by 'gen_error()'."""
+        raise ValueError("error: " + self.gen_error(msg, line))
+
+    def warn (self, msg, line=None):
+        """Print (to stderr) a warning message tied to the current logical
+        line in the current file.  If the current logical line in the
+        file spans multiple physical lines, the warning refers to the
+        whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
+        the current line number; it may be a list or tuple to indicate a
+        range of physical lines, or an integer for a single physical
+        line."""
+        sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")
+
+
+    def readline (self):
+        """Read and return a single logical line from the current file (or
+        from an internal buffer if lines have previously been "unread"
+        with 'unreadline()').  If the 'join_lines' option is true, this
+        may involve reading multiple physical lines concatenated into a
+        single string.  Updates the current line number, so calling
+        'warn()' after 'readline()' emits a warning about the physical
+        line(s) just read.  Returns None on end-of-file, since the empty
+        string can occur if 'rstrip_ws' is true but 'skip_blanks' is
+        not."""
+
+        # If any "unread" lines waiting in 'linebuf', return the top
+        # one.  (We don't actually buffer read-ahead data -- lines only
+        # get put in 'linebuf' if the client explicitly does an
+        # 'unreadline()'.)
+        if self.linebuf:
+            return self.linebuf.pop()
+
+        buildup_line = ''
+
+        while 1:
+            # read the line, make it None if EOF
+            line = self.file.readline()
+            if line == '':
+                line = None
+
+            if self.strip_comments and line:
+
+                # Look for the first "#" in the line.  If none, never
+                # mind.  If we find one and it's the first character, or
+                # is not preceded by "\", then it starts a comment --
+                # strip the comment, strip whitespace before it, and
+                # carry on.  Otherwise, it's just an escaped "#", so
+                # unescape it (and any other escaped "#"'s that might be
+                # lurking in there) and otherwise leave the line alone.
+
+                pos = line.find("#")
+                if pos == -1:           # no "#" -- no comments
+                    pass
+
+                # It's definitely a comment -- either "#" is the first
+                # character, or it's elsewhere and unescaped.
+                elif pos == 0 or line[pos-1] != "\\":
+                    # Have to preserve the trailing newline, because it's
+                    # the job of a later step (rstrip_ws) to remove it --
+                    # and if rstrip_ws is false, we'd better preserve it!
+                    # (NB. this means that if the final line is all comment
+                    # and has no trailing newline, we will think that it's
+                    # EOF; I think that's OK.)
+                    if line.endswith('\n'):
+                        eol = '\n'
+                    else:
+                        eol = ''
+                    line = line[0:pos] + eol
+
+                    # If all that's left is whitespace, then skip line
+                    # *now*, before we try to join it to 'buildup_line' --
+                    # that way constructs like
+                    #   hello \\
+                    #   # comment that should be ignored
+                    #   there
+                    # result in "hello there".
+                    if line.strip() == "":
+                        continue
+
+                else:                   # it's an escaped "#"
+                    line = line.replace("\\#", "#")
+
+
+            # did previous line end with a backslash?  then accumulate
+            if self.join_lines and buildup_line:
+                # oops: end of file
+                if line is None:
+                    self.warn ("continuation line immediately precedes "
+                               "end-of-file")
+                    return buildup_line
+
+                if self.collapse_join:
+                    line = line.lstrip()
+                line = buildup_line + line
+
+                # careful: pay attention to line number when incrementing it
+                if isinstance(self.current_line, list):
+                    self.current_line[1] = self.current_line[1] + 1
+                else:
+                    self.current_line = [self.current_line,
+                                         self.current_line+1]
+            # just an ordinary line, read it as usual
+            else:
+                if line is None:        # eof
+                    return None
+
+                # still have to be careful about incrementing the line number!
+                if isinstance(self.current_line, list):
+                    self.current_line = self.current_line[1] + 1
+                else:
+                    self.current_line = self.current_line + 1
+
+
+            # strip whitespace however the client wants (leading and
+            # trailing, or one or the other, or neither)
+            if self.lstrip_ws and self.rstrip_ws:
+                line = line.strip()
+            elif self.lstrip_ws:
+                line = line.lstrip()
+            elif self.rstrip_ws:
+                line = line.rstrip()
+
+            # blank line (whether we rstrip'ed or not)? skip to next line
+            # if appropriate
+            if (line == '' or line == '\n') and self.skip_blanks:
+                continue
+
+            if self.join_lines:
+                # 'line' may be the empty string here (blank line kept
+                # because 'skip_blanks' is off), so test with endswith()
+                # rather than indexing the last character.
+                if line.endswith('\\'):
+                    buildup_line = line[:-1]
+                    continue
+
+                if line[-2:] == '\\\n':
+                    buildup_line = line[0:-2] + '\n'
+                    continue
+
+            # well, I guess there's some actual content there: return it
+            return line
+
+    # readline ()
+
+
+    def readlines (self):
+        """Read and return the list of all logical lines remaining in the
+        current file."""
+
+        lines = []
+        while 1:
+            line = self.readline()
+            if line is None:
+                return lines
+            lines.append (line)
+
+
+    def unreadline (self, line):
+        """Push 'line' (a string) onto an internal buffer that will be
+        checked by future 'readline()' calls.  Handy for implementing
+        a parser with line-at-a-time lookahead."""
+
+        self.linebuf.append (line)
diff --git a/cashew/Lib/distutils/unixccompiler.py b/cashew/Lib/distutils/unixccompiler.py
new file mode 100644
index 0000000..3af540e
--- /dev/null
+++ b/cashew/Lib/distutils/unixccompiler.py
@@ -0,0 +1,312 @@
+"""distutils.unixccompiler
+
+Contains the UnixCCompiler class, a subclass of CCompiler that handles
+the "typical" Unix-style command-line C compiler:
+ * macros defined with -Dname[=value]
+ * macros undefined with -Uname
+ * include search directories specified with -Idir
+ * libraries specified with -llib
+ * library search directories specified with -Ldir
+ * compile handled by 'cc' (or similar) executable with -c option:
+ compiles .c to .o
+ * link static library handled by 'ar' command (possibly with 'ranlib')
+ * link shared library handled by 'cc -shared'
+"""
+
+__revision__ = "$Id$"
+
+import os, sys, re
+from types import StringType, NoneType
+
+from distutils import sysconfig
+from distutils.dep_util import newer
+from distutils.ccompiler import \
+ CCompiler, gen_preprocess_options, gen_lib_options
+from distutils.errors import \
+ DistutilsExecError, CompileError, LibError, LinkError
+from distutils import log
+
+if sys.platform == 'darwin':
+ import _osx_support
+
+# XXX Things not currently handled:
+# * optimization/debug/warning flags; we just use whatever's in Python's
+# Makefile and live with it. Is this adequate? If not, we might
+# have to have a bunch of subclasses GNUCCompiler, SGICCompiler,
+# SunCCompiler, and I suspect down that road lies madness.
+# * even if we don't know a warning flag from an optimization flag,
+# we need some way for outsiders to feed preprocessor/compiler/linker
+# flags in to us -- eg. a sysadmin might want to mandate certain flags
+# via a site config file, or a user might want to set something for
+# compiling this module distribution only via the setup.py command
+# line, whatever. As long as these options come from something on the
+# current system, they can be as system-dependent as they like, and we
+# should just happily stuff them into the preprocessor/compiler/linker
+# options and carry on.
+
+
+class UnixCCompiler(CCompiler):
+
+ # Compiler type tag; 'sysconfig.customize_compiler()' applies its
+ # Unix-specific customization to compilers whose type is "unix".
+ compiler_type = 'unix'
+
+ # These are used by CCompiler in two places: the constructor sets
+ # instance attributes 'preprocessor', 'compiler', etc. from them, and
+ # 'set_executable()' allows any of these to be set. The defaults here
+ # are pretty generic; they will probably have to be set by an outsider
+ # (eg. using information discovered by the sysconfig about building
+ # Python extensions).
+ executables = {'preprocessor' : None,
+ 'compiler' : ["cc"],
+ 'compiler_so' : ["cc"],
+ 'compiler_cxx' : ["cc"],
+ 'linker_so' : ["cc", "-shared"],
+ 'linker_exe' : ["cc"],
+ 'archiver' : ["ar", "-cr"],
+ 'ranlib' : None,
+ }
+
+ # On darwin a 'ranlib' command is configured by default, so
+ # 'create_static_lib()' runs it on every archive it builds.
+ if sys.platform[:6] == "darwin":
+ executables['ranlib'] = ["ranlib"]
+
+ # Needed for the filename generation methods provided by the base
+ # class, CCompiler. NB. whoever instantiates/uses a particular
+ # UnixCCompiler instance should set 'shared_lib_extension' -- we set a
+ # reasonable common default here, but it's not necessarily used on all
+ # Unices!
+
+ src_extensions = [".c",".C",".cc",".cxx",".cpp",".m"]
+ obj_extension = ".o"
+ static_lib_extension = ".a"
+ shared_lib_extension = ".so"
+ dylib_lib_extension = ".dylib"
+ xcode_stub_lib_extension = ".tbd"
+ # every library flavour is named "lib" + name + extension
+ static_lib_format = shared_lib_format = dylib_lib_format = "lib%s%s"
+ xcode_stub_lib_format = dylib_lib_format
+ # executables only get an extension on cygwin
+ if sys.platform == "cygwin":
+ exe_extension = ".exe"
+
+ def preprocess(self, source,
+                output_file=None, macros=None, include_dirs=None,
+                extra_preargs=None, extra_postargs=None):
+     """Run the preprocessor on 'source'.
+
+     The command line is the configured preprocessor followed by the
+     options generated from 'macros' and 'include_dirs', "-o
+     output_file" when an output file is given, and the source file;
+     'extra_preargs' are placed in front of it and 'extra_postargs'
+     just before the source file.
+
+     Raises CompileError if the preprocessor command fails.
+     """
+     _, macros, include_dirs = \
+         self._fix_compile_args(None, macros, include_dirs)
+     options = gen_preprocess_options(macros, include_dirs)
+     command = self.preprocessor + options
+     if output_file:
+         command.extend(['-o', output_file])
+     if extra_preargs:
+         command[:0] = extra_preargs
+     if extra_postargs:
+         command.extend(extra_postargs)
+     command.append(source)
+
+     # We need to preprocess: either we're being forced to, or we're
+     # generating output to stdout, or there's a target output file and
+     # the source file is newer than the target (or the target doesn't
+     # exist).
+     if not (self.force or output_file is None
+             or newer(source, output_file)):
+         return
+
+     if output_file:
+         self.mkpath(os.path.dirname(output_file))
+     try:
+         self.spawn(command)
+     except DistutilsExecError as err:
+         raise CompileError(err)
+
+ def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
+     """Compile 'src' into the object file 'obj' with 'compiler_so'.
+
+     'cc_args' go before the source file and 'extra_postargs' after the
+     "-o obj" part.  Raises CompileError if the compiler command fails.
+     """
+     command = self.compiler_so
+     if sys.platform == 'darwin':
+         # let _osx_support adjust the compiler command for this call
+         command = _osx_support.compiler_fixup(command,
+                                               cc_args + extra_postargs)
+     try:
+         self.spawn(command + cc_args + [src, '-o', obj] +
+                    extra_postargs)
+     except DistutilsExecError as err:
+         raise CompileError(err)
+
+ def create_static_lib(self, objects, output_libname,
+                       output_dir=None, debug=0, target_lang=None):
+     """Archive 'objects' into the static library 'output_libname'.
+
+     Nothing is run when the library is already up-to-date.  Otherwise
+     the archiver is invoked and, if a 'ranlib' command is configured,
+     ranlib is run on the result; a failing ranlib raises LibError.
+     """
+     objects, output_dir = self._fix_object_args(objects, output_dir)
+
+     archive = self.library_filename(output_libname,
+                                     output_dir=output_dir)
+
+     if not self._need_link(objects, archive):
+         log.debug("skipping %s (up-to-date)", archive)
+         return
+
+     self.mkpath(os.path.dirname(archive))
+     self.spawn(self.archiver + [archive] + objects + self.objects)
+
+     # Few Unices still need ranlib, so it only runs when a ranlib
+     # command has been configured for this compiler.
+     if self.ranlib:
+         try:
+             self.spawn(self.ranlib + [archive])
+         except DistutilsExecError as err:
+             raise LibError(err)
+
+ def link(self, target_desc, objects,
+          output_filename, output_dir=None, libraries=None,
+          library_dirs=None, runtime_library_dirs=None,
+          export_symbols=None, debug=0, extra_preargs=None,
+          extra_postargs=None, build_temp=None, target_lang=None):
+     """Link 'objects' into 'output_filename'.
+
+     'linker_exe' is used when 'target_desc' is CCompiler.EXECUTABLE,
+     'linker_so' otherwise.  Nothing is run when the target is already
+     up-to-date.
+
+     Raises TypeError if 'output_dir' is neither a string nor None, and
+     LinkError if the linker command fails.
+     """
+     objects, output_dir = self._fix_object_args(objects, output_dir)
+     libraries, library_dirs, runtime_library_dirs = \
+         self._fix_lib_args(libraries, library_dirs, runtime_library_dirs)
+
+     lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs,
+                                libraries)
+     if type(output_dir) not in (StringType, NoneType):
+         raise TypeError("'output_dir' must be a string or None")
+     if output_dir is not None:
+         output_filename = os.path.join(output_dir, output_filename)
+
+     if not self._need_link(objects, output_filename):
+         log.debug("skipping %s (up-to-date)", output_filename)
+         return
+
+     ld_args = (objects + self.objects +
+                lib_opts + ['-o', output_filename])
+     if debug:
+         ld_args[:0] = ['-g']
+     if extra_preargs:
+         ld_args[:0] = extra_preargs
+     if extra_postargs:
+         ld_args.extend(extra_postargs)
+     self.mkpath(os.path.dirname(output_filename))
+     try:
+         if target_desc == CCompiler.EXECUTABLE:
+             linker = self.linker_exe[:]
+         else:
+             linker = self.linker_so[:]
+         if target_lang == "c++" and self.compiler_cxx:
+             # skip over environment variable settings if /usr/bin/env
+             # is used to set up the linker's environment.
+             # This is needed on OSX. Note: this assumes that the
+             # normal and C++ compiler have the same environment
+             # settings.
+             index = 0
+             if os.path.basename(linker[0]) == "env":
+                 index = 1
+                 while '=' in linker[index]:
+                     index = index + 1
+
+             # swap in the C++ compiler at the linker command's position
+             linker[index] = self.compiler_cxx[index]
+
+         if sys.platform == 'darwin':
+             linker = _osx_support.compiler_fixup(linker, ld_args)
+
+         self.spawn(linker + ld_args)
+     except DistutilsExecError as err:
+         raise LinkError(err)
+
+ # -- Miscellaneous methods -----------------------------------------
+ # These are all used by the 'gen_lib_options()' function, in
+ # ccompiler.py.
+
+ def library_dir_option(self, dir): # build the "-L<dir>" option string for 'dir'
+ return "-L" + dir
+
+ def _is_gcc(self, compiler_name): # substring test: true if the name contains "gcc" or "g++" anywhere
+ return "gcc" in compiler_name or "g++" in compiler_name
+
+ def runtime_library_dir_option(self, dir): # returns a str or a list of option strings, depending on platform
+ # XXX Hackish, at the very least. See Python bug #445902:
+ # http://sourceforge.net/tracker/index.php
+ # ?func=detail&aid=445902&group_id=5470&atid=105470
+ # Linkers on different platforms need different options to
+ # specify that directories need to be added to the list of
+ # directories searched for dependencies when a dynamic library
+ # is sought. GCC has to be told to pass the -R option through
+ # to the linker, whereas other compilers just know this.
+ # Other compilers may need something slightly different. At
+ # this time, there's no way to determine this information from
+ # the configuration data stored in the Python installation, so
+ # we use this hack.
+ compiler = os.path.basename(sysconfig.get_config_var("CC")) # only the program name is inspected, not its directory
+ if sys.platform[:6] == "darwin":
+ # MacOSX's linker doesn't understand the -R flag at all
+ return "-L" + dir
+ elif sys.platform[:7] == "freebsd":
+ return "-Wl,-rpath=" + dir
+ elif sys.platform[:5] == "hp-ux":
+ if self._is_gcc(compiler):
+ return ["-Wl,+s", "-L" + dir] # gcc needs -Wl, to hand +s to the linker
+ return ["+s", "-L" + dir]
+ elif sys.platform[:7] == "irix646" or sys.platform[:6] == "osf1V5":
+ return ["-rpath", dir] # two separate arguments, hence a list
+ elif self._is_gcc(compiler):
+ return "-Wl,-R" + dir
+ else:
+ return "-R" + dir
+
+ def library_option(self, lib): # build the "-l<lib>" option string for 'lib'
+ return "-l" + lib
+
+ def find_library_file(self, dirs, lib, debug=0): # returns the first existing candidate path, or None; 'debug' is unused here
+ shared_f = self.library_filename(lib, lib_type='shared')
+ dylib_f = self.library_filename(lib, lib_type='dylib')
+ xcode_stub_f = self.library_filename(lib, lib_type='xcode_stub')
+ static_f = self.library_filename(lib, lib_type='static')
+
+ if sys.platform == 'darwin':
+ # On OSX users can specify an alternate SDK using
+ # '-isysroot', calculate the SDK root if it is specified
+ # (and use it further on)
+ #
+ # Note that, as of Xcode 7, Apple SDKs may contain textual stub
+ # libraries with .tbd extensions rather than the normal .dylib
+ # shared libraries installed in /. The Apple compiler tool
+ # chain handles this transparently but it can cause problems
+ # for programs that are being built with an SDK and searching
+ # for specific libraries. Callers of find_library_file need to
+ # keep in mind that the base filename of the returned SDK library
+ # file might have a different extension from that of the library
+ # file installed on the running system, for example:
+ # /Applications/Xcode.app/Contents/Developer/Platforms/
+ # MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk/
+ # usr/lib/libedit.tbd
+ # vs
+ # /usr/lib/libedit.dylib
+ cflags = sysconfig.get_config_var('CFLAGS') # NOTE(review): presumably always a string on darwin; re.search would fail on None -- confirm
+ m = re.search(r'-isysroot\s+(\S+)', cflags)
+ if m is None:
+ sysroot = '/'
+ else:
+ sysroot = m.group(1)
+
+
+
+ for dir in dirs:
+ shared = os.path.join(dir, shared_f)
+ dylib = os.path.join(dir, dylib_f)
+ static = os.path.join(dir, static_f)
+ xcode_stub = os.path.join(dir, xcode_stub_f)
+
+ if sys.platform == 'darwin' and (
+ dir.startswith('/System/') or (
+ dir.startswith('/usr/') and not dir.startswith('/usr/local/'))):
+ # System directories are looked up under the SDK root instead;
+ # dir[1:] drops the leading '/' so join() keeps 'sysroot'.
+ shared = os.path.join(sysroot, dir[1:], shared_f)
+ dylib = os.path.join(sysroot, dir[1:], dylib_f)
+ static = os.path.join(sysroot, dir[1:], static_f)
+ xcode_stub = os.path.join(sysroot, dir[1:], xcode_stub_f)
+
+ # We're second-guessing the linker here, with not much hard
+ # data to go on: GCC seems to prefer the shared library, so I'm
+ # assuming that *all* Unix C compilers do. And of course I'm
+ # ignoring even GCC's "-static" option. So sue me.
+ if os.path.exists(dylib): # preference order: dylib, xcode stub, shared, static
+ return dylib
+ elif os.path.exists(xcode_stub):
+ return xcode_stub
+ elif os.path.exists(shared):
+ return shared
+ elif os.path.exists(static):
+ return static
+
+ # Oops, didn't find it in *any* of 'dirs'
+ return None
diff --git a/cashew/Lib/distutils/util.py b/cashew/Lib/distutils/util.py
new file mode 100644
index 0000000..2b4d784
--- /dev/null
+++ b/cashew/Lib/distutils/util.py
@@ -0,0 +1,477 @@
+"""distutils.util
+
+Miscellaneous utility functions -- anything that doesn't fit into
+one of the other *util.py modules.
+"""
+
+__revision__ = "$Id$"
+
+import sys, os, string, re
+from distutils.errors import DistutilsPlatformError
+from distutils.dep_util import newer
+from distutils.spawn import spawn
+from distutils import log
+from distutils.errors import DistutilsByteCompileError
+
+def get_platform ():
+ """Return a string that identifies the current platform. This is used
+ mainly to distinguish platform-specific build directories and
+ platform-specific built distributions. Typically includes the OS name
+ and version and the architecture (as supplied by 'os.uname()'),
+ although the exact information included depends on the OS; eg. for IRIX
+ the architecture isn't particularly important (IRIX only runs on SGI
+ hardware), but for Linux the kernel version isn't particularly
+ important.
+
+ Examples of returned values:
+ linux-i586
+ linux-alpha (?)
+ solaris-2.6-sun4u
+ irix-5.3
+ irix64-6.2
+
+ Windows will return one of:
+ win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc))
+ win-ia64 (64bit Windows on Itanium)
+ win32 (all others - specifically, sys.platform is returned)
+
+ For other non-POSIX platforms, currently just returns 'sys.platform'.
+ """
+ if os.name == 'nt':
+ # Sniff sys.version for the architecture: it is the text between
+ # " bit (" and the next ")".
+ prefix = " bit ("
+ i = string.find(sys.version, prefix)
+ if i == -1:
+ return sys.platform
+ j = string.find(sys.version, ")", i)
+ look = sys.version[i+len(prefix):j].lower()
+ if look=='amd64':
+ return 'win-amd64'
+ if look=='itanium':
+ return 'win-ia64'
+ return sys.platform
+
+ # Set for cross builds explicitly
+ if "_PYTHON_HOST_PLATFORM" in os.environ:
+ return os.environ["_PYTHON_HOST_PLATFORM"]
+
+ if os.name != "posix" or not hasattr(os, 'uname'):
+ # XXX what about the architecture? NT is Intel or Alpha,
+ # Mac OS is M68k or PPC, etc.
+ return sys.platform
+
+ # Try to distinguish various flavours of Unix
+
+ (osname, host, release, version, machine) = os.uname()
+
+ # Convert the OS name to lowercase, remove '/' characters
+ # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
+ osname = string.lower(osname)
+ osname = string.replace(osname, '/', '')
+ machine = string.replace(machine, ' ', '_')
+ machine = string.replace(machine, '/', '-')
+
+ if osname[:5] == "linux":
+ # At least on Linux/Intel, 'machine' is the processor --
+ # i386, etc.
+ # XXX what about Alpha, SPARC, etc?
+ return "%s-%s" % (osname, machine)
+ elif osname[:5] == "sunos":
+ if release[0] >= "5": # SunOS 5 == Solaris 2
+ osname = "solaris"
+ release = "%d.%s" % (int(release[0]) - 3, release[2:])
+ # We can't use "platform.architecture()[0]" because of a
+ # bootstrap problem. We use a dict so that we get an error
+ # (KeyError) if something suspicious happens.
+ bitness = {2147483647:"32bit", 9223372036854775807:"64bit"}
+ machine += ".%s" % bitness[sys.maxint]
+ # fall through to standard osname-release-machine representation
+ elif osname[:4] == "irix": # could be "irix64"!
+ return "%s-%s" % (osname, release)
+ elif osname[:3] == "aix":
+ return "%s-%s.%s" % (osname, version, release)
+ elif osname[:6] == "cygwin":
+ osname = "cygwin"
+ rel_re = re.compile (r'[\d.]+')
+ m = rel_re.match(release) # keep only the leading run of digits and dots
+ if m:
+ release = m.group()
+ elif osname[:6] == "darwin":
+ import _osx_support, distutils.sysconfig
+ osname, release, machine = _osx_support.get_platform_osx(
+ distutils.sysconfig.get_config_vars(),
+ osname, release, machine)
+
+ return "%s-%s-%s" % (osname, release, machine)
+
+# get_platform ()
+
+
+def convert_path (pathname):
+ """Return 'pathname' as a name that will work on the native filesystem,
+ i.e. split it on '/' and put it back together again using the current
+ directory separator. Needed because filenames in the setup script are
+ always supplied in Unix style, and have to be converted to the local
+ convention before we can actually use them in the filesystem. Raises
+ ValueError on non-Unix-ish systems if 'pathname' either starts or
+ ends with a slash.
+ """
+ if os.sep == '/': # the separator is already '/': nothing to convert
+ return pathname
+ if not pathname: # an empty path is returned unchanged
+ return pathname
+ if pathname[0] == '/':
+ raise ValueError, "path '%s' cannot be absolute" % pathname
+ if pathname[-1] == '/':
+ raise ValueError, "path '%s' cannot end with '/'" % pathname
+
+ paths = string.split(pathname, '/')
+ while '.' in paths: # drop every '.' component
+ paths.remove('.')
+ if not paths: # the path consisted only of '.' components
+ return os.curdir
+ return os.path.join(*paths)
+
+# convert_path ()
+
+
+def change_root (new_root, pathname):
+ """Return 'pathname' with 'new_root' prepended. If 'pathname' is
+ relative, this is equivalent to "os.path.join(new_root,pathname)".
+ Otherwise, it requires making 'pathname' relative and then joining the
+ two, which is tricky on DOS/Windows and Mac OS.
+ """
+ if os.name == 'posix':
+ if not os.path.isabs(pathname):
+ return os.path.join(new_root, pathname)
+ else:
+ return os.path.join(new_root, pathname[1:]) # drop the leading '/' so join() keeps new_root
+
+ elif os.name == 'nt':
+ (drive, path) = os.path.splitdrive(pathname) # the drive part is discarded
+ if path[0] == '\\': # NOTE(review): path[0] raises IndexError if 'path' is empty -- confirm callers never pass a bare drive or ''
+ path = path[1:]
+ return os.path.join(new_root, path)
+
+ elif os.name == 'os2':
+ (drive, path) = os.path.splitdrive(pathname) # the drive part is discarded
+ if path[0] == os.sep: # NOTE(review): same empty-path caveat as the 'nt' branch
+ path = path[1:]
+ return os.path.join(new_root, path)
+
+ else:
+ raise DistutilsPlatformError, \
+ "nothing known about platform '%s'" % os.name
+
+
+_environ_checked = 0 # set to 1 once check_environ() has run
+def check_environ ():
+ """Ensure that 'os.environ' has all the environment variables we
+ guarantee that users can use in config files, command-line options,
+ etc. Currently this includes:
+ HOME - user's home directory (Unix only)
+ PLAT - description of the current platform, including hardware
+ and OS (see 'get_platform()')
+ """
+ global _environ_checked
+ if _environ_checked: # only do the work on the first call
+ return
+
+ if os.name == 'posix' and 'HOME' not in os.environ:
+ import pwd
+ os.environ['HOME'] = pwd.getpwuid(os.getuid())[5] # index 5 of the passwd entry: the home directory
+
+ if 'PLAT' not in os.environ: # an existing PLAT value is left untouched
+ os.environ['PLAT'] = get_platform()
+
+ _environ_checked = 1
+
+
+def subst_vars (s, local_vars):
+ """Perform shell/Perl-style variable substitution on the string 's'. Every
+ occurrence of '$' followed by a name is considered a variable, and that
+ variable is replaced by the value found in the 'local_vars'
+ dictionary, or in 'os.environ' if it's not in 'local_vars'.
+ 'os.environ' is first checked/augmented to guarantee that it contains
+ certain values: see 'check_environ()'. Raise ValueError for any
+ variables not found in either 'local_vars' or 'os.environ'.
+ """
+ check_environ()
+ def _subst (match, local_vars=local_vars):
+ var_name = match.group(1)
+ if var_name in local_vars:
+ return str(local_vars[var_name]) # values from local_vars are converted with str()
+ else:
+ return os.environ[var_name] # KeyError here means the variable is unknown
+
+ try:
+ return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
+ except KeyError, var: # raised by the os.environ lookup in _subst
+ raise ValueError, "invalid variable '$%s'" % var
+
+# subst_vars ()
+
+
+def grok_environment_error (exc, prefix="error: "):
+ # Function kept for backward compatibility.
+ # It used to try clever things with EnvironmentErrors, but nowadays
+ # str(exception) produces good messages, so it just prepends 'prefix'.
+ return prefix + str(exc)
+
+
+# Needed by 'split_quoted()'
+_wordchars_re = _squote_re = _dquote_re = None # compiled lazily by _init_regex()
+def _init_regex():
+ global _wordchars_re, _squote_re, _dquote_re
+ _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace) # run of chars that are not a backslash, quote or whitespace
+ _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'") # single-quoted string, backslash escapes allowed inside
+ _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"') # double-quoted string, backslash escapes allowed inside
+
+def split_quoted (s):
+ """Split a string up according to Unix shell-like rules for quotes and
+ backslashes. In short: words are delimited by spaces, as long as those
+ spaces are not escaped by a backslash, or inside a quoted string.
+ Single and double quotes are equivalent, and the quote characters can
+ be backslash-escaped. The backslash is stripped from any two-character
+ escape sequence, leaving only the escaped character. The quote
+ characters are stripped from any quoted string. Returns a list of
+ words.
+ """
+
+ # This is a nice algorithm for splitting up a single string, since it
+ # doesn't require character-by-character examination. It was a little
+ # bit of a brain-bender to get it working right, though...
+ if _wordchars_re is None: _init_regex()
+
+ s = string.strip(s)
+ words = []
+ pos = 0 # 's' always starts at the current word; 'pos' is the scan offset within it
+
+ while s:
+ m = _wordchars_re.match(s, pos)
+ end = m.end()
+ if end == len(s): # reached the end of the input: the rest is the last word
+ words.append(s[:end])
+ break
+
+ if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
+ words.append(s[:end]) # we definitely have a word delimiter
+ s = string.lstrip(s[end:])
+ pos = 0
+
+ elif s[end] == '\\': # preserve whatever is being escaped;
+ # will become part of the current word
+ s = s[:end] + s[end+1:] # delete the backslash itself
+ pos = end+1 # resume scanning after the escaped character
+
+ else:
+ if s[end] == "'": # slurp singly-quoted string
+ m = _squote_re.match(s, end)
+ elif s[end] == '"': # slurp doubly-quoted string
+ m = _dquote_re.match(s, end)
+ else:
+ raise RuntimeError, \
+ "this can't happen (bad char '%c')" % s[end]
+
+ if m is None:
+ raise ValueError, \
+ "bad string (mismatched %s quotes?)" % s[end]
+
+ (beg, end) = m.span()
+ s = s[:beg] + s[beg+1:end-1] + s[end:] # drop the two quote characters
+ pos = m.end() - 2 # 's' shrank by two chars, so shift the scan offset
+
+ if pos >= len(s): # nothing left to scan: what remains is the last word
+ words.append(s)
+ break
+
+ return words
+
+# split_quoted ()
+
+
+def execute (func, args, msg=None, verbose=0, dry_run=0): # NOTE: 'verbose' is accepted but not used in this body
+ """Perform some action that affects the outside world (eg. by
+ writing to the filesystem). Such actions are special because they
+ are disabled by the 'dry_run' flag. This function takes care of all
+ that bureaucracy for you; all you have to do is supply the
+ function to call and an argument tuple for it (to embody the
+ "external action" being performed), and an optional message to
+ log (it defaults to the call written out as "func(args)").
+ """
+ if msg is None:
+ msg = "%s%r" % (func.__name__, args)
+ if msg[-2:] == ',)': # correct for singleton tuple
+ msg = msg[0:-2] + ')'
+
+ log.info(msg) # the message is logged even in dry-run mode
+ if not dry_run:
+ func(*args)
+
+
+def strtobool (val):
+ """Convert a string representation of truth to true (1) or false (0).
+
+ True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
+ are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if
+ 'val' is anything else.
+ """
+ val = string.lower(val) # matching is case-insensitive
+ if val in ('y', 'yes', 't', 'true', 'on', '1'):
+ return 1
+ elif val in ('n', 'no', 'f', 'false', 'off', '0'):
+ return 0
+ else:
+ raise ValueError, "invalid truth value %r" % (val,) # reports the lowercased value
+
+
+def byte_compile (py_files,
+ optimize=0, force=0,
+ prefix=None, base_dir=None,
+ verbose=1, dry_run=0,
+ direct=None):
+ """Byte-compile a collection of Python source files to either .pyc
+ or .pyo files in the same directory. 'py_files' is a list of files
+ to compile; any files that don't end in ".py" are silently skipped.
+ 'optimize' must be one of the following:
+ 0 - don't optimize (generate .pyc)
+ 1 - normal optimization (like "python -O")
+ 2 - extra optimization (like "python -OO")
+ If 'force' is true, all files are recompiled regardless of
+ timestamps.
+
+ The source filename encoded in each bytecode file defaults to the
+ filenames listed in 'py_files'; you can modify these with 'prefix' and
+ 'base_dir'. 'prefix' is a string that will be stripped off of each
+ source filename, and 'base_dir' is a directory name that will be
+ prepended (after 'prefix' is stripped). You can supply either or both
+ (or neither) of 'prefix' and 'base_dir', as you wish.
+
+ If 'dry_run' is true, doesn't actually do anything that would
+ affect the filesystem.
+
+ Byte-compilation is either done directly in this interpreter process
+ with the standard py_compile module, or indirectly by writing a
+ temporary script and executing it. Normally, you should let
+ 'byte_compile()' figure out whether to use direct compilation (see
+ the source for details). The 'direct' flag is used by the script
+ generated in indirect mode; unless you know what you're doing, leave
+ it set to None.
+ """
+ # Refuse to run (raise) if sys.dont_write_bytecode is set.
+ if sys.dont_write_bytecode:
+ raise DistutilsByteCompileError('byte-compiling is disabled.')
+
+ # First, if the caller didn't force us into direct or indirect mode,
+ # figure out which mode we should be in. We take a conservative
+ # approach: choose direct mode *only* if the current interpreter is
+ # in debug mode and optimize is 0. If we're not in debug mode (-O
+ # or -OO), we don't know which level of optimization this
+ # interpreter is running with, so we can't do direct
+ # byte-compilation and be certain that it's the right thing. Thus,
+ # always compile indirectly if the current interpreter is in either
+ # optimize mode, or if either optimization level was requested by
+ # the caller.
+ if direct is None:
+ direct = (__debug__ and optimize == 0)
+
+ # "Indirect" byte-compilation: write a temporary script and then
+ # run it with the appropriate flags.
+ if not direct:
+ try:
+ from tempfile import mkstemp
+ (script_fd, script_name) = mkstemp(".py") # first positional argument is the suffix
+ except ImportError: # no mkstemp available: fall back to mktemp
+ from tempfile import mktemp
+ (script_fd, script_name) = None, mktemp(".py")
+ log.info("writing byte-compilation script '%s'", script_name)
+ if not dry_run:
+ if script_fd is not None:
+ script = os.fdopen(script_fd, "w")
+ else:
+ script = open(script_name, "w")
+
+ script.write("""\
+from distutils.util import byte_compile
+files = [
+""")
+
+ # XXX would be nice to write absolute filenames, just for
+ # safety's sake (script should be more robust in the face of
+ # chdir'ing before running it). But this requires abspath'ing
+ # 'prefix' as well, and that breaks the hack in build_lib's
+ # 'byte_compile()' method that carefully tacks on a trailing
+ # slash (os.sep really) to make sure the prefix here is "just
+ # right". This whole prefix business is rather delicate -- the
+ # problem is that it's really a directory, but I'm treating it
+ # as a dumb string, so trailing slashes and so forth matter.
+
+ #py_files = map(os.path.abspath, py_files)
+ #if prefix:
+ # prefix = os.path.abspath(prefix)
+
+ script.write(string.join(map(repr, py_files), ",\n") + "]\n")
+ script.write("""
+byte_compile(files, optimize=%r, force=%r,
+ prefix=%r, base_dir=%r,
+ verbose=%r, dry_run=0,
+ direct=1)
+""" % (optimize, force, prefix, base_dir, verbose))
+
+ script.close()
+
+ cmd = [sys.executable, script_name]
+ if optimize == 1:
+ cmd.insert(1, "-O") # inserted before the script name
+ elif optimize == 2:
+ cmd.insert(1, "-OO") # inserted before the script name
+ spawn(cmd, dry_run=dry_run)
+ execute(os.remove, (script_name,), "removing %s" % script_name,
+ dry_run=dry_run)
+
+ # "Direct" byte-compilation: use the py_compile module to compile
+ # right here, right now. Note that the script generated in indirect
+ # mode simply calls 'byte_compile()' in direct mode, a weird sort of
+ # cross-process recursion. Hey, it works!
+ else:
+ from py_compile import compile
+
+ for file in py_files:
+ if file[-3:] != ".py":
+ # This lets us be lazy and not filter filenames in
+ # the "install_lib" command.
+ continue
+
+ # Terminology from the py_compile module:
+ # cfile - byte-compiled file
+ # dfile - purported source filename (same as 'file' by default)
+ cfile = file + (__debug__ and "c" or "o") # ".pyc" normally, ".pyo" when __debug__ is false
+ dfile = file
+ if prefix:
+ if file[:len(prefix)] != prefix:
+ raise ValueError, \
+ ("invalid prefix: filename %r doesn't start with %r"
+ % (file, prefix))
+ dfile = dfile[len(prefix):]
+ if base_dir:
+ dfile = os.path.join(base_dir, dfile)
+
+ cfile_base = os.path.basename(cfile)
+ if direct: # always true here: this is the 'else' of 'if not direct'
+ if force or newer(file, cfile):
+ log.info("byte-compiling %s to %s", file, cfile_base)
+ if not dry_run:
+ compile(file, cfile, dfile)
+ else:
+ log.debug("skipping byte-compilation of %s to %s",
+ file, cfile_base)
+
+# byte_compile ()
+
+def rfc822_escape (header):
+ """Return a version of the string escaped for inclusion in an
+ RFC-822 header, by ensuring there are 8 spaces after each newline.
+ """
+ lines = string.split(header, '\n')
+ header = string.join(lines, '\n' + 8*' ') # every newline becomes newline + 8 spaces
+ return header
diff --git a/cashew/Lib/distutils/version.py b/cashew/Lib/distutils/version.py
new file mode 100644
index 0000000..0fb5b6e
--- /dev/null
+++ b/cashew/Lib/distutils/version.py
@@ -0,0 +1,299 @@
+#
+# distutils/version.py
+#
+# Implements multiple version numbering conventions for the
+# Python Module Distribution Utilities.
+#
+# $Id$
+#
+
+"""Provides classes to represent module version numbers (one class for
+each style of version numbering). There are currently two such classes
+implemented: StrictVersion and LooseVersion.
+
+Every version number class implements the following interface:
+ * the 'parse' method takes a string and parses it to some internal
+ representation; if the string is an invalid version number,
+ 'parse' raises a ValueError exception
+ * the class constructor takes an optional string argument which,
+ if supplied, is passed to 'parse'
+ * __str__ reconstructs the string that was passed to 'parse' (or
+ an equivalent string -- ie. one that will generate an equivalent
+ version number instance)
+ * __repr__ generates Python code to recreate the version number instance
+ * __cmp__ compares the current instance with either another instance
+ of the same class or a string (which will be parsed to an instance
+ of the same class, thus must follow the same rules)
+"""
+
+import string, re
+from types import StringType
+
+class Version:
+ """Abstract base class for version numbering classes. Just provides
+ constructor (__init__) and reproducer (__repr__), because those
+ seem to be the same for all version numbering classes.
+ """
+
+ def __init__ (self, vstring=None):
+ if vstring: # None or an empty string leaves the instance unparsed
+ self.parse(vstring)
+
+ def __repr__ (self):
+ return "%s ('%s')" % (self.__class__.__name__, str(self)) # relies on the subclass's __str__
+
+
+# Interface for version-number classes -- must be implemented
+# by the following classes (the concrete ones -- Version should
+# be treated as an abstract class).
+# __init__ (string) - create and take same action as 'parse'
+# (string parameter is optional)
+# parse (string) - convert a string representation to whatever
+# internal representation is appropriate for
+# this style of version numbering
+# __str__ (self) - convert back to a string; should be very similar to
+# (if not identical to) the string supplied to parse
+# __repr__ (self) - generate Python code to recreate
+# the instance
+# __cmp__ (self, other) - compare two version numbers ('other' may
+# be an unparsed version string, or another
+# instance of your version class)
+
+
+class StrictVersion (Version):
+
+ """Version numbering for anal retentives and software idealists.
+ Implements the standard interface for version number classes as
+ described above. A version number consists of two or three
+ dot-separated numeric components, with an optional "pre-release" tag
+ on the end. The pre-release tag consists of the letter 'a' or 'b'
+ followed by a number. If the numeric components of two version
+ numbers are equal, then one with a pre-release tag will always
+ be deemed earlier (lesser) than one without.
+
+ The following are valid version numbers (shown in the order that
+ would be obtained by sorting according to the supplied cmp function):
+
+ 0.4 0.4.0 (these two are equivalent)
+ 0.4.1
+ 0.5a1
+ 0.5b3
+ 0.5
+ 0.9.6
+ 1.0
+ 1.0.4a3
+ 1.0.4b1
+ 1.0.4
+
+ The following are examples of invalid version numbers:
+
+ 1
+ 2.7.2.2
+ 1.3.a4
+ 1.3pl1
+ 1.3c4
+
+ The rationale for this version numbering system will be explained
+ in the distutils documentation.
+ """
+
+ version_re = re.compile(r'^(\d+) \. (\d+) (\. (\d+))? ([ab](\d+))?$',
+ re.VERBOSE) # VERBOSE: the spaces inside the pattern are ignored
+
+
+ def parse (self, vstring):
+ match = self.version_re.match(vstring)
+ if not match:
+ raise ValueError, "invalid version number '%s'" % vstring
+
+ (major, minor, patch, prerelease, prerelease_num) = \
+ match.group(1, 2, 4, 5, 6)
+
+ if patch:
+ self.version = tuple(map(string.atoi, [major, minor, patch]))
+ else:
+ self.version = tuple(map(string.atoi, [major, minor]) + [0]) # a missing patch level counts as 0
+
+ if prerelease:
+ self.prerelease = (prerelease[0], string.atoi(prerelease_num)) # (letter 'a' or 'b', number)
+ else:
+ self.prerelease = None
+
+
+ def __str__ (self):
+
+ if self.version[2] == 0: # a zero patch level is omitted, so "0.4.0" prints as "0.4"
+ vstring = string.join(map(str, self.version[0:2]), '.')
+ else:
+ vstring = string.join(map(str, self.version), '.')
+
+ if self.prerelease:
+ vstring = vstring + self.prerelease[0] + str(self.prerelease[1])
+
+ return vstring
+
+
+ def __cmp__ (self, other):
+ if isinstance(other, StringType): # a plain string is parsed first
+ other = StrictVersion(other)
+
+ compare = cmp(self.version, other.version)
+ if (compare == 0): # have to compare prerelease
+
+ # case 1: neither has prerelease; they're equal
+ # case 2: self has prerelease, other doesn't; other is greater
+ # case 3: self doesn't have prerelease, other does: self is greater
+ # case 4: both have prerelease: must compare them!
+
+ if (not self.prerelease and not other.prerelease):
+ return 0
+ elif (self.prerelease and not other.prerelease):
+ return -1
+ elif (not self.prerelease and other.prerelease):
+ return 1
+ elif (self.prerelease and other.prerelease):
+ return cmp(self.prerelease, other.prerelease) # tuple comparison: letter first, then number
+
+ else: # numeric versions don't match --
+ return compare # prerelease stuff doesn't matter
+
+
+# end class StrictVersion
+
+
+# The rules according to Greg Stein:
+# 1) a version number has 1 or more numbers separated by a period or by
+# sequences of letters. If only periods, then these are compared
+# left-to-right to determine an ordering.
+# 2) sequences of letters are part of the tuple for comparison and are
+# compared lexicographically
+# 3) recognize the numeric components may have leading zeroes
+#
+# The LooseVersion class below implements these rules: a version number
+# string is split up into a tuple of integer and string components, and
+# comparison is a simple tuple comparison. This means that version
+# numbers behave in a predictable and obvious way, but a way that might
+# not necessarily be how people *want* version numbers to behave. There
+# wouldn't be a problem if people could stick to purely numeric version
+# numbers: just split on period and compare the numbers as tuples.
+# However, people insist on putting letters into their version numbers;
+# the most common purpose seems to be:
+# - indicating a "pre-release" version
+# ('alpha', 'beta', 'a', 'b', 'pre', 'p')
+# - indicating a post-release patch ('p', 'pl', 'patch')
+# but of course this can't cover all version number schemes, and there's
+# no way to know what a programmer means without asking him.
+#
+# The problem is what to do with letters (and other non-numeric
+# characters) in a version number. The current implementation does the
+# obvious and predictable thing: keep them as strings and compare
+# lexically within a tuple comparison. This has the desired effect if
+# an appended letter sequence implies something "post-release":
+# eg. "0.99" < "0.99pl14" < "1.0", and "5.001" < "5.001m" < "5.002".
+#
+# However, if letters in a version number imply a pre-release version,
+# the "obvious" thing isn't correct. Eg. you would expect that
+# "1.5.1" < "1.5.2a2" < "1.5.2", but under the tuple/lexical comparison
+# implemented here, this just isn't so.
+#
+# Two possible solutions come to mind. The first is to tie the
+# comparison algorithm to a particular set of semantic rules, as has
+# been done in the StrictVersion class above. This works great as long
+# as everyone can go along with bondage and discipline. Hopefully a
+# (large) subset of Python module programmers will agree that the
+# particular flavour of bondage and discipline provided by StrictVersion
+# provides enough benefit to be worth using, and will submit their
+# version numbering scheme to its domination. The free-thinking
+# anarchists in the lot will never give in, though, and something needs
+# to be done to accommodate them.
+#
+# Perhaps a "moderately strict" version class could be implemented that
+# lets almost anything slide (syntactically), and makes some heuristic
+# assumptions about non-digits in version number strings. This could
+# sink into special-case-hell, though; if I was as talented and
+# idiosyncratic as Larry Wall, I'd go ahead and implement a class that
+# somehow knows that "1.2.1" < "1.2.2a2" < "1.2.2" < "1.2.2pl3", and is
+# just as happy dealing with things like "2g6" and "1.13++". I don't
+# think I'm smart enough to do it right though.
+#
+# In any case, I've coded the test suite for this module (see
+# ../test/test_version.py) specifically to fail on things like comparing
+# "1.2a2" and "1.2". That's not because the *code* is doing anything
+# wrong, it's because the simple, obvious design doesn't match my
+# complicated, hairy expectations for real-world version numbers. It
+# would be a snap to fix the test suite to say, "Yep, LooseVersion does
+# the Right Thing" (ie. the code matches the conception). But I'd rather
+# have a conception that matches common notions about version numbers.
+
+class LooseVersion (Version):
+
+ """Version numbering for anarchists and software realists.
+ Implements the standard interface for version number classes as
+ described above. A version number consists of a series of numbers,
+ separated by either periods or strings of letters. When comparing
+ version numbers, the numeric components will be compared
+ numerically, and the alphabetic components lexically. The following
+ are all valid version numbers, in no particular order:
+
+ 1.5.1
+ 1.5.2b2
+ 161
+ 3.10a
+ 8.02
+ 3.4j
+ 1996.07.12
+ 3.2.pl0
+ 3.1.1.6
+ 2g6
+ 11g
+ 0.960923
+ 2.2beta29
+ 1.13++
+ 5.5.kw
+ 2.0b1pl0
+
+ In fact, there is no such thing as an invalid version number under
+ this scheme; the rules for comparison are simple and predictable,
+ but may not always give the results you want (for some definition
+ of "want").
+ """
+
+ component_re = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE) # digit run, lowercase-letter run, or a period
+
+ def __init__ (self, vstring=None):
+ if vstring: # None or an empty string leaves the instance unparsed
+ self.parse(vstring)
+
+
+ def parse (self, vstring):
+ # I've given up on thinking I can reconstruct the version string
+ # from the parsed tuple -- so I just store the string here for
+ # use by __str__
+ self.vstring = vstring
+ components = filter(lambda x: x and x != '.', # drop empty strings and the '.' separators
+ self.component_re.split(vstring))
+ for i in range(len(components)):
+ try:
+ components[i] = int(components[i])
+ except ValueError: # non-numeric components stay as strings
+ pass
+
+ self.version = components
+
+
+ def __str__ (self):
+ return self.vstring
+
+
+ def __repr__ (self):
+ return "LooseVersion ('%s')" % str(self)
+
+
+ def __cmp__ (self, other):
+ if isinstance(other, StringType): # a plain string is parsed first
+ other = LooseVersion(other)
+
+ return cmp(self.version, other.version) # element-wise comparison of the component lists
+
+
+# end class LooseVersion
diff --git a/cashew/Lib/distutils/versionpredicate.py b/cashew/Lib/distutils/versionpredicate.py
new file mode 100644
index 0000000..ba8b6c0
--- /dev/null
+++ b/cashew/Lib/distutils/versionpredicate.py
@@ -0,0 +1,164 @@
+"""Module for parsing and testing package version predicate strings.
+"""
+import re
+import distutils.version
+import operator
+
+
+# Matches a (possibly dotted) package name at the start of the string,
+# case-insensitively.  Group 1 is the package name, group 2 is whatever
+# text follows it.
+re_validPackage = re.compile(r"(?i)^\s*([a-z_]\w*(?:\.[a-z_]\w*)*)(.*)")
+
+# Matches text wrapped in one pair of parentheses; group 1 is the list
+# found between them.
+re_paren = re.compile(r"^\s*\((.*)\)\s*$")  # (list) inside of parentheses
+# Matches a single comparison clause.  Group 1 is the comparison operator,
+# group 2 is the version text (no whitespace or commas).
+re_splitComparison = re.compile(r"^\s*(<=|>=|<|>|!=|==)\s*([^\s,]+)\s*$")
+
+
+def splitUp(pred):
+ """Parse a single version comparison.
+
+ Return (comparison string, StrictVersion)
+ """
+ res = re_splitComparison.match(pred)
+ if not res:
+ raise ValueError("bad package restriction syntax: %r" % pred)
+ comp, verStr = res.groups()
+ return (comp, distutils.version.StrictVersion(verStr))
+
+# Maps each comparison string accepted by re_splitComparison to the
+# function from the `operator` module that implements it.
+compmap = {"<": operator.lt, "<=": operator.le, "==": operator.eq,
+           ">": operator.gt, ">=": operator.ge, "!=": operator.ne}
+
+class VersionPredicate:
+ """Parse and test package version predicates.
+
+ >>> v = VersionPredicate('pyepat.abc (>1.0, <3333.3a1, !=1555.1b3)')
+
+ The `name` attribute provides the full dotted name that is given::
+
+ >>> v.name
+ 'pyepat.abc'
+
+ The str() of a `VersionPredicate` provides a normalized
+ human-readable version of the expression::
+
+ >>> print v
+ pyepat.abc (> 1.0, < 3333.3a1, != 1555.1b3)
+
+ The `satisfied_by()` method can be used to determine with a given
+ version number is included in the set described by the version
+ restrictions::
+
+ >>> v.satisfied_by('1.1')
+ True
+ >>> v.satisfied_by('1.4')
+ True
+ >>> v.satisfied_by('1.0')
+ False
+ >>> v.satisfied_by('4444.4')
+ False
+ >>> v.satisfied_by('1555.1b3')
+ False
+
+ `VersionPredicate` is flexible in accepting extra whitespace::
+
+ >>> v = VersionPredicate(' pat( == 0.1 ) ')
+ >>> v.name
+ 'pat'
+ >>> v.satisfied_by('0.1')
+ True
+ >>> v.satisfied_by('0.2')
+ False
+
+ If any version numbers passed in do not conform to the
+ restrictions of `StrictVersion`, a `ValueError` is raised::
+
+ >>> v = VersionPredicate('p1.p2.p3.p4(>=1.0, <=1.3a1, !=1.2zb3)')
+ Traceback (most recent call last):
+ ...
+ ValueError: invalid version number '1.2zb3'
+
+ It the module or package name given does not conform to what's
+ allowed as a legal module or package name, `ValueError` is
+ raised::
+
+ >>> v = VersionPredicate('foo-bar')
+ Traceback (most recent call last):
+ ...
+ ValueError: expected parenthesized list: '-bar'
+
+ >>> v = VersionPredicate('foo bar (12.21)')
+ Traceback (most recent call last):
+ ...
+ ValueError: expected parenthesized list: 'bar (12.21)'
+
+ """
+
+ def __init__(self, versionPredicateStr):
+ """Parse a version predicate string.
+ """
+ # Fields:
+ # name: package name
+ # pred: list of (comparison string, StrictVersion)
+
+ versionPredicateStr = versionPredicateStr.strip()
+ if not versionPredicateStr:
+ raise ValueError("empty package restriction")
+ match = re_validPackage.match(versionPredicateStr)
+ if not match:
+ raise ValueError("bad package name in %r" % versionPredicateStr)
+ self.name, paren = match.groups()
+ paren = paren.strip()
+ if paren:
+ match = re_paren.match(paren)
+ if not match:
+ raise ValueError("expected parenthesized list: %r" % paren)
+ str = match.groups()[0]
+ self.pred = [splitUp(aPred) for aPred in str.split(",")]
+ if not self.pred:
+ raise ValueError("empty parenthesized list in %r"
+ % versionPredicateStr)
+ else:
+ self.pred = []
+
+ def __str__(self):
+ if self.pred:
+ seq = [cond + " " + str(ver) for cond, ver in self.pred]
+ return self.name + " (" + ", ".join(seq) + ")"
+ else:
+ return self.name
+
+ def satisfied_by(self, version):
+ """True if version is compatible with all the predicates in self.
+ The parameter version must be acceptable to the StrictVersion
+ constructor. It may be either a string or StrictVersion.
+ """
+ for cond, ver in self.pred:
+ if not compmap[cond](version, ver):
+ return False
+ return True
+
+
+_provision_rx = None
+
+def split_provision(value):
+ """Return the name and optional version number of a provision.
+
+ The version number, if given, will be returned as a `StrictVersion`
+ instance, otherwise it will be `None`.
+
+ >>> split_provision('mypkg')
+ ('mypkg', None)
+ >>> split_provision(' mypkg( 1.2 ) ')
+ ('mypkg', StrictVersion ('1.2'))
+ """
+ global _provision_rx
+ if _provision_rx is None:
+ _provision_rx = re.compile(
+ "([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)*)(?:\s*\(\s*([^)\s]+)\s*\))?$")
+ value = value.strip()
+ m = _provision_rx.match(value)
+ if not m:
+ raise ValueError("illegal provides specification: %r" % value)
+ ver = m.group(2) or None
+ if ver:
+ ver = distutils.version.StrictVersion(ver)
+ return m.group(1), ver
diff --git a/cashew/Lib/doctest.py b/cashew/Lib/doctest.py
new file mode 100644
index 0000000..fedf670
--- /dev/null
+++ b/cashew/Lib/doctest.py
@@ -0,0 +1,2817 @@
+# Module doctest.
+# Released to the public domain 16-Jan-2001, by Tim Peters (tim@python.org).
+# Major enhancements and refactoring by:
+# Jim Fulton
+# Edward Loper
+
+# Provided as-is; use at your own risk; no warranty; no promises; enjoy!
+
+r"""Module doctest -- a framework for running examples in docstrings.
+
+In simplest use, end each module M to be tested with:
+
+def _test():
+ import doctest
+ doctest.testmod()
+
+if __name__ == "__main__":
+ _test()
+
+Then running the module as a script will cause the examples in the
+docstrings to get executed and verified:
+
+python M.py
+
+This won't display anything unless an example fails, in which case the
+failing example(s) and the cause(s) of the failure(s) are printed to stdout
+(why not stderr? because stderr is a lame hack <0.2 wink>), and the final
+line of output is "Test failed.".
+
+Run it with the -v switch instead:
+
+python M.py -v
+
+and a detailed report of all examples tried is printed to stdout, along
+with assorted summaries at the end.
+
+You can force verbose mode by passing "verbose=True" to testmod, or prohibit
+it by passing "verbose=False". In either of those cases, sys.argv is not
+examined by testmod.
+
+There are a variety of other ways to run doctests, including integration
+with the unittest framework, and support for running non-Python text
+files containing doctests. There are also many ways to override parts
+of doctest's default behaviors. See the Library Reference Manual for
+details.
+"""
+
+__docformat__ = 'reStructuredText en'
+
+__all__ = [
+    # 0. Option Flags
+ 'register_optionflag',
+ 'DONT_ACCEPT_TRUE_FOR_1',
+ 'DONT_ACCEPT_BLANKLINE',
+ 'NORMALIZE_WHITESPACE',
+ 'ELLIPSIS',
+ 'SKIP',
+ 'IGNORE_EXCEPTION_DETAIL',
+ 'COMPARISON_FLAGS',
+ 'REPORT_UDIFF',
+ 'REPORT_CDIFF',
+ 'REPORT_NDIFF',
+ 'REPORT_ONLY_FIRST_FAILURE',
+ 'REPORTING_FLAGS',
+ # 1. Utility Functions
+ # 2. Example & DocTest
+ 'Example',
+ 'DocTest',
+ # 3. Doctest Parser
+ 'DocTestParser',
+ # 4. Doctest Finder
+ 'DocTestFinder',
+ # 5. Doctest Runner
+ 'DocTestRunner',
+ 'OutputChecker',
+ 'DocTestFailure',
+ 'UnexpectedException',
+ 'DebugRunner',
+ # 6. Test Functions
+ 'testmod',
+ 'testfile',
+ 'run_docstring_examples',
+ # 7. Tester
+ 'Tester',
+ # 8. Unittest Support
+ 'DocTestSuite',
+ 'DocFileSuite',
+ 'set_unittest_reportflags',
+ # 9. Debugging Support
+ 'script_from_examples',
+ 'testsource',
+ 'debug_src',
+ 'debug',
+]
+
+import __future__
+
+import sys, traceback, inspect, linecache, os, re
+import unittest, difflib, pdb, tempfile
+import warnings
+from StringIO import StringIO
+from collections import namedtuple
+
+TestResults = namedtuple('TestResults', 'failed attempted')
+
+# There are 4 basic classes:
+#  - Example: a <source, want> pair, plus an intra-docstring line number.
+# - DocTest: a collection of examples, parsed from a docstring, plus
+# info about where the docstring came from (name, filename, lineno).
+# - DocTestFinder: extracts DocTests from a given object's docstring and
+# its contained objects' docstrings.
+# - DocTestRunner: runs DocTest cases, and accumulates statistics.
+#
+# So the basic picture is:
+#
+#                             list of:
+# +------+                   +---------+                   +-------+
+# |object| --DocTestFinder-> | DocTest | --DocTestRunner-> |results|
+# +------+                   +---------+                   +-------+
+#                            | Example |
+#                            |   ...   |
+#                            | Example |
+#                            +---------+
+
+# Option constants.
+
+OPTIONFLAGS_BY_NAME = {}
+def register_optionflag(name):
+ # Create a new flag unless `name` is already known.
+ return OPTIONFLAGS_BY_NAME.setdefault(name, 1 << len(OPTIONFLAGS_BY_NAME))
+
+# Each register_optionflag() call hands out the next unused bit, so the
+# numeric values below follow the order of these statements.
+DONT_ACCEPT_TRUE_FOR_1 = register_optionflag('DONT_ACCEPT_TRUE_FOR_1')
+DONT_ACCEPT_BLANKLINE = register_optionflag('DONT_ACCEPT_BLANKLINE')
+NORMALIZE_WHITESPACE = register_optionflag('NORMALIZE_WHITESPACE')
+ELLIPSIS = register_optionflag('ELLIPSIS')
+SKIP = register_optionflag('SKIP')
+IGNORE_EXCEPTION_DETAIL = register_optionflag('IGNORE_EXCEPTION_DETAIL')
+
+# Bit mask combining the six flags registered above.
+COMPARISON_FLAGS = (DONT_ACCEPT_TRUE_FOR_1 |
+                    DONT_ACCEPT_BLANKLINE |
+                    NORMALIZE_WHITESPACE |
+                    ELLIPSIS |
+                    SKIP |
+                    IGNORE_EXCEPTION_DETAIL)
+
+REPORT_UDIFF = register_optionflag('REPORT_UDIFF')
+REPORT_CDIFF = register_optionflag('REPORT_CDIFF')
+REPORT_NDIFF = register_optionflag('REPORT_NDIFF')
+REPORT_ONLY_FIRST_FAILURE = register_optionflag('REPORT_ONLY_FIRST_FAILURE')
+
+# Bit mask combining the four REPORT_* flags registered above.
+REPORTING_FLAGS = (REPORT_UDIFF |
+                   REPORT_CDIFF |
+                   REPORT_NDIFF |
+                   REPORT_ONLY_FIRST_FAILURE)
+
+# Special string markers for use in `want` strings:
+BLANKLINE_MARKER = ''
+ELLIPSIS_MARKER = '...'
+
+######################################################################
+## Table of Contents
+######################################################################
+# 1. Utility Functions
+# 2. Example & DocTest -- store test cases
+# 3. DocTest Parser -- extracts examples from strings
+# 4. DocTest Finder -- extracts test cases from objects
+# 5. DocTest Runner -- runs test cases
+# 6. Test Functions -- convenient wrappers for testing
+# 7. Tester Class -- for backwards compatibility
+# 8. Unittest Support
+# 9. Debugging Support
+# 10. Example Usage
+
+######################################################################
+## 1. Utility Functions
+######################################################################
+
+def _extract_future_flags(globs):
+ """
+ Return the compiler-flags associated with the future features that
+ have been imported into the given namespace (globs).
+ """
+ flags = 0
+ for fname in __future__.all_feature_names:
+ feature = globs.get(fname, None)
+ if feature is getattr(__future__, fname):
+ flags |= feature.compiler_flag
+ return flags
+
+def _normalize_module(module, depth=2):
+ """
+ Return the module specified by `module`. In particular:
+ - If `module` is a module, then return module.
+ - If `module` is a string, then import and return the
+ module with that name.
+ - If `module` is None, then return the calling module.
+ The calling module is assumed to be the module of
+ the stack frame at the given depth in the call stack.
+ """
+ if inspect.ismodule(module):
+ return module
+ elif isinstance(module, (str, unicode)):
+ return __import__(module, globals(), locals(), ["*"])
+ elif module is None:
+ return sys.modules[sys._getframe(depth).f_globals['__name__']]
+ else:
+ raise TypeError("Expected a module, string, or None")
+
+def _load_testfile(filename, package, module_relative):
+ if module_relative:
+ package = _normalize_module(package, 3)
+ filename = _module_relative_path(package, filename)
+ if hasattr(package, '__loader__'):
+ if hasattr(package.__loader__, 'get_data'):
+ file_contents = package.__loader__.get_data(filename)
+ # get_data() opens files as 'rb', so one must do the equivalent
+ # conversion as universal newlines would do.
+ return file_contents.replace(os.linesep, '\n'), filename
+ with open(filename, 'U') as f:
+ return f.read(), filename
+
+# Use sys.stdout encoding for output.
+_encoding = getattr(sys.__stdout__, 'encoding', None) or 'utf-8'
+
+def _indent(s, indent=4):
+ """
+ Add the given number of space characters to the beginning of
+ every non-blank line in `s`, and return the result.
+ If the string `s` is Unicode, it is encoded using the stdout
+ encoding and the `backslashreplace` error handler.
+ """
+ if isinstance(s, unicode):
+ s = s.encode(_encoding, 'backslashreplace')
+ # This regexp matches the start of non-blank lines:
+ return re.sub('(?m)^(?!$)', indent*' ', s)
+
+def _exception_traceback(exc_info):
+ """
+ Return a string containing a traceback message for the given
+ exc_info tuple (as returned by sys.exc_info()).
+ """
+ # Get a traceback message.
+ excout = StringIO()
+ exc_type, exc_val, exc_tb = exc_info
+ traceback.print_exception(exc_type, exc_val, exc_tb, file=excout)
+ return excout.getvalue()
+
+# Override some StringIO methods.
+class _SpoofOut(StringIO):
+    """StringIO subclass with adjusted getvalue() and truncate().
+
+    getvalue() guarantees a trailing newline on non-empty output, and
+    both methods discard any `softspace` attribute set on the instance.
+    """
+    def getvalue(self):
+        """Return everything written so far, newline-terminated if non-empty."""
+        result = StringIO.getvalue(self)
+        # If anything at all was written, make sure there's a trailing
+        # newline.  There's no way for the expected output to indicate
+        # that a trailing newline is missing.
+        if result and not result.endswith("\n"):
+            result += "\n"
+        # Prevent softspace from screwing up the next test case, in
+        # case they used print with a trailing comma in an example.
+        if hasattr(self, "softspace"):
+            del self.softspace
+        return result
+
+    def truncate(self, size=None):
+        """Truncate via StringIO.truncate(), then clear leftover state."""
+        StringIO.truncate(self, size)
+        if hasattr(self, "softspace"):
+            del self.softspace
+        if not self.buf:
+            # Reset it to an empty string, to make sure it's not unicode.
+            self.buf = ''
+
+# Worst-case linear-time ellipsis matching.
+def _ellipsis_match(want, got):
+    """
+    Return True if `got` matches `want`, where each ELLIPSIS_MARKER in
+    `want` may stand for any substring of `got` (including none).
+    Without a marker in `want`, this is plain equality.
+
+    Essentially the only subtle case:
+    >>> _ellipsis_match('aa...aa', 'aaa')
+    False
+    """
+    if ELLIPSIS_MARKER not in want:
+        return want == got
+
+    # Find "the real" strings.
+    ws = want.split(ELLIPSIS_MARKER)
+    assert len(ws) >= 2
+
+    # Deal with exact matches possibly needed at one or both ends.
+    # startpos/endpos bound the part of `got` still open for matching.
+    startpos, endpos = 0, len(got)
+    w = ws[0]
+    if w:   # starts with exact match
+        if got.startswith(w):
+            startpos = len(w)
+            del ws[0]
+        else:
+            return False
+    w = ws[-1]
+    if w:   # ends with exact match
+        if got.endswith(w):
+            endpos -= len(w)
+            del ws[-1]
+        else:
+            return False
+
+    if startpos > endpos:
+        # Exact end matches required more characters than we have, as in
+        # _ellipsis_match('aa...aa', 'aaa')
+        return False
+
+    # For the rest, we only need to find the leftmost non-overlapping
+    # match for each piece.  If there's no overall match that way alone,
+    # there's no overall match period.
+    for w in ws:
+        # w may be '' at times, if there are consecutive ellipses, or
+        # due to an ellipsis at the start or end of `want`.  That's OK.
+        # Search for an empty string succeeds, and doesn't change startpos.
+        startpos = got.find(w, startpos, endpos)
+        if startpos < 0:
+            return False
+        startpos += len(w)
+
+    return True
+
+def _comment_line(line):
+ "Return a commented form of the given line"
+ line = line.rstrip()
+ if line:
+ return '# '+line
+ else:
+ return '#'
+
+def _strip_exception_details(msg):
+ # Support for IGNORE_EXCEPTION_DETAIL.
+ # Get rid of everything except the exception name; in particular, drop
+ # the possibly dotted module path (if any) and the exception message (if
+ # any). We assume that a colon is never part of a dotted name, or of an
+ # exception name.
+ # E.g., given
+ # "foo.bar.MyError: la di da"
+ # return "MyError"
+ # Or for "abc.def" or "abc.def:\n" return "def".
+
+ start, end = 0, len(msg)
+ # The exception name must appear on the first line.
+ i = msg.find("\n")
+ if i >= 0:
+ end = i
+ # retain up to the first colon (if any)
+ i = msg.find(':', 0, end)
+ if i >= 0:
+ end = i
+ # retain just the exception name
+ i = msg.rfind('.', 0, end)
+ if i >= 0:
+ start = i+1
+ return msg[start: end]
+
+class _OutputRedirectingPdb(pdb.Pdb):
+    """
+    A specialized version of the python debugger that redirects stdout
+    to a given stream when interacting with the user.  Stdout is *not*
+    redirected when traced code is executed.
+    """
+    def __init__(self, out):
+        """Create a debugger whose own output goes to the stream `out`."""
+        self.__out = out
+        # Set once set_trace() has been called; checked by set_continue().
+        self.__debugger_used = False
+        pdb.Pdb.__init__(self, stdout=out)
+        # still use input() to get user input
+        self.use_rawinput = 1
+
+    def set_trace(self, frame=None):
+        """Start debugging from `frame`, defaulting to the caller's frame."""
+        self.__debugger_used = True
+        if frame is None:
+            frame = sys._getframe().f_back
+        pdb.Pdb.set_trace(self, frame)
+
+    def set_continue(self):
+        # Calling set_continue unconditionally would break unit test
+        # coverage reporting, as Bdb.set_continue calls sys.settrace(None).
+        if self.__debugger_used:
+            pdb.Pdb.set_continue(self)
+
+    def trace_dispatch(self, *args):
+        """Run Pdb's trace dispatch with sys.stdout pointing at `out`."""
+        # Redirect stdout to the given stream.
+        save_stdout = sys.stdout
+        sys.stdout = self.__out
+        # Call Pdb's trace dispatch method.
+        try:
+            return pdb.Pdb.trace_dispatch(self, *args)
+        finally:
+            # Always restore the stream that was installed before the call.
+            sys.stdout = save_stdout
+
+# [XX] Normalize with respect to os.path.pardir?
+def _module_relative_path(module, path):
+ if not inspect.ismodule(module):
+ raise TypeError, 'Expected a module: %r' % module
+ if path.startswith('/'):
+ raise ValueError, 'Module-relative files may not have absolute paths'
+
+ # Find the base directory for the path.
+ if hasattr(module, '__file__'):
+ # A normal module/package
+ basedir = os.path.split(module.__file__)[0]
+ elif module.__name__ == '__main__':
+ # An interactive session.
+ if len(sys.argv)>0 and sys.argv[0] != '':
+ basedir = os.path.split(sys.argv[0])[0]
+ else:
+ basedir = os.curdir
+ else:
+ # A module w/o __file__ (this includes builtins)
+ raise ValueError("Can't resolve paths relative to the module " +
+ module + " (it has no __file__)")
+
+ # Combine the base directory and the path.
+ return os.path.join(basedir, *(path.split('/')))
+
+######################################################################
+## 2. Example & DocTest
+######################################################################
+## - An "example" is a <source, want> pair, where "source" is a
+## fragment of source code, and "want" is the expected output for
+## "source." The Example class also includes information about
+## where the example was extracted from.
+##
+## - A "doctest" is a collection of examples, typically extracted from
+## a string (such as an object's docstring). The DocTest class also
+## includes information about where the string was extracted from.
+
+class Example:
+ """
+ A single doctest example, consisting of source code and expected
+ output. `Example` defines the following attributes:
+
+ - source: A single Python statement, always ending with a newline.
+ The constructor adds a newline if needed.
+
+ - want: The expected output from running the source code (either
+ from stdout, or a traceback in case of exception). `want` ends
+ with a newline unless it's empty, in which case it's an empty
+ string. The constructor adds a newline if needed.
+
+ - exc_msg: The exception message generated by the example, if
+ the example is expected to generate an exception; or `None` if
+ it is not expected to generate an exception. This exception
+ message is compared against the return value of
+ `traceback.format_exception_only()`. `exc_msg` ends with a
+ newline unless it's `None`. The constructor adds a newline
+ if needed.
+
+ - lineno: The line number within the DocTest string containing
+ this Example where the Example begins. This line number is
+ zero-based, with respect to the beginning of the DocTest.
+
+ - indent: The example's indentation in the DocTest string.
+ I.e., the number of space characters that precede the
+ example's first prompt.
+
+ - options: A dictionary mapping from option flags to True or
+ False, which is used to override default options for this
+ example. Any option flags not contained in this dictionary
+ are left at their default value (as specified by the
+ DocTestRunner's optionflags). By default, no options are set.
+ """
+ def __init__(self, source, want, exc_msg=None, lineno=0, indent=0,
+ options=None):
+ # Normalize inputs.
+ if not source.endswith('\n'):
+ source += '\n'
+ if want and not want.endswith('\n'):
+ want += '\n'
+ if exc_msg is not None and not exc_msg.endswith('\n'):
+ exc_msg += '\n'
+ # Store properties.
+ self.source = source
+ self.want = want
+ self.lineno = lineno
+ self.indent = indent
+ if options is None: options = {}
+ self.options = options
+ self.exc_msg = exc_msg
+
+ def __eq__(self, other):
+ if type(self) is not type(other):
+ return NotImplemented
+
+ return self.source == other.source and \
+ self.want == other.want and \
+ self.lineno == other.lineno and \
+ self.indent == other.indent and \
+ self.options == other.options and \
+ self.exc_msg == other.exc_msg
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash((self.source, self.want, self.lineno, self.indent,
+ self.exc_msg))
+
+
+class DocTest:
+ """
+ A collection of doctest examples that should be run in a single
+ namespace. Each `DocTest` defines the following attributes:
+
+ - examples: the list of examples.
+
+ - globs: The namespace (aka globals) that the examples should
+ be run in.
+
+ - name: A name identifying the DocTest (typically, the name of
+ the object whose docstring this DocTest was extracted from).
+
+ - filename: The name of the file that this DocTest was extracted
+ from, or `None` if the filename is unknown.
+
+ - lineno: The line number within filename where this DocTest
+ begins, or `None` if the line number is unavailable. This
+ line number is zero-based, with respect to the beginning of
+ the file.
+
+ - docstring: The string that the examples were extracted from,
+ or `None` if the string is unavailable.
+ """
+ def __init__(self, examples, globs, name, filename, lineno, docstring):
+ """
+ Create a new DocTest containing the given examples. The
+ DocTest's globals are initialized with a copy of `globs`.
+ """
+ assert not isinstance(examples, basestring), \
+ "DocTest no longer accepts str; use DocTestParser instead"
+ self.examples = examples
+ self.docstring = docstring
+ self.globs = globs.copy()
+ self.name = name
+ self.filename = filename
+ self.lineno = lineno
+
+ def __repr__(self):
+ if len(self.examples) == 0:
+ examples = 'no examples'
+ elif len(self.examples) == 1:
+ examples = '1 example'
+ else:
+ examples = '%d examples' % len(self.examples)
+ return ('' %
+ (self.name, self.filename, self.lineno, examples))
+
+ def __eq__(self, other):
+ if type(self) is not type(other):
+ return NotImplemented
+
+ return self.examples == other.examples and \
+ self.docstring == other.docstring and \
+ self.globs == other.globs and \
+ self.name == other.name and \
+ self.filename == other.filename and \
+ self.lineno == other.lineno
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash((self.docstring, self.name, self.filename, self.lineno))
+
+ # This lets us sort tests by name:
+ def __cmp__(self, other):
+ if not isinstance(other, DocTest):
+ return -1
+ return cmp((self.name, self.filename, self.lineno, id(self)),
+ (other.name, other.filename, other.lineno, id(other)))
+
+######################################################################
+## 3. DocTestParser
+######################################################################
+
+class DocTestParser:
+ """
+ A class used to parse strings containing doctest examples.
+ """
+ # This regular expression is used to find doctest examples in a
+ # string. It defines three groups: `source` is the source code
+ # (including leading indentation and prompts); `indent` is the
+ # indentation of the first (PS1) line of the source code; and
+ # `want` is the expected output (including leading indentation).
+ _EXAMPLE_RE = re.compile(r'''
+ # Source consists of a PS1 line followed by zero or more PS2 lines.
+ (?P
+ (?:^(?P [ ]*) >>> .*) # PS1 line
+ (?:\n [ ]* \.\.\. .*)*) # PS2 lines
+ \n?
+ # Want consists of any non-blank lines that do not start with PS1.
+ (?P (?:(?![ ]*$) # Not a blank line
+ (?![ ]*>>>) # Not a line starting with PS1
+ .+$\n? # But any other line
+ )*)
+ ''', re.MULTILINE | re.VERBOSE)
+
+ # A regular expression for handling `want` strings that contain
+ # expected exceptions. It divides `want` into three pieces:
+ # - the traceback header line (`hdr`)
+ # - the traceback stack (`stack`)
+ # - the exception message (`msg`), as generated by
+ # traceback.format_exception_only()
+ # `msg` may have multiple lines. We assume/require that the
+ # exception message is the first non-indented line starting with a word
+ # character following the traceback header line.
+ _EXCEPTION_RE = re.compile(r"""
+ # Grab the traceback header. Different versions of Python have
+ # said different things on the first traceback line.
+ ^(?P Traceback\ \(
+ (?: most\ recent\ call\ last
+ | innermost\ last
+ ) \) :
+ )
+ \s* $ # toss trailing whitespace on the header.
+ (?P .*?) # don't blink: absorb stuff until...
+ ^ (?P \w+ .*) # a line *starts* with alphanum.
+ """, re.VERBOSE | re.MULTILINE | re.DOTALL)
+
+ # A callable returning a true value iff its argument is a blank line
+ # or contains a single comment.
+ _IS_BLANK_OR_COMMENT = re.compile(r'^[ ]*(#.*)?$').match
+
+ def parse(self, string, name=''):
+ """
+ Divide the given string into examples and intervening text,
+ and return them as a list of alternating Examples and strings.
+ Line numbers for the Examples are 0-based. The optional
+ argument `name` is a name identifying this string, and is only
+ used for error messages.
+ """
+ string = string.expandtabs()
+ # If all lines begin with the same indentation, then strip it.
+ min_indent = self._min_indent(string)
+ if min_indent > 0:
+ string = '\n'.join([l[min_indent:] for l in string.split('\n')])
+
+ output = []
+ charno, lineno = 0, 0
+ # Find all doctest examples in the string:
+ for m in self._EXAMPLE_RE.finditer(string):
+ # Add the pre-example text to `output`.
+ output.append(string[charno:m.start()])
+ # Update lineno (lines before this example)
+ lineno += string.count('\n', charno, m.start())
+ # Extract info from the regexp match.
+ (source, options, want, exc_msg) = \
+ self._parse_example(m, name, lineno)
+ # Create an Example, and add it to the list.
+ if not self._IS_BLANK_OR_COMMENT(source):
+ output.append( Example(source, want, exc_msg,
+ lineno=lineno,
+ indent=min_indent+len(m.group('indent')),
+ options=options) )
+ # Update lineno (lines inside this example)
+ lineno += string.count('\n', m.start(), m.end())
+ # Update charno.
+ charno = m.end()
+ # Add any remaining post-example text to `output`.
+ output.append(string[charno:])
+ return output
+
+ def get_doctest(self, string, globs, name, filename, lineno):
+ """
+ Extract all doctest examples from the given string, and
+ collect them into a `DocTest` object.
+
+ `globs`, `name`, `filename`, and `lineno` are attributes for
+ the new `DocTest` object. See the documentation for `DocTest`
+ for more information.
+ """
+ return DocTest(self.get_examples(string, name), globs,
+ name, filename, lineno, string)
+
+ def get_examples(self, string, name=''):
+ """
+ Extract all doctest examples from the given string, and return
+ them as a list of `Example` objects. Line numbers are
+ 0-based, because it's most common in doctests that nothing
+ interesting appears on the same line as opening triple-quote,
+ and so the first interesting line is called \"line 1\" then.
+
+ The optional argument `name` is a name identifying this
+ string, and is only used for error messages.
+ """
+ return [x for x in self.parse(string, name)
+ if isinstance(x, Example)]
+
+ def _parse_example(self, m, name, lineno):
+ """
+ Given a regular expression match from `_EXAMPLE_RE` (`m`),
+ return a pair `(source, want)`, where `source` is the matched
+ example's source code (with prompts and indentation stripped);
+ and `want` is the example's expected output (with indentation
+ stripped).
+
+ `name` is the string's name, and `lineno` is the line number
+ where the example starts; both are used for error messages.
+ """
+ # Get the example's indentation level.
+ indent = len(m.group('indent'))
+
+ # Divide source into lines; check that they're properly
+ # indented; and then strip their indentation & prompts.
+ source_lines = m.group('source').split('\n')
+ self._check_prompt_blank(source_lines, indent, name, lineno)
+ self._check_prefix(source_lines[1:], ' '*indent + '.', name, lineno)
+ source = '\n'.join([sl[indent+4:] for sl in source_lines])
+
+ # Divide want into lines; check that it's properly indented; and
+ # then strip the indentation. Spaces before the last newline should
+ # be preserved, so plain rstrip() isn't good enough.
+ want = m.group('want')
+ want_lines = want.split('\n')
+ if len(want_lines) > 1 and re.match(r' *$', want_lines[-1]):
+ del want_lines[-1] # forget final newline & spaces after it
+ self._check_prefix(want_lines, ' '*indent, name,
+ lineno + len(source_lines))
+ want = '\n'.join([wl[indent:] for wl in want_lines])
+
+ # If `want` contains a traceback message, then extract it.
+ m = self._EXCEPTION_RE.match(want)
+ if m:
+ exc_msg = m.group('msg')
+ else:
+ exc_msg = None
+
+ # Extract options from the source.
+ options = self._find_options(source, name, lineno)
+
+ return source, options, want, exc_msg
+
+ # This regular expression looks for option directives in the
+ # source code of an example. Option directives are comments
+ # starting with "doctest:". Warning: this may give false
+ # positives for string-literals that contain the string
+ # "#doctest:". Eliminating these false positives would require
+ # actually parsing the string; but we limit them by ignoring any
+ # line containing "#doctest:" that is *followed* by a quote mark.
+ _OPTION_DIRECTIVE_RE = re.compile(r'#\s*doctest:\s*([^\n\'"]*)$',
+ re.MULTILINE)
+
+ def _find_options(self, source, name, lineno):
+ """
+ Return a dictionary containing option overrides extracted from
+ option directives in the given source string.
+
+ `name` is the string's name, and `lineno` is the line number
+ where the example starts; both are used for error messages.
+ """
+ options = {}
+ # (note: with the current regexp, this will match at most once:)
+ for m in self._OPTION_DIRECTIVE_RE.finditer(source):
+ option_strings = m.group(1).replace(',', ' ').split()
+ for option in option_strings:
+ if (option[0] not in '+-' or
+ option[1:] not in OPTIONFLAGS_BY_NAME):
+ raise ValueError('line %r of the doctest for %s '
+ 'has an invalid option: %r' %
+ (lineno+1, name, option))
+ flag = OPTIONFLAGS_BY_NAME[option[1:]]
+ options[flag] = (option[0] == '+')
+ if options and self._IS_BLANK_OR_COMMENT(source):
+ raise ValueError('line %r of the doctest for %s has an option '
+ 'directive on a line with no example: %r' %
+ (lineno, name, source))
+ return options
+
+ # This regular expression finds the indentation of every non-blank
+ # line in a string.
+ _INDENT_RE = re.compile('^([ ]*)(?=\S)', re.MULTILINE)
+
+ def _min_indent(self, s):
+ "Return the minimum indentation of any non-blank line in `s`"
+ indents = [len(indent) for indent in self._INDENT_RE.findall(s)]
+ if len(indents) > 0:
+ return min(indents)
+ else:
+ return 0
+
+ def _check_prompt_blank(self, lines, indent, name, lineno):
+ """
+ Given the lines of a source string (including prompts and
+ leading indentation), check to make sure that every prompt is
+ followed by a space character. If any line is not followed by
+ a space character, then raise ValueError.
+ """
+ for i, line in enumerate(lines):
+ if len(line) >= indent+4 and line[indent+3] != ' ':
+ raise ValueError('line %r of the docstring for %s '
+ 'lacks blank after %s: %r' %
+ (lineno+i+1, name,
+ line[indent:indent+3], line))
+
+ def _check_prefix(self, lines, prefix, name, lineno):
+ """
+ Check that every line in the given list starts with the given
+ prefix; if any line does not, then raise a ValueError.
+ """
+ for i, line in enumerate(lines):
+ if line and not line.startswith(prefix):
+ raise ValueError('line %r of the docstring for %s has '
+ 'inconsistent leading whitespace: %r' %
+ (lineno+i+1, name, line))
+
+
+######################################################################
+## 4. DocTest Finder
+######################################################################
+
+class DocTestFinder:
+ """
+ A class used to extract the DocTests that are relevant to a given
+ object, from its docstring and the docstrings of its contained
+ objects. Doctests can currently be extracted from the following
+ object types: modules, functions, classes, methods, staticmethods,
+ classmethods, and properties.
+ """
+
+ # NOTE: the default `parser` is a single DocTestParser instance that
+ # is created once, when this `def` is evaluated, and is then shared
+ # by every finder that does not pass its own parser.
+ def __init__(self, verbose=False, parser=DocTestParser(),
+ recurse=True, exclude_empty=True):
+ """
+ Create a new doctest finder.
+
+ The optional argument `parser` is the object used to turn a
+ docstring into a DocTest: the finder calls its
+ `get_doctest(docstring, globs, name, filename, lineno)` method
+ and returns whatever that call returns.
+
+ If the optional argument `verbose` is true, then a line is
+ printed for every object that is examined.
+
+ If the optional argument `recurse` is false, then `find` will
+ only examine the given object, and not any contained objects.
+
+ If the optional argument `exclude_empty` is false, then `find`
+ will include tests for objects with empty docstrings.
+ """
+ self._parser = parser
+ self._verbose = verbose
+ self._recurse = recurse
+ self._exclude_empty = exclude_empty
+
+ def find(self, obj, name=None, module=None, globs=None, extraglobs=None):
+ """
+ Return a list of the DocTests that are defined by the given
+ object's docstring, or by any of its contained objects'
+ docstrings. The list is sorted before it is returned.
+
+ If `name` is not given it is taken from `obj.__name__`; a
+ ValueError is raised when the object has no such attribute.
+
+ The optional parameter `module` is the module that contains
+ the given object. If the module is not specified or is None, then
+ the test finder will attempt to automatically determine the
+ correct module. The object's module is used:
+
+ - As a default namespace, if `globs` is not specified.
+ - To prevent the DocTestFinder from extracting DocTests
+ from objects that are imported from other modules.
+ - To find the name of the file containing the object.
+ - To help find the line number of the object within its
+ file.
+
+ Contained objects whose module does not match `module` are ignored.
+
+ If `module` is False, no attempt to find the module will be made.
+ This is obscure, of use mostly in tests: if `module` is False, or
+ is None but cannot be found automatically, then all objects are
+ considered to belong to the (non-existent) module, so all contained
+ objects will (recursively) be searched for doctests.
+
+ The globals for each DocTest is formed by combining `globs`
+ and `extraglobs` (bindings in `extraglobs` override bindings
+ in `globs`). The combined dictionary is always a copy, so the
+ caller's `globs` and the module's `__dict__` are not modified
+ here. If `globs` is not specified, then it
+ defaults to the module's `__dict__`, if specified, or {}
+ otherwise. If `extraglobs` is not specified, then it defaults
+ to {}. A `__name__` entry of '__main__' is added when the
+ combined dictionary has none.
+
+ """
+ # If name was not specified, then extract it from the object.
+ if name is None:
+ name = getattr(obj, '__name__', None)
+ if name is None:
+ raise ValueError("DocTestFinder.find: name must be given "
+ "when obj.__name__ doesn't exist: %r" %
+ (type(obj),))
+
+ # Find the module that contains the given object (if obj is
+ # a module, then module=obj.). Note: this may fail, in which
+ # case module will be None.
+ if module is False:
+ module = None
+ elif module is None:
+ module = inspect.getmodule(obj)
+
+ # Read the module's source code. This is used by
+ # DocTestFinder._find_lineno to find the line number for a
+ # given object's docstring.
+ # A TypeError from the inspect calls, or an empty result from
+ # linecache, both leave `source_lines` as None.
+ try:
+ file = inspect.getsourcefile(obj) or inspect.getfile(obj)
+ if module is not None:
+ # Supply the module globals in case the module was
+ # originally loaded via a PEP 302 loader and
+ # file is not a valid filesystem path
+ source_lines = linecache.getlines(file, module.__dict__)
+ else:
+ # No access to a loader, so assume it's a normal
+ # filesystem path
+ source_lines = linecache.getlines(file)
+ if not source_lines:
+ source_lines = None
+ except TypeError:
+ source_lines = None
+
+ # Initialize globals, and merge in extraglobs.
+ if globs is None:
+ if module is None:
+ globs = {}
+ else:
+ globs = module.__dict__.copy()
+ else:
+ globs = globs.copy()
+ if extraglobs is not None:
+ globs.update(extraglobs)
+ if '__name__' not in globs:
+ globs['__name__'] = '__main__' # provide a default module name
+
+ # Recursively explore `obj`, extracting DocTests.
+ # The trailing {} is the `seen` map of object ids already visited.
+ tests = []
+ self._find(tests, obj, name, module, source_lines, globs, {})
+ # Sort the tests by alpha order of names, for consistency in
+ # verbose-mode output. This was a feature of doctest in Pythons
+ # <= 2.3 that got lost by accident in 2.4. It was repaired in
+ # 2.4.4 and 2.5.
+ tests.sort()
+ return tests
+
+ def _from_module(self, module, object):
+ """
+ Return true if the given object is defined in the given
+ module.
+
+ When `module` is None every object is accepted. Otherwise the
+ checks are tried in order: the module reported by
+ `inspect.getmodule`, a function's globals dictionary, the
+ `__module__` attribute, and finally properties, which are always
+ accepted. Anything else raises ValueError.
+ """
+ if module is None:
+ return True
+ elif inspect.getmodule(object) is not None:
+ return module is inspect.getmodule(object)
+ elif inspect.isfunction(object):
+ return module.__dict__ is object.func_globals
+ elif inspect.isclass(object):
+ return module.__name__ == object.__module__
+ elif hasattr(object, '__module__'):
+ return module.__name__ == object.__module__
+ elif isinstance(object, property):
+ return True # [XX] no way not be sure.
+ else:
+ raise ValueError("object must be a class or function")
+
+ def _find(self, tests, obj, name, module, source_lines, globs, seen):
+ """
+ Find tests for the given object and any contained objects, and
+ add them to `tests`.
+
+ `seen` maps the id() of every object already processed to 1 and
+ is shared by the whole recursion, so an object reachable by
+ several paths is only examined once.
+ """
+ if self._verbose:
+ print 'Finding tests in %s' % name
+
+ # If we've already processed this object, then ignore it.
+ if id(obj) in seen:
+ return
+ seen[id(obj)] = 1
+
+ # Find a test for this object, and add it to the list of tests.
+ test = self._get_test(obj, name, module, globs, source_lines)
+ if test is not None:
+ tests.append(test)
+
+ # Look for tests in a module's contained objects.
+ if inspect.ismodule(obj) and self._recurse:
+ for valname, val in obj.__dict__.items():
+ valname = '%s.%s' % (name, valname)
+ # Recurse to functions & classes.
+ if ((inspect.isfunction(val) or inspect.isclass(val)) and
+ self._from_module(module, val)):
+ self._find(tests, val, valname, module, source_lines,
+ globs, seen)
+
+ # Look for tests in a module's __test__ dictionary.
+ # Unlike the loop above, these values are not filtered through
+ # _from_module; only their key and value types are validated.
+ if inspect.ismodule(obj) and self._recurse:
+ for valname, val in getattr(obj, '__test__', {}).items():
+ if not isinstance(valname, basestring):
+ raise ValueError("DocTestFinder.find: __test__ keys "
+ "must be strings: %r" %
+ (type(valname),))
+ if not (inspect.isfunction(val) or inspect.isclass(val) or
+ inspect.ismethod(val) or inspect.ismodule(val) or
+ isinstance(val, basestring)):
+ raise ValueError("DocTestFinder.find: __test__ values "
+ "must be strings, functions, methods, "
+ "classes, or modules: %r" %
+ (type(val),))
+ valname = '%s.__test__.%s' % (name, valname)
+ self._find(tests, val, valname, module, source_lines,
+ globs, seen)
+
+ # Look for tests in a class's contained objects.
+ if inspect.isclass(obj) and self._recurse:
+ for valname, val in obj.__dict__.items():
+ # Special handling for staticmethod/classmethod.
+ # The raw descriptors from __dict__ are swapped for the
+ # underlying function so the isfunction test below applies.
+ if isinstance(val, staticmethod):
+ val = getattr(obj, valname)
+ if isinstance(val, classmethod):
+ val = getattr(obj, valname).im_func
+
+ # Recurse to methods, properties, and nested classes.
+ if ((inspect.isfunction(val) or inspect.isclass(val) or
+ isinstance(val, property)) and
+ self._from_module(module, val)):
+ valname = '%s.%s' % (name, valname)
+ self._find(tests, val, valname, module, source_lines,
+ globs, seen)
+
+ def _get_test(self, obj, name, module, globs, source_lines):
+ """
+ Return a DocTest for the given object, if it defines a docstring;
+ otherwise, return None.
+
+ An empty docstring only yields None while `exclude_empty` is in
+ effect; otherwise a DocTest is still built from the empty string.
+ """
+ # Extract the object's docstring. If it doesn't have one,
+ # then return None (no test for this object).
+ # A string `obj` is itself treated as the docstring.
+ if isinstance(obj, basestring):
+ docstring = obj
+ else:
+ try:
+ if obj.__doc__ is None:
+ docstring = ''
+ else:
+ docstring = obj.__doc__
+ if not isinstance(docstring, basestring):
+ docstring = str(docstring)
+ except (TypeError, AttributeError):
+ docstring = ''
+
+ # Find the docstring's location in the file.
+ lineno = self._find_lineno(obj, source_lines)
+
+ # Don't bother if the docstring is empty.
+ if self._exclude_empty and not docstring:
+ return None
+
+ # Return a DocTest for this object.
+ if module is None:
+ filename = None
+ else:
+ filename = getattr(module, '__file__', module.__name__)
+ # Dropping the last character turns ".pyc"/".pyo" into ".py".
+ if filename[-4:] in (".pyc", ".pyo"):
+ filename = filename[:-1]
+ return self._parser.get_doctest(docstring, globs, name,
+ filename, lineno)
+
+ def _find_lineno(self, obj, source_lines):
+ """
+ Return a line number of the given object's docstring. Note:
+ this method assumes that the object has a docstring.
+
+ When `source_lines` is available the result is the index into
+ `source_lines` of the first line, at or after the object's
+ starting line, that looks like the start of a string literal.
+ When it is None, classes yield None and other located objects
+ yield their starting index plus one. None is returned whenever
+ no line number can be determined.
+ """
+ lineno = None
+
+ # Find the line number for modules.
+ if inspect.ismodule(obj):
+ lineno = 0
+
+ # Find the line number for classes.
+ # Note: this could be fooled if a class is defined multiple
+ # times in a single file.
+ if inspect.isclass(obj):
+ if source_lines is None:
+ return None
+ pat = re.compile(r'^\s*class\s*%s\b' %
+ getattr(obj, '__name__', '-'))
+ for i, line in enumerate(source_lines):
+ if pat.match(line):
+ lineno = i
+ break
+
+ # Find the line number for functions & methods.
+ # Each step unwraps one layer until a code object is reached.
+ if inspect.ismethod(obj): obj = obj.im_func
+ if inspect.isfunction(obj): obj = obj.func_code
+ if inspect.istraceback(obj): obj = obj.tb_frame
+ if inspect.isframe(obj): obj = obj.f_code
+ if inspect.iscode(obj):
+ # NOTE(review): if the attribute were ever missing, `None-1`
+ # would raise TypeError rather than fall through.
+ lineno = getattr(obj, 'co_firstlineno', None)-1
+
+ # Find the line number where the docstring starts. Assume
+ # that it's the first line that begins with a quote mark.
+ # Note: this could be fooled by a multiline function
+ # signature, where a continuation line begins with a quote
+ # mark.
+ if lineno is not None:
+ if source_lines is None:
+ return lineno+1
+ pat = re.compile('(^|.*:)\s*\w*("|\')')
+ for lineno in range(lineno, len(source_lines)):
+ if pat.match(source_lines[lineno]):
+ return lineno
+
+ # We couldn't find the line number.
+ return None
+
+######################################################################
+## 5. DocTest Runner
+######################################################################
+
+class DocTestRunner:
+ """
+ A class used to run DocTest test cases, and accumulate statistics.
+ The `run` method is used to process a single DocTest case. It
+ returns a tuple `(f, t)`, where `t` is the number of test cases
+ tried, and `f` is the number of test cases that failed.
+
+ >>> tests = DocTestFinder().find(_TestClass)
+ >>> runner = DocTestRunner(verbose=False)
+ >>> tests.sort(key = lambda test: test.name)
+ >>> for test in tests:
+ ... print test.name, '->', runner.run(test)
+ _TestClass -> TestResults(failed=0, attempted=2)
+ _TestClass.__init__ -> TestResults(failed=0, attempted=2)
+ _TestClass.get -> TestResults(failed=0, attempted=2)
+ _TestClass.square -> TestResults(failed=0, attempted=1)
+
+ The `summarize` method prints a summary of all the test cases that
+ have been run by the runner, and returns an aggregated `(f, t)`
+ tuple:
+
+ >>> runner.summarize(verbose=1)
+ 4 items passed all tests:
+ 2 tests in _TestClass
+ 2 tests in _TestClass.__init__
+ 2 tests in _TestClass.get
+ 1 tests in _TestClass.square
+ 7 tests in 4 items.
+ 7 passed and 0 failed.
+ Test passed.
+ TestResults(failed=0, attempted=7)
+
+ The aggregated number of tried examples and failed examples is
+ also available via the `tries` and `failures` attributes:
+
+ >>> runner.tries
+ 7
+ >>> runner.failures
+ 0
+
+ The comparison between expected outputs and actual outputs is done
+ by an `OutputChecker`. This comparison may be customized with a
+ number of option flags; see the documentation for `testmod` for
+ more information. If the option flags are insufficient, then the
+ comparison may also be customized by passing a subclass of
+ `OutputChecker` to the constructor.
+
+ The test runner's display output can be controlled in two ways.
+ First, an output function (`out`) can be passed to
+ `DocTestRunner.run`; this function will be called with strings that
+ should be displayed. It defaults to `sys.stdout.write`. If
+ capturing the output is not sufficient, then the display output
+ can be also customized by subclassing DocTestRunner, and
+ overriding the methods `report_start`, `report_success`,
+ `report_unexpected_exception`, and `report_failure`.
+ """
+ # This divider string is used to separate failure messages, and to
+ # separate sections of the summary.
+ DIVIDER = "*" * 70
+
+ def __init__(self, checker=None, verbose=None, optionflags=0):
+ """
+ Create a new test runner.
+
+ Optional keyword arg `checker` is the `OutputChecker` that
+ should be used to compare the expected outputs and actual
+ outputs of doctest examples.
+
+ Optional keyword arg 'verbose' prints lots of stuff if true,
+ only failures if false; by default, it's true iff '-v' is in
+ sys.argv.
+
+ Optional argument `optionflags` can be used to control how the
+ test runner compares expected output to actual output, and how
+ it displays failures. See the documentation for `testmod` for
+ more information.
+ """
+ self._checker = checker or OutputChecker()
+ if verbose is None:
+ verbose = '-v' in sys.argv
+ self._verbose = verbose
+ self.optionflags = optionflags
+ self.original_optionflags = optionflags
+
+ # Keep track of the examples we've run.
+ # `_name2ft` maps a test name to its (failures, tries) totals.
+ self.tries = 0
+ self.failures = 0
+ self._name2ft = {}
+
+ # Create a fake output target for capturing doctest output.
+ self._fakeout = _SpoofOut()
+
+ #/////////////////////////////////////////////////////////////////
+ # Reporting methods
+ #/////////////////////////////////////////////////////////////////
+
+ def report_start(self, out, test, example):
+ """
+ Report that the test runner is about to process the given
+ example. (Only displays a message if verbose=True)
+ """
+ if self._verbose:
+ if example.want:
+ out('Trying:\n' + _indent(example.source) +
+ 'Expecting:\n' + _indent(example.want))
+ else:
+ out('Trying:\n' + _indent(example.source) +
+ 'Expecting nothing\n')
+
+ def report_success(self, out, test, example, got):
+ """
+ Report that the given example ran successfully. (Only
+ displays a message if verbose=True)
+ """
+ if self._verbose:
+ out("ok\n")
+
+ def report_failure(self, out, test, example, got):
+ """
+ Report that the given example failed.
+ """
+ out(self._failure_header(test, example) +
+ self._checker.output_difference(example, got, self.optionflags))
+
+ def report_unexpected_exception(self, out, test, example, exc_info):
+ """
+ Report that the given example raised an unexpected exception.
+ """
+ out(self._failure_header(test, example) +
+ 'Exception raised:\n' + _indent(_exception_traceback(exc_info)))
+
+ def _failure_header(self, test, example):
+ """
+ Return the text that introduces a failure report: the divider,
+ the location of the example, and the example's source.
+ """
+ out = [self.DIVIDER]
+ if test.filename:
+ # The reported line is the test's line plus the example's
+ # line plus one; '?' is shown when either one is unknown.
+ if test.lineno is not None and example.lineno is not None:
+ lineno = test.lineno + example.lineno + 1
+ else:
+ lineno = '?'
+ out.append('File "%s", line %s, in %s' %
+ (test.filename, lineno, test.name))
+ else:
+ out.append('Line %s, in %s' % (example.lineno+1, test.name))
+ out.append('Failed example:')
+ source = example.source
+ out.append(_indent(source))
+ return '\n'.join(out)
+
+ #/////////////////////////////////////////////////////////////////
+ # DocTest Running
+ #/////////////////////////////////////////////////////////////////
+
+ def __run(self, test, compileflags, out):
+ """
+ Run the examples in `test`. Write the outcome of each example
+ with one of the `DocTestRunner.report_*` methods, using the
+ writer function `out`. `compileflags` is the set of compiler
+ flags that should be used to execute examples. Return a tuple
+ `(f, t)`, where `t` is the number of examples tried, and `f`
+ is the number of examples that failed. The examples are run
+ in the namespace `test.globs`.
+ """
+ # Keep track of the number of failures and tries.
+ failures = tries = 0
+
+ # Save the option flags (since option directives can be used
+ # to modify them).
+ original_optionflags = self.optionflags
+
+ SUCCESS, FAILURE, BOOM = range(3) # `outcome` state
+
+ check = self._checker.check_output
+
+ # Process each example.
+ for examplenum, example in enumerate(test.examples):
+
+ # If REPORT_ONLY_FIRST_FAILURE is set, then suppress
+ # reporting after the first failure.
+ quiet = (self.optionflags & REPORT_ONLY_FIRST_FAILURE and
+ failures > 0)
+
+ # Merge in the example's options.
+ self.optionflags = original_optionflags
+ if example.options:
+ for (optionflag, val) in example.options.items():
+ if val:
+ self.optionflags |= optionflag
+ else:
+ self.optionflags &= ~optionflag
+
+ # If 'SKIP' is set, then skip this example.
+ if self.optionflags & SKIP:
+ continue
+
+ # Record that we started this example.
+ tries += 1
+ if not quiet:
+ self.report_start(out, test, example)
+
+ # Use a special filename for compile(), so we can retrieve
+ # the source code during interactive debugging (see
+ # __patched_linecache_getlines). The format must stay in
+ # step with __LINECACHE_FILENAME_RE, which parses it back.
+ filename = '<doctest %s[%d]>' % (test.name, examplenum)
+
+ # Run the example in the given context (globs), and record
+ # any exception that gets raised. (But don't intercept
+ # keyboard interrupts.)
+ try:
+ # Don't blink! This is where the user's code gets run.
+ exec compile(example.source, filename, "single",
+ compileflags, 1) in test.globs
+ self.debugger.set_continue() # ==== Example Finished ====
+ exception = None
+ except KeyboardInterrupt:
+ raise
+ except:
+ exception = sys.exc_info()
+ self.debugger.set_continue() # ==== Example Finished ====
+
+ got = self._fakeout.getvalue() # the actual output
+ self._fakeout.truncate(0)
+ outcome = FAILURE # guilty until proved innocent or insane
+
+ # If the example executed without raising any exceptions,
+ # verify its output.
+ if exception is None:
+ if check(example.want, got, self.optionflags):
+ outcome = SUCCESS
+
+ # The example raised an exception: check if it was expected.
+ else:
+ exc_info = sys.exc_info()
+ exc_msg = traceback.format_exception_only(*exc_info[:2])[-1]
+ if not quiet:
+ got += _exception_traceback(exc_info)
+
+ # If `example.exc_msg` is None, then we weren't expecting
+ # an exception.
+ if example.exc_msg is None:
+ outcome = BOOM
+
+ # We expected an exception: see whether it matches.
+ elif check(example.exc_msg, exc_msg, self.optionflags):
+ outcome = SUCCESS
+
+ # Another chance if they didn't care about the detail.
+ elif self.optionflags & IGNORE_EXCEPTION_DETAIL:
+ if check(_strip_exception_details(example.exc_msg),
+ _strip_exception_details(exc_msg),
+ self.optionflags):
+ outcome = SUCCESS
+
+ # Report the outcome.
+ if outcome is SUCCESS:
+ if not quiet:
+ self.report_success(out, test, example, got)
+ elif outcome is FAILURE:
+ if not quiet:
+ self.report_failure(out, test, example, got)
+ failures += 1
+ elif outcome is BOOM:
+ if not quiet:
+ self.report_unexpected_exception(out, test, example,
+ exc_info)
+ failures += 1
+ else:
+ assert False, ("unknown outcome", outcome)
+
+ # Restore the option flags (in case they were modified)
+ self.optionflags = original_optionflags
+
+ # Record and return the number of failures and tries.
+ self.__record_outcome(test, failures, tries)
+ return TestResults(failures, tries)
+
+ def __record_outcome(self, test, f, t):
+ """
+ Record the fact that the given DocTest (`test`) generated `f`
+ failures out of `t` tried examples.
+ """
+ f2, t2 = self._name2ft.get(test.name, (0,0))
+ self._name2ft[test.name] = (f+f2, t+t2)
+ self.failures += f
+ self.tries += t
+
+ # Matches the pseudo-filenames that `__run` hands to compile(); the
+ # named groups recover the test name and the index of the example.
+ __LINECACHE_FILENAME_RE = re.compile(r'<doctest '
+ r'(?P<name>.+)'
+ r'\[(?P<examplenum>\d+)\]>$')
+ def __patched_linecache_getlines(self, filename, module_globals=None):
+ """
+ Stand-in for `linecache.getlines` while a test is running.
+
+ For a pseudo-filename that names an example of the current test,
+ return that example's source split into lines; every other
+ filename is passed on to the saved, real `linecache.getlines`.
+ """
+ m = self.__LINECACHE_FILENAME_RE.match(filename)
+ if m and m.group('name') == self.test.name:
+ example = self.test.examples[int(m.group('examplenum'))]
+ source = example.source
+ if isinstance(source, unicode):
+ source = source.encode('ascii', 'backslashreplace')
+ return source.splitlines(True)
+ else:
+ return self.save_linecache_getlines(filename, module_globals)
+
+ def run(self, test, compileflags=None, out=None, clear_globs=True):
+ """
+ Run the examples in `test`, and display the results using the
+ writer function `out`.
+
+ The examples are run in the namespace `test.globs`. If
+ `clear_globs` is true (the default), then this namespace will
+ be cleared after the test runs, to help with garbage
+ collection. If you would like to examine the namespace after
+ the test completes, then use `clear_globs=False`.
+
+ `compileflags` gives the set of flags that should be used by
+ the Python compiler when running the examples. If not
+ specified, then it will default to the set of future-import
+ flags that apply to `globs`.
+
+ The output of each example is checked using the `check_output`
+ method of the runner's checker, and the results are formatted by
+ the `DocTestRunner.report_*` methods.
+
+ While the examples run, `sys.stdout`, `pdb.set_trace`,
+ `linecache.getlines` and `sys.displayhook` are replaced; all
+ four are restored before this method returns, even on error.
+ """
+ self.test = test
+
+ if compileflags is None:
+ compileflags = _extract_future_flags(test.globs)
+
+ save_stdout = sys.stdout
+ if out is None:
+ out = save_stdout.write
+ sys.stdout = self._fakeout
+
+ # Patch pdb.set_trace to restore sys.stdout during interactive
+ # debugging (so it's not still redirected to self._fakeout).
+ # Note that the interactive output will go to *our*
+ # save_stdout, even if that's not the real sys.stdout; this
+ # allows us to write test cases for the set_trace behavior.
+ save_set_trace = pdb.set_trace
+ self.debugger = _OutputRedirectingPdb(save_stdout)
+ self.debugger.reset()
+ pdb.set_trace = self.debugger.set_trace
+
+ # Patch linecache.getlines, so we can see the example's source
+ # when we're inside the debugger.
+ self.save_linecache_getlines = linecache.getlines
+ linecache.getlines = self.__patched_linecache_getlines
+
+ # Make sure sys.displayhook just prints the value to stdout
+ save_displayhook = sys.displayhook
+ sys.displayhook = sys.__displayhook__
+
+ try:
+ return self.__run(test, compileflags, out)
+ finally:
+ sys.stdout = save_stdout
+ pdb.set_trace = save_set_trace
+ linecache.getlines = self.save_linecache_getlines
+ sys.displayhook = save_displayhook
+ if clear_globs:
+ test.globs.clear()
+
+ #/////////////////////////////////////////////////////////////////
+ # Summarization
+ #/////////////////////////////////////////////////////////////////
+ def summarize(self, verbose=None):
+ """
+ Print a summary of all the test cases that have been run by
+ this DocTestRunner, and return a tuple `(f, t)`, where `f` is
+ the total number of failed examples, and `t` is the total
+ number of tried examples.
+
+ The optional `verbose` argument controls how detailed the
+ summary is. If the verbosity is not specified, then the
+ DocTestRunner's verbosity is used.
+ """
+ if verbose is None:
+ verbose = self._verbose
+ notests = []
+ passed = []
+ failed = []
+ totalt = totalf = 0
+ for x in self._name2ft.items():
+ name, (f, t) = x
+ assert f <= t
+ totalt += t
+ totalf += f
+ if t == 0:
+ notests.append(name)
+ elif f == 0:
+ passed.append( (name, t) )
+ else:
+ failed.append(x)
+ if verbose:
+ if notests:
+ print len(notests), "items had no tests:"
+ notests.sort()
+ for thing in notests:
+ print " ", thing
+ if passed:
+ print len(passed), "items passed all tests:"
+ passed.sort()
+ for thing, count in passed:
+ print " %3d tests in %s" % (count, thing)
+ # Failures are listed regardless of verbosity.
+ if failed:
+ print self.DIVIDER
+ print len(failed), "items had failures:"
+ failed.sort()
+ for thing, (f, t) in failed:
+ print " %3d of %3d in %s" % (f, t, thing)
+ if verbose:
+ print totalt, "tests in", len(self._name2ft), "items."
+ print totalt - totalf, "passed and", totalf, "failed."
+ if totalf:
+ print "***Test Failed***", totalf, "failures."
+ elif verbose:
+ print "Test passed."
+ return TestResults(totalf, totalt)
+
+ #/////////////////////////////////////////////////////////////////
+ # Backward compatibility cruft to maintain doctest.master.
+ #/////////////////////////////////////////////////////////////////
+ def merge(self, other):
+ """
+ Add the per-name (failures, tries) totals of `other` to this
+ runner's `_name2ft`, summing the counts for names present in
+ both. The `tries` and `failures` attributes are not updated.
+ """
+ d = self._name2ft
+ for name, (f, t) in other._name2ft.items():
+ if name in d:
+ # Don't print here by default, since doing
+ # so breaks some of the buildbots
+ #print "*** DocTestRunner.merge: '" + name + "' in both" \
+ # " testers; summing outcomes."
+ f2, t2 = d[name]
+ f = f + f2
+ t = t + t2
+ d[name] = f, t
+
+class OutputChecker:
+ """
+ A class used to check whether the actual output from a doctest
+ example matches the expected output. `OutputChecker` defines two
+ methods: `check_output`, which compares a given pair of outputs,
+ and returns true if they match; and `output_difference`, which
+ returns a string describing the differences between two outputs.
+ """
+ def check_output(self, want, got, optionflags):
+ """
+ Return True iff the actual output from an example (`got`)
+ matches the expected output (`want`). These strings are
+ always considered to match if they are identical; but
+ depending on what option flags the test runner is using,
+ several non-exact match types are also possible. See the
+ documentation for `TestRunner` for more information about
+ option flags.
+
+ The relaxations are applied cumulatively and in a fixed order:
+ each enabled step rewrites `want` and/or `got`, and the rewritten
+ strings are what the following steps compare.
+ """
+ # Handle the common case first, for efficiency:
+ # if they're string-identical, always return true.
+ if got == want:
+ return True
+
+ # The values True and False replaced 1 and 0 as the return
+ # value for boolean comparisons in Python 2.3.
+ if not (optionflags & DONT_ACCEPT_TRUE_FOR_1):
+ if (got,want) == ("True\n", "1\n"):
+ return True
+ if (got,want) == ("False\n", "0\n"):
+ return True
+
+ # The blank-line marker (BLANKLINE_MARKER, written <BLANKLINE> in
+ # standard doctests) can be used as a special sequence to signify
+ # a blank line, unless the DONT_ACCEPT_BLANKLINE flag is used.
+ if not (optionflags & DONT_ACCEPT_BLANKLINE):
+ # Replace each marker line in want with a blank line.
+ want = re.sub('(?m)^%s\s*?$' % re.escape(BLANKLINE_MARKER),
+ '', want)
+ # If a line in got contains only spaces, then remove the
+ # spaces.
+ got = re.sub('(?m)^\s*?$', '', got)
+ if got == want:
+ return True
+
+ # This flag causes doctest to ignore any differences in the
+ # contents of whitespace strings. Note that this can be used
+ # in conjunction with the ELLIPSIS flag.
+ if optionflags & NORMALIZE_WHITESPACE:
+ got = ' '.join(got.split())
+ want = ' '.join(want.split())
+ if got == want:
+ return True
+
+ # The ELLIPSIS flag says to let the sequence "..." in `want`
+ # match any substring in `got`.
+ if optionflags & ELLIPSIS:
+ if _ellipsis_match(want, got):
+ return True
+
+ # We didn't find any match; return false.
+ return False
+
+ # Should we do a fancy diff?
+ def _do_a_fancy_diff(self, want, got, optionflags):
+ # Not unless they asked for a fancy diff.
+ if not optionflags & (REPORT_UDIFF |
+ REPORT_CDIFF |
+ REPORT_NDIFF):
+ return False
+
+ # If expected output uses ellipsis, a meaningful fancy diff is
+ # too hard ... or maybe not. In two real-life failures Tim saw,
+ # a diff was a major help anyway, so this is commented out.
+ # [todo] _ellipsis_match() knows which pieces do and don't match,
+ # and could be the basis for a kick-ass diff in this case.
+ ##if optionflags & ELLIPSIS and ELLIPSIS_MARKER in want:
+ ## return False
+
+ # ndiff does intraline difference marking, so can be useful even
+ # for 1-line differences.
+ if optionflags & REPORT_NDIFF:
+ return True
+
+ # The other diff types need at least a few lines to be helpful.
+ # "A few" means more than two newlines in both strings.
+ return want.count('\n') > 2 and got.count('\n') > 2
+
+ def output_difference(self, example, got, optionflags):
+ """
+ Return a string describing the differences between the
+ expected output for a given example (`example`) and the actual
+ output (`got`). `optionflags` is the set of option flags used
+ to compare `want` and `got`.
+
+ The result is either a unified, context or ndiff listing (when
+ `_do_a_fancy_diff` allows it) or a plain "Expected/Got" listing.
+ """
+ want = example.want
+ # If blank-line markers are being used, then replace blank lines
+ # with BLANKLINE_MARKER in the actual output string.
+ if not (optionflags & DONT_ACCEPT_BLANKLINE):
+ got = re.sub('(?m)^[ ]*(?=\n)', BLANKLINE_MARKER, got)
+
+ # Check if we should use diff.
+ if self._do_a_fancy_diff(want, got, optionflags):
+ # Split want & got into lines.
+ want_lines = want.splitlines(True) # True == keep line ends
+ got_lines = got.splitlines(True)
+ # Use difflib to find their differences.
+ # When several REPORT_* flags are set, the first branch that
+ # matches wins: unified, then context, then ndiff.
+ if optionflags & REPORT_UDIFF:
+ diff = difflib.unified_diff(want_lines, got_lines, n=2)
+ diff = list(diff)[2:] # strip the diff header
+ kind = 'unified diff with -expected +actual'
+ elif optionflags & REPORT_CDIFF:
+ diff = difflib.context_diff(want_lines, got_lines, n=2)
+ diff = list(diff)[2:] # strip the diff header
+ kind = 'context diff with expected followed by actual'
+ elif optionflags & REPORT_NDIFF:
+ engine = difflib.Differ(charjunk=difflib.IS_CHARACTER_JUNK)
+ diff = list(engine.compare(want_lines, got_lines))
+ kind = 'ndiff with -expected +actual'
+ else:
+ assert 0, 'Bad diff option'
+ # Remove trailing whitespace on diff output.
+ diff = [line.rstrip() + '\n' for line in diff]
+ return 'Differences (%s):\n' % kind + _indent(''.join(diff))
+
+ # If we're not using diff, then simply list the expected
+ # output followed by the actual output.
+ if want and got:
+ return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got))
+ elif want:
+ return 'Expected:\n%sGot nothing\n' % _indent(want)
+ elif got:
+ return 'Expected nothing\nGot:\n%s' % _indent(got)
+ else:
+ return 'Expected nothing\nGot nothing\n'
+
+class DocTestFailure(Exception):
+    """Signals that a DocTest example failed while in debugging mode.
+
+    Attributes stored on the exception instance:
+
+    - test: the DocTest object being run
+
+    - example: the Example object that failed
+
+    - got: the actual output
+    """
+    def __init__(self, test, example, got):
+        self.test, self.example, self.got = test, example, got
+
+    def __str__(self):
+        # The message is the string form of the DocTest that failed.
+        failed_test = self.test
+        return str(failed_test)
+
+class UnexpectedException(Exception):
+    """Signals that a DocTest example raised an exception it did not expect.
+
+    Attributes stored on the exception instance:
+
+    - test: the DocTest object being run
+
+    - example: the Example object that failed
+
+    - exc_info: the exception info
+    """
+    def __init__(self, test, example, exc_info):
+        self.test, self.example, self.exc_info = test, example, exc_info
+
+    def __str__(self):
+        # The message is the string form of the DocTest being run.
+        running_test = self.test
+        return str(running_test)
+
+class DebugRunner(DocTestRunner):
+ r"""Run doc tests but raise an exception as soon as there is a failure.
+
+ If an unexpected exception occurs, an UnexpectedException is raised.
+ It contains the test, the example, and the original exception:
+
+ >>> runner = DebugRunner(verbose=False)
+ >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42',
+ ... {}, 'foo', 'foo.py', 0)
+ >>> try:
+ ... runner.run(test)
+ ... except UnexpectedException, failure:
+ ... pass
+
+ >>> failure.test is test
+ True
+
+ >>> failure.example.want
+ '42\n'
+
+ >>> exc_info = failure.exc_info
+ >>> raise exc_info[0], exc_info[1], exc_info[2]
+ Traceback (most recent call last):
+ ...
+ KeyError
+
+ We wrap the original exception to give the calling application
+ access to the test and example information.
+
+ If the output doesn't match, then a DocTestFailure is raised:
+
+ >>> test = DocTestParser().get_doctest('''
+ ... >>> x = 1
+ ... >>> x
+ ... 2
+ ... ''', {}, 'foo', 'foo.py', 0)
+
+ >>> try:
+ ... runner.run(test)
+ ... except DocTestFailure, failure:
+ ... pass
+
+ DocTestFailure objects provide access to the test:
+
+ >>> failure.test is test
+ True
+
+ As well as to the example:
+
+ >>> failure.example.want
+ '2\n'
+
+ and the actual output:
+
+ >>> failure.got
+ '1\n'
+
+ If a failure or error occurs, the globals are left intact:
+
+ >>> del test.globs['__builtins__']
+ >>> test.globs
+ {'x': 1}
+
+ >>> test = DocTestParser().get_doctest('''
+ ... >>> x = 2
+ ... >>> raise KeyError
+ ... ''', {}, 'foo', 'foo.py', 0)
+
+ >>> runner.run(test)
+ Traceback (most recent call last):
+ ...
+ UnexpectedException:
+
+ >>> del test.globs['__builtins__']
+ >>> test.globs
+ {'x': 2}
+
+ But the globals are cleared if there is no error:
+
+ >>> test = DocTestParser().get_doctest('''
+ ... >>> x = 2
+ ... ''', {}, 'foo', 'foo.py', 0)
+
+ >>> runner.run(test)
+ TestResults(failed=0, attempted=1)
+
+ >>> test.globs
+ {}
+
+ """
+
+    def run(self, test, compileflags=None, out=None, clear_globs=True):
+        """Run `test` via DocTestRunner.run() and return its result.
+
+        The base method is always called with False as its last
+        positional argument; `test.globs` is cleared here instead, after
+        the base call has returned normally and only when `clear_globs`
+        is true.  If the base call raises, the globals are therefore
+        left untouched.
+        """
+        results = DocTestRunner.run(self, test, compileflags, out, False)
+        if clear_globs:
+            test.globs.clear()
+        return results
+
+    def report_unexpected_exception(self, out, test, example, exc_info):
+        """Raise UnexpectedException instead of reporting.
+
+        The exception is built from `test`, `example` and `exc_info`;
+        `out` is not used.
+        """
+        error = UnexpectedException(test, example, exc_info)
+        raise error
+
+    def report_failure(self, out, test, example, got):
+        """Raise DocTestFailure instead of reporting.
+
+        The exception is built from `test`, `example` and the actual
+        output `got`; `out` is not used.
+        """
+        failure = DocTestFailure(test, example, got)
+        raise failure
+
+######################################################################
+## 6. Test Functions
+######################################################################
+# These should be backwards compatible.
+
+# For backward compatibility, a global instance of a DocTestRunner
+# class, updated by testmod and testfile: the first runner either of
+# them creates becomes `master`, and later runners are merged into it.
+master = None
+
+def testmod(m=None, name=None, globs=None, verbose=None,
+            report=True, optionflags=0, extraglobs=None,
+            raise_on_error=False, exclude_empty=False):
+    """m=None, name=None, globs=None, verbose=None, report=True,
+       optionflags=0, extraglobs=None, raise_on_error=False,
+       exclude_empty=False
+
+    Run the doctest examples reachable from module `m` and return a
+    TestResults built from the runner's (failures, tries) counts.
+
+    `m` defaults to the `__main__` module.  Examples are taken from
+    m.__doc__, from the docstrings of functions and classes reachable
+    from the module, and from the dict m.__test__ if it exists and is
+    not None.  m.__test__ maps names to functions, classes and strings;
+    function and class docstrings found there are tested even if the
+    name is private, and strings are tested directly, as if they were
+    docstrings.
+
+    See help(doctest) for an overview.
+
+    Optional keyword arguments:
+
+    name -- the name of the module; by default m.__name__.
+
+    globs -- a dict to be used as the globals when executing examples;
+        by default m.__dict__.  A copy of this dict is used for each
+        docstring, so each docstring's examples start with a clean
+        slate.
+
+    extraglobs -- a dict merged into the globals used to execute
+        examples.  By default no extra globals are used.  New in 2.4.
+
+    verbose -- prints lots of stuff if true, only failures if false;
+        by default it is true iff "-v" is in sys.argv.
+
+    report -- when true, print a summary at the end; otherwise print
+        nothing at the end.  In verbose mode the summary is detailed,
+        else very brief (in fact, empty if all tests passed).
+
+    optionflags -- module constants or'ed together; defaults to 0.
+        New in 2.3.  Possible values (see the docs for details):
+
+            DONT_ACCEPT_TRUE_FOR_1
+            DONT_ACCEPT_BLANKLINE
+            NORMALIZE_WHITESPACE
+            ELLIPSIS
+            SKIP
+            IGNORE_EXCEPTION_DETAIL
+            REPORT_UDIFF
+            REPORT_CDIFF
+            REPORT_NDIFF
+            REPORT_ONLY_FIRST_FAILURE
+
+    raise_on_error -- raise an exception on the first unexpected
+        exception or failure.  This allows failures to be post-mortem
+        debugged.
+
+    exclude_empty -- passed through to the DocTestFinder constructor.
+
+    Raises TypeError if `m` (or the `__main__` fallback) is not a
+    module.
+
+    Advanced tomfoolery:  the runner created for this call is merged
+    into the module-level runner doctest.master (or becomes it, if
+    there is none yet).  Methods of doctest.master can be called
+    directly too, if you want to do something unusual.  Passing
+    report=0 to testmod is especially useful then, to delay displaying
+    a summary.  Invoke doctest.master.summarize(verbose) when you're
+    done fiddling.
+    """
+    global master
+
+    if m is None:
+        # DWA - the lookup yields None when this wasn't invoked from the
+        # command line; the TypeError raised by the module check below is
+        # about as good an error as we should expect in that case.
+        m = sys.modules.get('__main__')
+
+    # Refuse anything that is not actually a module.
+    if not inspect.ismodule(m):
+        raise TypeError("testmod: module required; %r" % (m,))
+
+    # An explicit name wins; otherwise report under the module's own name.
+    if name is None:
+        name = m.__name__
+
+    # Collect the tests, then run them with the runner flavour requested.
+    finder = DocTestFinder(exclude_empty=exclude_empty)
+    runner_class = DebugRunner if raise_on_error else DocTestRunner
+    runner = runner_class(verbose=verbose, optionflags=optionflags)
+
+    found = finder.find(m, name, globs=globs, extraglobs=extraglobs)
+    for test in found:
+        runner.run(test)
+
+    if report:
+        runner.summarize()
+
+    # Fold this run into the backward-compatible global runner.
+    if master is not None:
+        master.merge(runner)
+    else:
+        master = runner
+
+    return TestResults(runner.failures, runner.tries)
+
+def testfile(filename, module_relative=True, name=None, package=None,
+             globs=None, verbose=None, report=True, optionflags=0,
+             extraglobs=None, raise_on_error=False, parser=DocTestParser(),
+             encoding=None):
+    """
+    Test examples in the given file and return a TestResults built from
+    the runner's (failures, tries) counts.
+
+    Optional keyword arguments:
+
+    module_relative -- how `filename` is interpreted:
+
+      - If true (the default), `filename` is a module-relative path.
+        By default it is relative to the calling module's directory;
+        if `package` is given, it is relative to that package.  To
+        ensure os-independence, `filename` should use "/" characters
+        to separate path segments, and should not be an absolute path
+        (i.e., it may not begin with "/").
+
+      - If false, `filename` is an os-specific path, absolute or
+        relative (to the current working directory).
+
+    name -- the name of the test; by default the file's basename.
+
+    package -- a Python package, or the name of one, whose directory is
+        the base directory for a module-relative filename.  If omitted,
+        the calling module's directory is used.  It is an error
+        (ValueError) to give `package` when `module_relative` is false.
+
+    globs -- a dict to be used as the globals when executing examples;
+        by default {}.  The examples run against a copy of it, and
+        '__name__' is set to '__main__' in that copy if absent.
+
+    extraglobs -- a dict merged into the globals used to execute
+        examples.  By default no extra globals are used.
+
+    verbose -- prints lots of stuff if true, only failures if false;
+        by default it is true iff "-v" is in sys.argv.
+
+    report -- when true, print a summary at the end; otherwise print
+        nothing at the end.  In verbose mode the summary is detailed,
+        else very brief (in fact, empty if all tests passed).
+
+    optionflags -- module constants or'ed together; defaults to 0.
+        Possible values (see the docs for details):
+
+            DONT_ACCEPT_TRUE_FOR_1
+            DONT_ACCEPT_BLANKLINE
+            NORMALIZE_WHITESPACE
+            ELLIPSIS
+            SKIP
+            IGNORE_EXCEPTION_DETAIL
+            REPORT_UDIFF
+            REPORT_CDIFF
+            REPORT_NDIFF
+            REPORT_ONLY_FIRST_FAILURE
+
+    raise_on_error -- raise an exception on the first unexpected
+        exception or failure.  This allows failures to be post-mortem
+        debugged.
+
+    parser -- a DocTestParser (or subclass) instance used to extract
+        the tests from the file.
+
+    encoding -- an encoding used to convert the file to unicode.
+
+    Advanced tomfoolery:  the runner created for this call is merged
+    into the module-level runner doctest.master (or becomes it, if
+    there is none yet).  Methods of doctest.master can be called
+    directly too, if you want to do something unusual.  Passing
+    report=0 to testfile is especially useful then, to delay displaying
+    a summary.  Invoke doctest.master.summarize(verbose) when you're
+    done fiddling.
+    """
+    global master
+
+    if package and not module_relative:
+        raise ValueError("Package may only be specified for module-"
+                         "relative paths.")
+
+    # Load the file's text and get back the path it was resolved to.
+    # This call must stay directly in this function.
+    text, filename = _load_testfile(filename, package, module_relative)
+
+    # Default the test name to the file's name.
+    if name is None:
+        name = os.path.basename(filename)
+
+    # Build a private globals dict so the caller's dict is never mutated.
+    globs = {} if globs is None else globs.copy()
+    if extraglobs is not None:
+        globs.update(extraglobs)
+    if '__name__' not in globs:
+        globs['__name__'] = '__main__'
+
+    runner_class = DebugRunner if raise_on_error else DocTestRunner
+    runner = runner_class(verbose=verbose, optionflags=optionflags)
+
+    if encoding is not None:
+        text = text.decode(encoding)
+
+    # The whole file is a single DocTest.
+    test = parser.get_doctest(text, globs, name, filename, 0)
+    runner.run(test)
+
+    if report:
+        runner.summarize()
+
+    # Fold this run into the backward-compatible global runner.
+    if master is not None:
+        master.merge(runner)
+    else:
+        master = runner
+
+    return TestResults(runner.failures, runner.tries)
+
+def run_docstring_examples(f, globs, verbose=False, name="NoName",
+                           compileflags=None, optionflags=0):
+    """
+    Run the examples in the docstring of the object `f`, with `globs`
+    as the globals.  Nothing is returned.
+
+    `name` is used in failure messages.  If `verbose` is true, output
+    is generated even if there are no failures.
+
+    `compileflags` gives the set of flags that should be used by the
+    Python compiler when running the examples.  If not specified, it
+    defaults to the set of future-import flags that apply to `globs`.
+
+    `optionflags` specifies options for the testing and output.  See
+    the documentation for `testmod` for more information.
+    """
+    # recurse=False: only `f` itself is searched, not objects it contains.
+    example_finder = DocTestFinder(verbose=verbose, recurse=False)
+    example_runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
+    found = example_finder.find(f, name, globs=globs)
+    for test in found:
+        example_runner.run(test, compileflags=compileflags)
+
+######################################################################
+## 7. Tester
+######################################################################
+# This is provided only for backwards compatibility. It's not
+# actually used in any way.
+
+class Tester:
+    """Deprecated wrapper around a DocTestFinder and a DocTestRunner.
+
+    Creating an instance emits a DeprecationWarning.  The run* methods
+    return TestResults; the counts accumulate inside the wrapped runner,
+    whose summarize() and merge() are exposed under the same names.
+    """
+
+    def __init__(self, mod=None, globs=None, verbose=None, optionflags=0):
+        """Require `mod` or `globs`; the globals default to mod.__dict__.
+
+        Raises TypeError if neither is given, or if `mod` is given but
+        is not a module.
+        """
+        # stacklevel=2 attributes the warning to whoever instantiates us.
+        warnings.warn("class Tester is deprecated; "
+                      "use class doctest.DocTestRunner instead",
+                      DeprecationWarning, stacklevel=2)
+
+        if mod is None and globs is None:
+            raise TypeError("Tester.__init__: must specify mod or globs")
+        if mod is not None and not inspect.ismodule(mod):
+            raise TypeError("Tester.__init__: mod must be a module; %r" %
+                            (mod,))
+
+        self.globs = mod.__dict__ if globs is None else globs
+        self.verbose = verbose
+        self.optionflags = optionflags
+        self.testfinder = DocTestFinder()
+        self.testrunner = DocTestRunner(verbose=verbose,
+                                        optionflags=optionflags)
+
+    def runstring(self, s, name):
+        """Run the examples in string `s`, reported under `name`."""
+        test = DocTestParser().get_doctest(s, self.globs, name, None, None)
+        if self.verbose:
+            print "Running string", name
+        failed, tried = self.testrunner.run(test)
+        if self.verbose:
+            print failed, "of", tried, "examples failed in string", name
+        return TestResults(failed, tried)
+
+    def rundoc(self, object, name=None, module=None):
+        """Run every test the finder locates for `object`; sum the counts."""
+        total_failed = total_tried = 0
+        found = self.testfinder.find(object, name, module=module,
+                                     globs=self.globs)
+        for test in found:
+            failed, tried = self.testrunner.run(test)
+            total_failed += failed
+            total_tried += tried
+        return TestResults(total_failed, total_tried)
+
+    def rundict(self, d, name, module=None):
+        """Run the tests of the objects in dict `d`.
+
+        `d` is copied into the namespace of a throw-away module named
+        `name`, which is then handed to rundoc().  A `module` of None is
+        passed on as False.
+        """
+        import types
+        holder = types.ModuleType(name)
+        holder.__dict__.update(d)
+        if module is None:
+            module = False
+        return self.rundoc(holder, name, module)
+
+    def run__test__(self, d, name):
+        """Run `d` as the __test__ dict of a throw-away module `name`."""
+        import types
+        holder = types.ModuleType(name)
+        holder.__test__ = d
+        return self.rundoc(holder, name)
+
+    def summarize(self, verbose=None):
+        """Delegate to the wrapped runner's summarize()."""
+        return self.testrunner.summarize(verbose)
+
+    def merge(self, other):
+        """Merge the runner of Tester `other` into this one's runner."""
+        self.testrunner.merge(other.testrunner)
+
+######################################################################
+## 8. Unittest Support
+######################################################################
+
+# Default reporting flags for DocTestCase.runTest(), which or's them in
+# only when a case's own optionflags contain no reporting flag.  Change
+# the value through set_unittest_reportflags().
+_unittest_reportflags = 0
+
+def set_unittest_reportflags(flags):
+    """Set the unittest option flags and return the previous value.
+
+    Returning the old flags lets a runner restore them afterwards if it
+    wishes to:
+
+      >>> import doctest
+      >>> old = doctest._unittest_reportflags
+      >>> doctest.set_unittest_reportflags(REPORT_NDIFF |
+      ...                          REPORT_ONLY_FIRST_FAILURE) == old
+      True
+
+      >>> doctest._unittest_reportflags == (REPORT_NDIFF |
+      ...                                   REPORT_ONLY_FIRST_FAILURE)
+      True
+
+    Only reporting flags can be set; anything else raises ValueError:
+
+      >>> doctest.set_unittest_reportflags(ELLIPSIS)
+      Traceback (most recent call last):
+      ...
+      ValueError: ('Only reporting flags allowed', 8)
+
+      >>> doctest.set_unittest_reportflags(old) == (REPORT_NDIFF |
+      ...                                   REPORT_ONLY_FIRST_FAILURE)
+      True
+    """
+    global _unittest_reportflags
+
+    # Masking with REPORTING_FLAGS must leave the value unchanged.
+    if flags != (flags & REPORTING_FLAGS):
+        raise ValueError("Only reporting flags allowed", flags)
+
+    previous, _unittest_reportflags = _unittest_reportflags, flags
+    return previous
+
+
+class DocTestCase(unittest.TestCase):
+    """unittest.TestCase that runs a single DocTest.
+
+    The constructor stores the DocTest together with its option flags,
+    an optional output checker, and optional setUp/tearDown callables,
+    each of which is called with the DocTest object.
+    """
+
+    def __init__(self, test, optionflags=0, setUp=None, tearDown=None,
+                 checker=None):
+
+        unittest.TestCase.__init__(self)
+        self._dt_test = test
+        self._dt_optionflags = optionflags
+        self._dt_checker = checker
+        self._dt_setUp = setUp
+        self._dt_tearDown = tearDown
+
+    def setUp(self):
+        """Call the user-supplied setUp hook, if any, with the DocTest."""
+        hook = self._dt_setUp
+        if hook is not None:
+            hook(self._dt_test)
+
+    def tearDown(self):
+        """Call the tearDown hook, if any, then clear the test's globals."""
+        test = self._dt_test
+        hook = self._dt_tearDown
+        if hook is not None:
+            hook(test)
+
+        test.globs.clear()
+
+    def runTest(self):
+        """Run the DocTest; raise failureException if any example fails.
+
+        The runner's report is collected in a StringIO and becomes the
+        body of the failure message.
+        """
+        test = self._dt_test
+        saved_stdout = sys.stdout
+        report = StringIO()
+        flags = self._dt_optionflags
+
+        # With no reporting flag of its own, the case takes the default
+        # reporting flags.
+        if not (flags & REPORTING_FLAGS):
+            flags |= _unittest_reportflags
+
+        runner = DocTestRunner(optionflags=flags,
+                               checker=self._dt_checker, verbose=False)
+
+        try:
+            runner.DIVIDER = "-"*70
+            failures, tries = runner.run(
+                test, out=report.write, clear_globs=False)
+        finally:
+            # Whatever happened during the run, put back the stdout
+            # object that was current on entry.
+            sys.stdout = saved_stdout
+
+        if failures:
+            raise self.failureException(self.format_failure(report.getvalue()))
+
+    def format_failure(self, err):
+        """Return the failure message: a test/file/line header, then `err`."""
+        test = self._dt_test
+        lineno = ('unknown line number' if test.lineno is None
+                  else '%s' % test.lineno)
+        # Last dotted component of the test name.
+        lname = test.name.split('.')[-1]
+        return ('Failed doctest test for %s\n'
+                ' File "%s", line %s, in %s\n\n%s'
+                % (test.name, test.filename, lineno, lname, err)
+                )
+
+    def debug(self):
+        r"""Run the test case without results and without catching exceptions
+
+           The unit test framework includes a debug method on test cases
+           and test suites to support post-mortem debugging.  The test code
+           is run in such a way that errors are not caught.  This way a
+           caller can catch the errors and initiate post-mortem debugging.
+
+           The DocTestCase provides a debug method that raises
+           UnexpectedException errors if there is an unexpected
+           exception:
+
+             >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42',
+             ...                {}, 'foo', 'foo.py', 0)
+             >>> case = DocTestCase(test)
+             >>> try:
+             ...     case.debug()
+             ... except UnexpectedException, failure:
+             ...     pass
+
+           The UnexpectedException contains the test, the example, and
+           the original exception:
+
+             >>> failure.test is test
+             True
+
+             >>> failure.example.want
+             '42\n'
+
+             >>> exc_info = failure.exc_info
+             >>> raise exc_info[0], exc_info[1], exc_info[2]
+             Traceback (most recent call last):
+             ...
+             KeyError
+
+           If the output doesn't match, then a DocTestFailure is raised:
+
+             >>> test = DocTestParser().get_doctest('''
+             ...      >>> x = 1
+             ...      >>> x
+             ...      2
+             ...      ''', {}, 'foo', 'foo.py', 0)
+             >>> case = DocTestCase(test)
+
+             >>> try:
+             ...    case.debug()
+             ... except DocTestFailure, failure:
+             ...    pass
+
+           DocTestFailure objects provide access to the test:
+
+             >>> failure.test is test
+             True
+
+           As well as to the example:
+
+             >>> failure.example.want
+             '2\n'
+
+           and the actual output:
+
+             >>> failure.got
+             '1\n'
+
+           """
+
+        self.setUp()
+        debug_runner = DebugRunner(optionflags=self._dt_optionflags,
+                                   checker=self._dt_checker, verbose=False)
+        debug_runner.run(self._dt_test, clear_globs=False)
+        # Only reached when the run raised nothing.
+        self.tearDown()
+
+    def id(self):
+        """Return the DocTest's name."""
+        return self._dt_test.name
+
+    def __eq__(self, other):
+        if type(self) is not type(other):
+            return NotImplemented
+
+        return (self._dt_test == other._dt_test and
+                self._dt_optionflags == other._dt_optionflags and
+                self._dt_setUp == other._dt_setUp and
+                self._dt_tearDown == other._dt_tearDown and
+                self._dt_checker == other._dt_checker)
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        # Hashes a subset of the fields __eq__ compares, so equal cases
+        # hash equally.
+        return hash((self._dt_optionflags, self._dt_setUp, self._dt_tearDown,
+                     self._dt_checker))
+
+    def __repr__(self):
+        parts = self._dt_test.name.split('.')
+        return "%s (%s)" % (parts[-1], '.'.join(parts[:-1]))
+
+    __str__ = __repr__
+
+    def shortDescription(self):
+        """Return the test name prefixed with "Doctest: "."""
+        return "Doctest: " + self._dt_test.name
+
+class SkipDocTestCase(DocTestCase):
+    """Stand-in case whose setUp() skips, naming the affected module.
+
+    DocTestSuite adds one of these when it finds no tests while running
+    with optimization level 2 or higher.
+    """
+
+    def __init__(self, module):
+        # No DocTest is attached; only the module is remembered.
+        self.module = module
+        DocTestCase.__init__(self, None)
+
+    def setUp(self):
+        """Skip unconditionally."""
+        self.skipTest("DocTestSuite will not work with -O2 and above")
+
+    def test_skip(self):
+        """Intentionally empty."""
+        pass
+
+    def shortDescription(self):
+        """Return a message naming the module whose tests are skipped."""
+        module_name = self.module.__name__
+        return "Skipping tests from %s" % module_name
+
+    __str__ = shortDescription
+
+
+def DocTestSuite(module=None, globs=None, extraglobs=None, test_finder=None,
+                 **options):
+    """
+    Convert doctest tests for a module to a unittest test suite.
+
+    Each documentation string in the module that contains doctest
+    examples becomes one unittest test case.  If any of the tests in a
+    doc string fail, then the test case fails.  An exception is raised
+    showing the name of the file containing the test and a (sometimes
+    approximate) line number.
+
+    The `module` argument provides the module to be tested.  It can be
+    either a module or a module name.  If no argument is given, the
+    calling module is used.
+
+    `test_finder` is the finder used to collect the tests; a fresh
+    DocTestFinder is used when it is None.
+
+    A number of options may be provided as keyword arguments:
+
+    setUp
+      A set-up function.  This is called before running the
+      tests in each file. The setUp function will be passed a DocTest
+      object.  The setUp function can access the test globals as the
+      globs attribute of the test passed.
+
+    tearDown
+      A tear-down function.  This is called after running the
+      tests in each file.  The tearDown function will be passed a DocTest
+      object.  The tearDown function can access the test globals as the
+      globs attribute of the test passed.
+
+    globs
+      A dictionary containing initial global variables for the tests.
+
+    optionflags
+      A set of doctest option flags expressed as an integer.
+
+    Raises ValueError when the finder returns no tests, unless Python
+    runs with optimization level 2 or higher, in which case a suite
+    holding a single SkipDocTestCase is returned instead.
+    """
+
+    if test_finder is None:
+        test_finder = DocTestFinder()
+
+    # Must be called directly from this function: when `module` is None
+    # the calling module is used.
+    module = _normalize_module(module)
+    tests = test_finder.find(module, globs=globs, extraglobs=extraglobs)
+
+    if not tests:
+        if sys.flags.optimize >= 2:
+            # Skip doctests when running with -O2
+            skipped = unittest.TestSuite()
+            skipped.addTest(SkipDocTestCase(module))
+            return skipped
+        # Why do we want to do this? Because it reveals a bug that might
+        # otherwise be hidden.
+        # It is probably a bug that this exception is not also raised if the
+        # number of doctest examples in tests is zero (i.e. if no doctest
+        # examples were found).  However, we should probably not be raising
+        # an exception at all here, though it is too late to make this change
+        # for a maintenance release.  See also issue #14649.
+        raise ValueError(module, "has no docstrings")
+
+    tests.sort()
+    suite = unittest.TestSuite()
+
+    for test in tests:
+        # Docstrings without examples produce no test case.
+        if len(test.examples) == 0:
+            continue
+        if not test.filename:
+            # Fall back to the module's file, mapping a compiled
+            # ".pyc"/".pyo" name to ".py" by dropping the last character.
+            source_name = module.__file__
+            if source_name[-4:] in (".pyc", ".pyo"):
+                source_name = source_name[:-1]
+            test.filename = source_name
+        suite.addTest(DocTestCase(test, **options))
+
+    return suite
+
+class DocFileCase(DocTestCase):
+    """DocTestCase variant used for tests that come from a file."""
+
+    def id(self):
+        """Return the test name with every '.' replaced by '_'."""
+        return self._dt_test.name.replace('.', '_')
+
+    def __repr__(self):
+        return self._dt_test.filename
+    __str__ = __repr__
+
+    def format_failure(self, err):
+        """Return the failure message; the line is always reported as 0."""
+        test = self._dt_test
+        return ('Failed doctest test for %s\n File "%s", line 0\n\n%s'
+                % (test.name, test.filename, err)
+                )
+
+def DocFileTest(path, module_relative=True, package=None,
+                globs=None, parser=DocTestParser(),
+                encoding=None, **options):
+    """Build a DocFileCase from the doctest file at `path`.
+
+    `module_relative` and `package` control how `path` is resolved (see
+    DocFileSuite); giving `package` with a false `module_relative`
+    raises ValueError.  The test runs against a copy of `globs` (an
+    empty dict by default), in which '__file__' is set to the resolved
+    path unless already present.  `parser` extracts the examples,
+    `encoding`, if given, is used to decode the file's text, and the
+    remaining keyword `options` are passed to the DocFileCase
+    constructor.
+    """
+    # Never mutate the caller's dict.
+    globs = {} if globs is None else globs.copy()
+
+    if package and not module_relative:
+        raise ValueError("Package may only be specified for module-"
+                         "relative paths.")
+
+    # Load the file's text and get back the path it was resolved to.
+    doc, path = _load_testfile(path, package, module_relative)
+
+    if "__file__" not in globs:
+        globs["__file__"] = path
+
+    # The test is named after the file.
+    name = os.path.basename(path)
+
+    # If an encoding is specified, use it to convert the file to unicode
+    if encoding is not None:
+        doc = doc.decode(encoding)
+
+    # Convert it to a test, and wrap it in a DocFileCase.
+    file_test = parser.get_doctest(doc, globs, name, path, 0)
+    return DocFileCase(file_test, **options)
+
+def DocFileSuite(*paths, **kw):
+    """A unittest suite for one or more doctest files.
+
+    Each positional argument is the path to one doctest file, given as
+    a string; how that string is interpreted depends on the keyword
+    argument "module_relative".
+
+    A number of options may be provided as keyword arguments:
+
+    module_relative
+      If "module_relative" is True (the default when it is omitted),
+      the given file paths are interpreted as os-independent
+      module-relative paths.  By default, these paths are relative to
+      the calling module's directory; but if the "package" argument is
+      specified, then they are relative to that package.  To ensure
+      os-independence, "filename" should use "/" characters to
+      separate path segments, and may not be an absolute path (i.e.,
+      it may not begin with "/").
+
+      If "module_relative" is False, then the given file paths are
+      interpreted as os-specific paths.  These paths may be absolute
+      or relative (to the current working directory).
+
+    package
+      A Python package or the name of a Python package whose directory
+      should be used as the base directory for module relative paths.
+      If "package" is not specified, then the calling module's
+      directory is used as the base directory for module relative
+      filenames.  It is an error to specify "package" if
+      "module_relative" is False.
+
+    setUp
+      A set-up function.  This is called before running the
+      tests in each file. The setUp function will be passed a DocTest
+      object.  The setUp function can access the test globals as the
+      globs attribute of the test passed.
+
+    tearDown
+      A tear-down function.  This is called after running the
+      tests in each file.  The tearDown function will be passed a DocTest
+      object.  The tearDown function can access the test globals as the
+      globs attribute of the test passed.
+
+    globs
+      A dictionary containing initial global variables for the tests.
+
+    optionflags
+      A set of doctest option flags expressed as an integer.
+
+    parser
+      A DocTestParser (or subclass) that should be used to extract
+      tests from the files.
+
+    encoding
+      An encoding that will be used to convert the files to unicode.
+    """
+    suite = unittest.TestSuite()
+
+    # We do this here so that _normalize_module is called at the right
+    # level.  If it were called in DocFileTest, then this function
+    # would be the caller and we might guess the package incorrectly.
+    # (For the same reason the call is not wrapped in a helper.)
+    module_relative = kw.get('module_relative', True)
+    if module_relative:
+        kw['package'] = _normalize_module(kw.get('package'))
+
+    for doc_path in paths:
+        file_case = DocFileTest(doc_path, **kw)
+        suite.addTest(file_case)
+
+    return suite
+
+######################################################################
+## 9. Debugging Support
+######################################################################
+
+def script_from_examples(s):
+    r"""Extract script from text with examples.
+
+       Converts text with examples to a Python script.  Example input is
+       converted to regular code.  Example output and all other words
+       are converted to comments:
+
+       >>> text = '''
+       ...       Here are examples of simple math.
+       ...
+       ...           Python has super accurate integer addition
+       ...
+       ...           >>> 2 + 2
+       ...           5
+       ...
+       ...           And very friendly error messages:
+       ...
+       ...           >>> 1/0
+       ...           To Infinity
+       ...           And
+       ...           Beyond
+       ...
+       ...           You can use logic if you want:
+       ...
+       ...           >>> if 0:
+       ...           ...    blah
+       ...           ...    blah
+       ...           ...
+       ...
+       ...           Ho hum
+       ...           '''
+
+       >>> print script_from_examples(text)
+       # Here are examples of simple math.
+       #
+       #     Python has super accurate integer addition
+       #
+       2 + 2
+       # Expected:
+       ## 5
+       #
+       #     And very friendly error messages:
+       #
+       1/0
+       # Expected:
+       ## To Infinity
+       ## And
+       ## Beyond
+       #
+       #     You can use logic if you want:
+       #
+       if 0:
+          blah
+          blah
+       #
+       #     Ho hum
+       <BLANKLINE>
+
+       `s` is the text to convert.  The returned script is a single
+       string that always ends with a newline, which is why the printed
+       output above ends with a blank line.
+       """
+    output = []
+    for piece in DocTestParser().parse(s):
+        if isinstance(piece, Example):
+            # Add the example's source code (strip trailing NL)
+            output.append(piece.source[:-1])
+            # Add the expected output, if any, as '## ' comment lines.
+            want = piece.want
+            if want:
+                output.append('# Expected:')
+                output.extend(['## '+l for l in want.split('\n')[:-1]])
+        else:
+            # Add non-example text, commented out line by line.
+            output.extend([_comment_line(l)
+                           for l in piece.split('\n')[:-1]])
+
+    # Trim junk (bare '#' lines) on both ends.
+    while output and output[-1] == '#':
+        output.pop()
+    while output and output[0] == '#':
+        output.pop(0)
+    # Combine the output, and return it.
+    # Add a courtesy newline to prevent exec from choking (see bug #1172785)
+    return '\n'.join(output) + '\n'
+
+def testsource(module, name):
+    """Extract the test sources from a doctest docstring as a script.
+
+    `module` is the module (or the dotted name of the module) holding
+    the test to be debugged; `name` is the name of the test, as
+    reported by DocTestFinder, whose docstring is to be converted.
+    Raises ValueError when no test in the module carries that name.
+    """
+    module = _normalize_module(module)
+    # The first test with a matching name wins.
+    for candidate in DocTestFinder().find(module):
+        if candidate.name == name:
+            return script_from_examples(candidate.docstring)
+    raise ValueError(name, "not found in tests")
+
+def debug_src(src, pm=False, globs=None):
+    """Debug a single doctest docstring, in argument `src`.
+
+    The docstring is converted to a script with script_from_examples()
+    and handed, together with `pm` and `globs`, to debug_script().
+    """
+    debug_script(script_from_examples(src), pm, globs)
+
+def debug_script(src, pm=False, globs=None):
+    """Debug a test script.  `src` is the script, as a string.
+
+    The script is written to a temporary file.  If `pm` is true it is
+    executed directly, and on any exception the exception value is
+    printed and pdb's post-mortem debugger is started on the traceback;
+    otherwise it is executed under pdb.run().  The script runs against
+    a copy of `globs` (an empty dict if `globs` is empty or None).  The
+    temporary file is removed afterwards, even on error.
+    """
+    import pdb
+
+    # Note that tempfile.NamedTemporaryFile() cannot be used.  As the
+    # docs say, a file so created cannot be opened by name a second time
+    # on modern Windows boxes, and execfile() needs to open it.
+    # mkstemp() creates the file itself, so unlike mktemp() there is no
+    # window in which someone else can create the name first; our handle
+    # is closed again before execfile() reopens the file by name.
+    fd, srcfilename = tempfile.mkstemp(".py", "doctestdebug", text=True)
+
+    try:
+        f = os.fdopen(fd, 'w')
+        try:
+            f.write(src)
+        finally:
+            f.close()
+
+        if globs:
+            globs = globs.copy()
+        else:
+            globs = {}
+
+        if pm:
+            # The bare except is deliberate: whatever the script raises
+            # is handed to the post-mortem debugger.
+            try:
+                execfile(srcfilename, globs, globs)
+            except:
+                print sys.exc_info()[1]
+                pdb.post_mortem(sys.exc_info()[2])
+        else:
+            # Note that %r is vital here.  '%s' instead can, e.g., cause
+            # backslashes to get treated as metacharacters on Windows.
+            pdb.run("execfile(%r)" % srcfilename, globs, globs)
+
+    finally:
+        # The file exists from mkstemp() onwards, so it is removed even
+        # when writing the script fails.
+        os.remove(srcfilename)
+
+def debug(module, name, pm=False):
+    """Debug a single doctest docstring.
+
+    `module` is the module (or the dotted name of the module) holding
+    the test to be debugged, and `name` is the name (within the module)
+    of the object whose docstring holds the tests.  The extracted
+    script is run by debug_script() with the module's __dict__ as
+    globals; `pm` is passed through to it.
+    """
+    module = _normalize_module(module)
+    script = testsource(module, name)
+    debug_script(script, pm, module.__dict__)
+
+######################################################################
+## 10. Example Usage
+######################################################################
+class _TestClass:
+    """
+    A pointless class, for sanity-checking of docstring testing.
+
+    Methods:
+        square()
+        get()
+
+    >>> _TestClass(13).get() + _TestClass(-12).get()
+    1
+    >>> hex(_TestClass(13).square().get())
+    '0xa9'
+    """
+
+    def __init__(self, val):
+        """val -> _TestClass object with associated value val.
+
+        >>> t = _TestClass(123)
+        >>> print t.get()
+        123
+        """
+
+        self.val = val
+
+    def square(self):
+        """square() -> square TestClass's associated value
+
+        The value is replaced in place and the instance itself is
+        returned, so calls can be chained.
+
+        >>> _TestClass(13).square().get()
+        169
+        """
+
+        squared = self.val ** 2
+        self.val = squared
+        return self
+
+    def get(self):
+        """get() -> return TestClass's associated value.
+
+        >>> x = _TestClass(-42)
+        >>> print x.get()
+        -42
+        """
+
+        return self.val
+
+# Extra self-tests: a class whose docstrings are searched, plus raw
+# strings that are tested directly, as if they were docstrings.  The
+# text inside each string is left exactly as found, except that the
+# <BLANKLINE> markers of the "blank lines" entry are restored (without
+# them the expected output would end at the first empty line).
+__test__ = {"_TestClass": _TestClass,
+            "string": r"""
+ Example of a string object, searched as-is.
+ >>> x = 1; y = 2
+ >>> x + y, x * y
+ (3, 2)
+ """,
+
+            "bool-int equivalence": r"""
+ In 2.2, boolean expressions displayed
+ 0 or 1. By default, we still accept
+ them. This can be disabled by passing
+ DONT_ACCEPT_TRUE_FOR_1 to the new
+ optionflags argument.
+ >>> 4 == 4
+ 1
+ >>> 4 == 4
+ True
+ >>> 4 > 4
+ 0
+ >>> 4 > 4
+ False
+ """,
+
+            "blank lines": r"""
+ Blank lines can be marked with <BLANKLINE>:
+ >>> print 'foo\n\nbar\n'
+ foo
+ <BLANKLINE>
+ bar
+ <BLANKLINE>
+ """,
+
+            "ellipsis": r"""
+ If the ellipsis flag is used, then '...' can be used to
+ elide substrings in the desired output:
+ >>> print range(1000) #doctest: +ELLIPSIS
+ [0, 1, 2, ..., 999]
+ """,
+
+            "whitespace normalization": r"""
+ If the whitespace normalization flag is used, then
+ differences in whitespace are ignored.
+ >>> print range(30) #doctest: +NORMALIZE_WHITESPACE
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29]
+ """,
+            }
+
+
+def _test():
+    """Command-line entry point; return the process exit status.
+
+    Every non-empty argument that does not start with '-' is a file to
+    test.  Files ending in ".py" are imported and run through
+    testmod(); all others go through testfile() as os-specific paths.
+    Returns 2 (after printing a usage line) when no file was given, 1
+    as soon as one file reports failures, and 0 otherwise.
+    """
+    testfiles = [arg for arg in sys.argv[1:]
+                 if arg and not arg.startswith('-')]
+    if not testfiles:
+        name = os.path.basename(sys.argv[0])
+        if '__loader__' in globals():          # python -m
+            name, _ = os.path.splitext(name)
+        print("usage: {0} [-v] file ...".format(name))
+        return 2
+
+    for filename in testfiles:
+        if not filename.endswith(".py"):
+            failures, _ = testfile(filename, module_relative=False)
+        else:
+            # It is a module -- insert its dir into sys.path and try to
+            # import it. If it is part of a package, that possibly
+            # won't work because of package imports.
+            dirname, filename = os.path.split(filename)
+            sys.path.insert(0, dirname)
+            m = __import__(filename[:-3])
+            del sys.path[0]
+            failures, _ = testmod(m)
+        # Stop at the first file that has failures.
+        if failures:
+            return 1
+    return 0
+
+
+if __name__ == "__main__":
+    # Run the command-line driver; its return value is the exit status.
+    exit_status = _test()
+    sys.exit(exit_status)
diff --git a/cashew/Lib/dumbdbm.py b/cashew/Lib/dumbdbm.py
new file mode 100644
index 0000000..ac73b89
--- /dev/null
+++ b/cashew/Lib/dumbdbm.py
@@ -0,0 +1,253 @@
+"""A dumb and slow but simple dbm clone.
+
+For database spam, spam.dir contains the index (a text file),
+spam.bak *may* contain a backup of the index (also a text file),
+while spam.dat contains the data (a binary file).
+
+XXX TO DO:
+
+- seems to contain a bug when updating...
+
+- reclaim free space (currently, space once occupied by deleted or expanded
+items is never reused)
+
+- support concurrent access (currently, if two processes take turns making
+updates, they can mess up the index)
+
+- support efficient access to large databases (currently, the whole index
+is read when the database is opened, and some updates rewrite the whole index)
+
+- support opening for read-only (flag = 'm')
+
+"""
+
+import ast as _ast
+import os as _os
+import __builtin__
+import UserDict
+
+_open = __builtin__.open
+
+_BLOCKSIZE = 512
+
+error = IOError # For anydbm
+
+class _Database(UserDict.DictMixin):
+    """Dictionary-like database of string keys and values kept on disk.
+
+    Three files share the base name given to the constructor: the data
+    file (extension 'dat'), the directory file holding the index ('dir')
+    and a backup of the previous directory file ('bak').  The index is
+    mirrored in memory in self._index, mapping each key to a
+    (pos, siz) pair that locates its value inside the data file.
+    """
+
+    # The on-disk directory and data files can remain in mutually
+    # inconsistent states for an arbitrarily long time (see comments
+    # at the end of __setitem__).  This is only repaired when _commit()
+    # gets called.  One place _commit() gets called is from __del__(),
+    # and if that occurs at program shutdown time, module globals may
+    # already have gotten rebound to None.  Since it's crucial that
+    # _commit() finish successfully, we can't ignore shutdown races
+    # here, and _commit() must not reference any globals.
+    _os = _os       # for _commit()
+    _open = _open   # for _commit()
+
+    def __init__(self, filebasename, mode, flag='c'):
+        """Record the file names, create the data file if needed, load index.
+
+        filebasename -- path prefix shared by the three files.
+        mode -- permission bits handed to os.chmod() (see _chmod) for the
+                files this object creates or rewrites.
+        flag -- only the value 'r' is distinguished here; it is stored as
+                self._readonly and consulted by _update().
+        """
+        self._mode = mode
+        self._readonly = (flag == 'r')
+
+        # The directory file is a text file.  Each line looks like
+        #    "%r, (%d, %d)\n" % (key, pos, siz)
+        # where key is the string key, pos is the offset into the dat
+        # file of the associated value's first byte, and siz is the number
+        # of bytes in the associated value.
+        self._dirfile = filebasename + _os.extsep + 'dir'
+
+        # The data file is a binary file pointed into by the directory
+        # file, and holds the values associated with keys.  Each value
+        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
+        # binary 8-bit string value.
+        self._datfile = filebasename + _os.extsep + 'dat'
+        self._bakfile = filebasename + _os.extsep + 'bak'
+
+        # The index is an in-memory dict, mirroring the directory file.
+        self._index = None  # maps keys to (pos, siz) pairs
+
+        # Mod by Jack: create data file if needed
+        try:
+            f = _open(self._datfile, 'r')
+        except IOError:
+            # Could not open it for reading: create an empty data file
+            # and give it the requested permission bits.
+            with _open(self._datfile, 'w') as f:
+                self._chmod(self._datfile)
+        else:
+            f.close()
+        self._update()
+
+    # Read directory file into the in-memory index dict.
+    def _update(self):
+        """Rebuild self._index from the directory file.
+
+        When the directory file cannot be opened the index stays empty and
+        the database is flagged as modified, unless it was opened with
+        flag 'r'.  Otherwise every line is parsed with ast.literal_eval()
+        into a (key, (pos, siz)) pair.
+        """
+        self._index = {}
+        try:
+            f = _open(self._dirfile)
+        except IOError:
+            self._modified = not self._readonly
+        else:
+            self._modified = False
+            with f:
+                for line in f:
+                    line = line.rstrip()
+                    key, pos_and_siz_pair = _ast.literal_eval(line)
+                    self._index[key] = pos_and_siz_pair
+
+    # Write the index dict to the directory file.  The original directory
+    # file (if any) is renamed with a .bak extension first.  If a .bak
+    # file currently exists, it's deleted.
+    def _commit(self):
+        """Write the in-memory index out as a fresh directory file.
+
+        Does nothing when the index is None (the state close() leaves
+        behind) or when nothing was modified since it was loaded.
+        """
+        # CAUTION:  It's vital that _commit() succeed, and _commit() can
+        # be called from __del__().  Therefore we must never reference a
+        # global in this routine.
+        if self._index is None or not self._modified:
+            return  # nothing to do
+
+        # Failures of the next two steps are ignored on purpose: the
+        # backup or the directory file may simply not exist yet.
+        try:
+            self._os.unlink(self._bakfile)
+        except self._os.error:
+            pass
+
+        try:
+            self._os.rename(self._dirfile, self._bakfile)
+        except self._os.error:
+            pass
+
+        with self._open(self._dirfile, 'w') as f:
+            self._chmod(self._dirfile)
+            for key, pos_and_siz_pair in self._index.iteritems():
+                f.write("%r, %r\n" % (key, pos_and_siz_pair))
+
+    sync = _commit
+
+    def __getitem__(self, key):
+        """Return the value stored for key, read from the data file.
+
+        Raises KeyError when key is not in the index.
+        """
+        pos, siz = self._index[key]     # may raise KeyError
+        with _open(self._datfile, 'rb') as f:
+            f.seek(pos)
+            dat = f.read(siz)
+        return dat
+
+    # Append val to the data file, starting at a _BLOCKSIZE-aligned
+    # offset.  The data file is first padded with NUL bytes (if needed)
+    # to get to an aligned offset.  Return pair
+    #     (starting offset of val, len(val))
+    def _addval(self, val):
+        with _open(self._datfile, 'rb+') as f:
+            f.seek(0, 2)    # whence=2: position at the end of the file
+            pos = int(f.tell())
+            # Round the end-of-file offset up to a multiple of _BLOCKSIZE.
+            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
+            f.write('\0'*(npos-pos))
+            pos = npos
+            f.write(val)
+        return (pos, len(val))
+
+    # Write val to the data file, starting at offset pos.  The caller
+    # is responsible for ensuring that there's enough room starting at
+    # pos to hold val, without overwriting some other value.  Return
+    # pair (pos, len(val)).
+    def _setval(self, pos, val):
+        with _open(self._datfile, 'rb+') as f:
+            f.seek(pos)
+            f.write(val)
+        return (pos, len(val))
+
+    # key is a new key whose associated value starts in the data file
+    # at offset pos and with length siz.  Add an index record to
+    # the in-memory index dict, and append one to the directory file.
+    def _addkey(self, key, pos_and_siz_pair):
+        self._index[key] = pos_and_siz_pair
+        with _open(self._dirfile, 'a') as f:
+            self._chmod(self._dirfile)
+            f.write("%r, %r\n" % (key, pos_and_siz_pair))
+
+    def __setitem__(self, key, val):
+        """Store val under key; both must be exactly of the plain str type.
+
+        Raises TypeError otherwise.  A new key is appended to both files.
+        An existing key is overwritten in place when the new value needs
+        no more blocks than the old one, and appended at the end of the
+        data file otherwise.
+        """
+        if not type(key) == type('') == type(val):
+            raise TypeError, "keys and values must be strings"
+        self._modified = True
+        if key not in self._index:
+            self._addkey(key, self._addval(val))
+        else:
+            # See whether the new value is small enough to fit in the
+            # (padded) space currently occupied by the old value.
+            pos, siz = self._index[key]
+            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
+            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
+            if newblocks <= oldblocks:
+                self._index[key] = self._setval(pos, val)
+            else:
+                # The new value doesn't fit in the (padded) space used
+                # by the old value.  The blocks used by the old value are
+                # forever lost.
+                self._index[key] = self._addval(val)
+
+            # Note that _index may be out of synch with the directory
+            # file now:  _setval() and _addval() don't update the directory
+            # file.  This also means that the on-disk directory and data
+            # files are in a mutually inconsistent state, and they'll
+            # remain that way until _commit() is called.  Note that this
+            # is a disaster (for the database) if the program crashes
+            # (so that _commit() never gets called).
+
+    def __delitem__(self, key):
+        """Remove key from the index and commit the index to disk.
+
+        Raises KeyError when key is absent.  NOTE(review): the modified
+        flag is set before the lookup, so it stays set even then.
+        """
+        self._modified = True
+        # The blocks used by the associated value are lost.
+        del self._index[key]
+        # XXX It's unclear why we do a _commit() here (the code always
+        # XXX has, so I'm not changing it).  _setitem__ doesn't try to
+        # XXX keep the directory file in synch.  Why should we?  Or
+        # XXX why shouldn't __setitem__?
+        self._commit()
+
+    def keys(self):
+        """Return the keys currently in the in-memory index."""
+        return self._index.keys()
+
+    def has_key(self, key):
+        """Return whether key is in the in-memory index."""
+        return key in self._index
+
+    def __contains__(self, key):
+        return key in self._index
+
+    def iterkeys(self):
+        """Return an iterator over the keys of the in-memory index."""
+        return self._index.iterkeys()
+    __iter__ = iterkeys
+
+    def __len__(self):
+        return len(self._index)
+
+    def close(self):
+        """Commit pending index changes, then forget index and file names.
+
+        The attributes are reset even when _commit() raises.  Because the
+        index is None afterwards, a later _commit() is a no-op.
+        """
+        try:
+            self._commit()
+        finally:
+            self._index = self._datfile = self._dirfile = self._bakfile = None
+
+    __del__ = close
+
+    def _chmod (self, file):
+        # Apply the permission bits given to the constructor; skipped when
+        # the os module on this platform has no chmod().
+        if hasattr(self._os, 'chmod'):
+            self._os.chmod(file, self._mode)
+
+
+def open(file, flag=None, mode=0666):
+    """Open the database named by file and return a _Database object.
+
+    file is the base name shared by the database's files.  The data file
+    is created if it does not exist.
+
+    flag is forwarded to _Database unchanged.  The other DBM
+    implementations use it to choose how the database is opened; here
+    the only value _Database distinguishes is 'r'.
+
+    The optional mode argument is the UNIX mode of the file, used when the
+    database files are created.  It defaults to octal code 0666 (and is
+    reduced by the prevailing umask below).
+    """
+    try:
+        # os.umask() can only be read by setting it, so set a throwaway
+        # value and put the real one straight back.
+        current_umask = _os.umask(0)
+        _os.umask(current_umask)
+    except AttributeError:
+        # No umask on this platform: use mode exactly as given.
+        pass
+    else:
+        # Clear every permission bit that the umask masks out.
+        mode &= ~current_umask
+
+    return _Database(file, mode, flag)
diff --git a/cashew/Lib/dummy_thread.py b/cashew/Lib/dummy_thread.py
new file mode 100644
index 0000000..198dc49
--- /dev/null
+++ b/cashew/Lib/dummy_thread.py
@@ -0,0 +1,145 @@
+"""Drop-in replacement for the thread module.
+
+Meant to be used as a brain-dead substitute so that threaded code does
+not need to be rewritten for when the thread module is not present.
+
+Suggested usage is::
+
+ try:
+ import thread
+ except ImportError:
+ import dummy_thread as thread
+
+"""
+# Exports only things specified by thread documentation;
+# skipping obsolete synonyms allocate(), start_new(), exit_thread().
+__all__ = ['error', 'start_new_thread', 'exit', 'get_ident', 'allocate_lock',
+ 'interrupt_main', 'LockType']
+
+import traceback as _traceback
+
+class error(Exception):
+    """Dummy implementation of thread.error."""
+
+    def __init__(self, *args):
+        # Keep the positional arguments as given; Exception.__init__ is
+        # not called.
+        self.args = args
+
+def start_new_thread(function, args, kwargs={}):
+    """Dummy implementation of thread.start_new_thread().
+
+    The "thread" is simply run to completion in the caller: function is
+    invoked as function(*args, **kwargs).  args must be exactly a tuple
+    and kwargs exactly a dict, otherwise TypeError is raised before
+    anything runs.
+
+    SystemExit raised by the function (for instance through exit()) is
+    swallowed; any other exception is reported with
+    traceback.print_exc() and not propagated.
+
+    If the function called interrupt_main(), KeyboardInterrupt is raised
+    here once the function has returned.
+    """
+    global _main, _interrupt
+    if type(args) != tuple:
+        raise TypeError("2nd arg must be a tuple")
+    if type(kwargs) != dict:
+        raise TypeError("3rd arg must be a dict")
+    # While the function runs we are "inside a thread", so that
+    # interrupt_main() defers the interrupt instead of raising at once.
+    _main = False
+    try:
+        function(*args, **kwargs)
+    except SystemExit:
+        pass
+    except:
+        _traceback.print_exc()
+    _main = True
+    if not _interrupt:
+        return
+    _interrupt = False
+    raise KeyboardInterrupt
+
+def exit():
+    """Stand-in for thread.exit(): always raises SystemExit."""
+    raise SystemExit()
+
+def get_ident():
+    """Stand-in for thread.get_ident(); always returns -1.
+
+    This module is meant for use only when the real thread module is
+    unavailable, in which case the process has a single thread and one
+    constant identifier is enough.
+    """
+    return -1
+
+def allocate_lock():
+    """Stand-in for thread.allocate_lock(): return a new, unlocked LockType."""
+    lock = LockType()
+    return lock
+
+def stack_size(size=None):
+    """Stand-in for thread.stack_size().
+
+    Called without a size it returns 0.  Asking for any size raises
+    error, because the stack size cannot be changed here.
+    """
+    if size is None:
+        return 0
+    raise error("setting thread stack size not supported")
+
+class LockType(object):
+    """Single-threaded stand-in for thread.LockType.
+
+    The whole state of the lock is the boolean self.locked_status.  Do
+    not pickle instances: an unpickled dummy lock used together with the
+    real thread module could misbehave, since the methods here are not
+    atomic.
+    """
+
+    def __init__(self):
+        self.locked_status = False
+
+    def acquire(self, waitflag=None):
+        """Dummy implementation of acquire().
+
+        A blocking request (waitflag omitted, None or true) always
+        succeeds: the lock is marked as held and True is returned, even
+        if it was held already.  A non-blocking request (waitflag false)
+        succeeds only when the lock is free and returns False otherwise.
+        Behaving this way keeps the assert statements in
+        threading.Condition from firing.
+        """
+        blocking = waitflag is None or waitflag
+        if not blocking and self.locked_status:
+            return False
+        self.locked_status = True
+        return True
+
+    __enter__ = acquire
+
+    def __exit__(self, typ, val, tb):
+        self.release()
+
+    def release(self):
+        """Mark the lock as free and return True.
+
+        Raises error when the lock is not currently held.
+        """
+        # XXX Perhaps shouldn't actually bother to test?  Could lead
+        #     to problems for complex, threaded code.
+        if self.locked_status:
+            self.locked_status = False
+            return True
+        raise error
+
+    def locked(self):
+        """Return whether the lock is currently marked as held."""
+        return self.locked_status
+
+# Used to signal that interrupt_main was called in a "thread"
+_interrupt = False
+# True when not executing in a "thread"
+_main = True
+
+def interrupt_main():
+    """Dummy implementation of thread.interrupt_main().
+
+    Outside start_new_thread() this raises KeyboardInterrupt at once.
+    Inside a running "thread" it only sets the _interrupt flag, and
+    start_new_thread() raises KeyboardInterrupt once the function ends.
+    """
+    global _interrupt
+    if _main:
+        raise KeyboardInterrupt
+    _interrupt = True
diff --git a/cashew/Lib/dummy_threading.py b/cashew/Lib/dummy_threading.py
new file mode 100644
index 0000000..81028a3
--- /dev/null
+++ b/cashew/Lib/dummy_threading.py
@@ -0,0 +1,78 @@
+"""Faux ``threading`` version using ``dummy_thread`` instead of ``thread``.
+
+The module ``_dummy_threading`` is added to ``sys.modules`` in order
+to not have ``threading`` considered imported. Had ``threading`` been
+directly imported it would have made all subsequent imports succeed
+regardless of whether ``thread`` was available which is not desired.
+
+"""
+# Overview: temporarily register dummy_thread under the name ``thread``,
+# import a fresh copy of ``threading`` (and with it ``_threading_local``)
+# on top of it, stash those copies under ``_dummy_*`` names, re-export the
+# public names, and finally restore whatever sys.modules held before.
+from sys import modules as sys_modules
+
+import dummy_thread
+
+# Declaring now so as to not have to nest ``try``s to get proper clean-up.
+holding_thread = False
+holding_threading = False
+holding__threading_local = False
+
+try:
+    # Could have checked if ``thread`` was not in sys.modules and gone
+    # a different route, but decided to mirror technique used with
+    # ``threading`` below.
+    if 'thread' in sys_modules:
+        held_thread = sys_modules['thread']
+        holding_thread = True
+    # Must have some module named ``thread`` that implements its API
+    # in order to initially import ``threading``.
+    sys_modules['thread'] = sys_modules['dummy_thread']
+
+    if 'threading' in sys_modules:
+        # If ``threading`` is already imported, might as well prevent
+        # trying to import it more than needed by saving it if it is
+        # already imported before deleting it.
+        held_threading = sys_modules['threading']
+        holding_threading = True
+        del sys_modules['threading']
+
+    if '_threading_local' in sys_modules:
+        # If ``_threading_local`` is already imported, might as well prevent
+        # trying to import it more than needed by saving it if it is
+        # already imported before deleting it.
+        held__threading_local = sys_modules['_threading_local']
+        holding__threading_local = True
+        del sys_modules['_threading_local']
+
+    # With the real entries out of the way this import creates fresh
+    # module objects bound to the dummy ``thread``.
+    import threading
+    # Need a copy of the code kept somewhere...
+    sys_modules['_dummy_threading'] = sys_modules['threading']
+    del sys_modules['threading']
+    sys_modules['_dummy__threading_local'] = sys_modules['_threading_local']
+    del sys_modules['_threading_local']
+    # Re-export the public API of the dummy-backed copy from this module.
+    from _dummy_threading import *
+    from _dummy_threading import __all__
+
+finally:
+    # Put back ``threading`` if we overwrote earlier
+
+    if holding_threading:
+        sys_modules['threading'] = held_threading
+        del held_threading
+    del holding_threading
+
+    # Put back ``_threading_local`` if we overwrote earlier
+
+    if holding__threading_local:
+        sys_modules['_threading_local'] = held__threading_local
+        del held__threading_local
+    del holding__threading_local
+
+    # Put back ``thread`` if we overwrote, else del the entry we made
+    if holding_thread:
+        sys_modules['thread'] = held_thread
+        del held_thread
+    else:
+        del sys_modules['thread']
+    del holding_thread
+
+    # Remove the helper names so they do not linger in this module's
+    # namespace.
+    del dummy_thread
+    del sys_modules
diff --git a/cashew/Lib/email/__init__.py b/cashew/Lib/email/__init__.py
new file mode 100644
index 0000000..a780ebe
--- /dev/null
+++ b/cashew/Lib/email/__init__.py
@@ -0,0 +1,123 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""A package for parsing, handling, and generating email messages."""
+
+__version__ = '4.0.3'
+
+__all__ = [
+ # Old names
+ 'base64MIME',
+ 'Charset',
+ 'Encoders',
+ 'Errors',
+ 'Generator',
+ 'Header',
+ 'Iterators',
+ 'Message',
+ 'MIMEAudio',
+ 'MIMEBase',
+ 'MIMEImage',
+ 'MIMEMessage',
+ 'MIMEMultipart',
+ 'MIMENonMultipart',
+ 'MIMEText',
+ 'Parser',
+ 'quopriMIME',
+ 'Utils',
+ 'message_from_string',
+ 'message_from_file',
+ # new names
+ 'base64mime',
+ 'charset',
+ 'encoders',
+ 'errors',
+ 'generator',
+ 'header',
+ 'iterators',
+ 'message',
+ 'mime',
+ 'parser',
+ 'quoprimime',
+ 'utils',
+ ]
+
+
+
+# Some convenience routines. Don't import Parser and Message as side-effects
+# of importing email since those cascadingly import most of the rest of the
+# email package.
+def message_from_string(s, *args, **kws):
+    """Parse the string s and return the resulting Message object model.
+
+    Any further positional and keyword arguments (such as the optional
+    _class and strict) are handed to the Parser constructor.
+    """
+    # Imported here, not at module level, so that importing the email
+    # package does not drag in the parser machinery as a side effect.
+    from email.parser import Parser
+    parser = Parser(*args, **kws)
+    return parser.parsestr(s)
+
+
+def message_from_file(fp, *args, **kws):
+    """Read the file object fp and parse it into a Message object model.
+
+    Any further positional and keyword arguments (such as the optional
+    _class and strict) are handed to the Parser constructor.
+    """
+    # Imported here, not at module level, so that importing the email
+    # package does not drag in the parser machinery as a side effect.
+    from email.parser import Parser
+    parser = Parser(*args, **kws)
+    return parser.parse(fp)
+
+
+
+# Lazy loading to provide name mapping from new-style names (PEP 8 compatible
+# email 4.0 module names), to old-style names (email 3.0 module names).
+import sys
+
+class LazyImporter(object):
+    """Placeholder that imports an email submodule on first attribute use."""
+
+    def __init__(self, module_name):
+        # Full dotted name of the real module this object stands in for.
+        self.__name__ = 'email.' + module_name
+
+    def __getattr__(self, name):
+        # Only reached for attributes not yet in the instance dict: import
+        # the real module, copy its namespace into this object so later
+        # lookups no longer come through here, then answer this lookup.
+        target = self.__name__
+        __import__(target)
+        module = sys.modules[target]
+        self.__dict__.update(module.__dict__)
+        return getattr(module, name)
+
+
+_LOWERNAMES = [
+    # email.OldName -> email.oldname (the new name is the lowercased old name)
+    'Charset',
+    'Encoders',
+    'Errors',
+    'FeedParser',
+    'Generator',
+    'Header',
+    'Iterators',
+    'Message',
+    'Parser',
+    'Utils',
+    'base64MIME',
+    'quopriMIME',
+    ]
+
+_MIMENAMES = [
+    # email.MIMEOldName -> email.mime.oldname (lowercased old name)
+    'Audio',
+    'Base',
+    'Image',
+    'Message',
+    'Multipart',
+    'NonMultipart',
+    'Text',
+    ]
+
+# Register a lazy stand-in for every old-style module name, both in
+# sys.modules (so "import email.OldName" finds it) and as an attribute of
+# the email package itself.
+for _name in _LOWERNAMES:
+    importer = LazyImporter(_name.lower())
+    sys.modules['email.' + _name] = importer
+    setattr(sys.modules['email'], _name, importer)
+
+
+# email.mime has to be imported before the loop below, which looks it up
+# in sys.modules.
+import email.mime
+for _name in _MIMENAMES:
+    importer = LazyImporter('mime.' + _name.lower())
+    sys.modules['email.MIME' + _name] = importer
+    setattr(sys.modules['email'], 'MIME' + _name, importer)
+    # Also reachable under the old capitalised name inside email.mime.
+    setattr(sys.modules['email.mime'], _name, importer)
diff --git a/cashew/Lib/email/_parseaddr.py b/cashew/Lib/email/_parseaddr.py
new file mode 100644
index 0000000..690db2c
--- /dev/null
+++ b/cashew/Lib/email/_parseaddr.py
@@ -0,0 +1,497 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Contact: email-sig@python.org
+
+"""Email address parsing code.
+
+Lifted directly from rfc822.py. This should eventually be rewritten.
+"""
+
+__all__ = [
+ 'mktime_tz',
+ 'parsedate',
+ 'parsedate_tz',
+ 'quote',
+ ]
+
+import time, calendar
+
+SPACE = ' '
+EMPTYSTRING = ''
+COMMASPACE = ', '
+
+# Parse a date field
+# Abbreviated month names first, full names second: an index of 12 or more
+# is folded back into the range 1..12 by parsedate_tz().
+_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
+               'aug', 'sep', 'oct', 'nov', 'dec',
+               'january', 'february', 'march', 'april', 'may', 'june', 'july',
+               'august', 'september', 'october', 'november', 'december']
+
+_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
+
+# The timezone table does not include the military time zones defined
+# in RFC822, other than Z.  According to RFC1123, the description in
+# RFC822 gets the signs wrong, so we can't rely on any such time
+# zones.  RFC1123 recommends that numeric timezone indicators be used
+# instead of timezone names.
+
+_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
+              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
+              'EST': -500, 'EDT': -400,  # Eastern
+              'CST': -600, 'CDT': -500,  # Central
+              'MST': -700, 'MDT': -600,  # Mountain
+              'PST': -800, 'PDT': -700   # Pacific
+              }
+
+
+def parsedate_tz(data):
+    """Convert a date string to a 10-item time tuple.
+
+    Returns (year, month, day, hour, minute, second, 0, 1, -1, tzoffset),
+    or None when data is empty, contains only whitespace or cannot be
+    parsed.  Items 6 to 8 are fixed placeholders (the DST flag is -1
+    because DST is unknown).  tzoffset is the zone offset in seconds
+    (-0500 gives -18000), or None when no usable zone was found.  Zone
+    names are looked up in _timezones; of the military zones only Z is
+    known.  Anything else must be numeric.
+    """
+    if not data:
+        return None
+    data = data.split()
+    if not data:
+        # Whitespace-only input: there is no first token to inspect.
+        return None
+    # The FWS after the comma after the day-of-week is optional, so search and
+    # adjust for this.
+    if data[0].endswith(',') or data[0].lower() in _daynames:
+        # There's a dayname here. Skip it
+        del data[0]
+    else:
+        i = data[0].rfind(',')
+        if i >= 0:
+            data[0] = data[0][i+1:]
+    if len(data) == 3: # RFC 850 date, deprecated
+        stuff = data[0].split('-')
+        if len(stuff) == 3:
+            data = stuff + data[1:]
+    if len(data) == 4:
+        # The zone may be glued to the time ("19:12:08+0100" or
+        # "19:12:08-0500"); split it off, keeping the sign for int().
+        s = data[3]
+        i = s.find('+')
+        if i == -1:
+            i = s.find('-')
+        if i > 0:
+            data[3:] = [s[:i], s[i:]]
+        else:
+            data.append('') # Dummy tz
+    if len(data) < 5:
+        return None
+    data = data[:5]
+    [dd, mm, yy, tm, tz] = data
+    mm = mm.lower()
+    if mm not in _monthnames:
+        # Perhaps the month comes before the day: swap and retry.
+        dd, mm = mm, dd.lower()
+        if mm not in _monthnames:
+            return None
+    mm = _monthnames.index(mm) + 1
+    if mm > 12:
+        mm -= 12
+    if dd[-1] == ',':
+        dd = dd[:-1]
+    # A colon in the year slot means year and time are transposed.
+    i = yy.find(':')
+    if i > 0:
+        yy, tm = tm, yy
+    if yy[-1] == ',':
+        yy = yy[:-1]
+    # A non-numeric "year" is really the zone; swap the two.
+    if not yy[0].isdigit():
+        yy, tz = tz, yy
+    if tm[-1] == ',':
+        tm = tm[:-1]
+    tm = tm.split(':')
+    if len(tm) == 2:
+        [thh, tmm] = tm
+        tss = '0'
+    elif len(tm) == 3:
+        [thh, tmm, tss] = tm
+    else:
+        return None
+    try:
+        yy = int(yy)
+        dd = int(dd)
+        thh = int(thh)
+        tmm = int(tmm)
+        tss = int(tss)
+    except ValueError:
+        return None
+    # Check for a yy specified in two-digit format, then convert it to the
+    # appropriate four-digit format, according to the POSIX standard. RFC 822
+    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
+    # mandates a 4-digit yy. For more information, see the documentation for
+    # the time module.
+    if yy < 100:
+        # The year is between 1969 and 1999 (inclusive).
+        if yy > 68:
+            yy += 1900
+        # The year is between 2000 and 2068 (inclusive).
+        else:
+            yy += 2000
+    tzoffset = None
+    tz = tz.upper()
+    if tz in _timezones:
+        tzoffset = _timezones[tz]
+    else:
+        try:
+            tzoffset = int(tz)
+        except ValueError:
+            pass
+    # Convert a timezone offset into seconds ; -0500 -> -18000
+    if tzoffset:
+        if tzoffset < 0:
+            tzsign = -1
+            tzoffset = -tzoffset
+        else:
+            tzsign = 1
+        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
+    # Daylight Saving Time flag is set to -1, since DST is unknown.
+    return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
+
+
+def parsedate(data):
+    """Convert a time string to a 9-item time tuple.
+
+    This is the result of parsedate_tz() without its trailing zone
+    offset.  A non-tuple result (None for unparsable input) is passed
+    through unchanged.
+    """
+    parsed = parsedate_tz(data)
+    return parsed[:9] if isinstance(parsed, tuple) else parsed
+
+
+def mktime_tz(data):
+    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.
+
+    With a zone offset in data[9], the first nine items are read as UTC
+    fields and the offset is subtracted.  Without one (None), they are
+    read as local time.
+    """
+    zone = data[9]
+    if zone is not None:
+        return calendar.timegm(data) - zone
+    # No zone info, so localtime is better assumption than GMT
+    return time.mktime(data[:8] + (-1,))
+
+
+def quote(str):
+    """Escape a string for use inside a quoted string.
+
+    Backslashes and double quotes, the only characters that need it
+    there, are each preceded by a backslash.  The surrounding double
+    quotes are not added.
+    """
+    # Backslashes go first, so the ones added for the quotes are not
+    # doubled again.
+    escaped = str.replace('\\', '\\\\')
+    return escaped.replace('"', '\\"')
+
+
+class AddrlistClass:
+    """Address parser class by Ben Escoto.
+
+    To understand what this class does, it helps to have a copy of RFC 2822 in
+    front of you.
+
+    The parser is a cursor over self.field: self.pos is the index of the next
+    unread character, and every get*() method advances it past what it
+    consumed.  Comments met on the way are collected in self.commentlist.
+
+    Note: this class interface is deprecated and may be removed in the future.
+    Use rfc822.AddressList instead.
+    """
+
+    def __init__(self, field):
+        """Initialize a new instance.
+
+        `field' is an unparsed address header field, containing
+        one or more addresses.
+        """
+        self.specials = '()<>@,:;.\"[]'
+        self.pos = 0
+        self.LWS = ' \t'
+        self.CR = '\r\n'
+        self.FWS = self.LWS + self.CR
+        # Characters that terminate an atom: specials plus all whitespace.
+        self.atomends = self.specials + self.LWS + self.CR
+        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
+        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
+        # syntax, so allow dots in phrases.
+        self.phraseends = self.atomends.replace('.', '')
+        self.field = field
+        self.commentlist = []
+
+    def gotonext(self):
+        """Parse up to the start of the next address.
+
+        Skips whitespace and comments; the comments are appended to
+        self.commentlist.
+        """
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS + '\n\r':
+                self.pos += 1
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            else:
+                break
+
+    def getaddrlist(self):
+        """Parse all addresses.
+
+        Returns a list containing all of the addresses.  Each entry is a
+        2-tuple; a getaddress() call that yields nothing contributes the
+        pair ('', '').
+        """
+        result = []
+        while self.pos < len(self.field):
+            ad = self.getaddress()
+            if ad:
+                result += ad
+            else:
+                result.append(('', ''))
+        return result
+
+    def getaddress(self):
+        """Parse the next address.
+
+        Returns a list of 2-tuples: empty when nothing usable was found,
+        one entry for a single address, several for a group.
+        """
+        self.commentlist = []
+        self.gotonext()
+
+        oldpos = self.pos
+        # NOTE(review): oldcl is the same list object as self.commentlist,
+        # not a copy, so comments appended by getphraselist() below are
+        # still in it when it is "restored" in the addr-spec branch.
+        oldcl = self.commentlist
+        plist = self.getphraselist()
+
+        self.gotonext()
+        returnlist = []
+
+        if self.pos >= len(self.field):
+            # Bad email address technically, no domain.
+            if plist:
+                returnlist = [(SPACE.join(self.commentlist), plist[0])]
+
+        elif self.field[self.pos] in '.@':
+            # email address is just an addrspec
+            # this isn't very efficient since we start over
+            self.pos = oldpos
+            self.commentlist = oldcl
+            addrspec = self.getaddrspec()
+            returnlist = [(SPACE.join(self.commentlist), addrspec)]
+
+        elif self.field[self.pos] == ':':
+            # address is a group
+            returnlist = []
+
+            fieldlen = len(self.field)
+            self.pos += 1
+            # Collect the group's members until the closing ';'.
+            while self.pos < len(self.field):
+                self.gotonext()
+                if self.pos < fieldlen and self.field[self.pos] == ';':
+                    self.pos += 1
+                    break
+                returnlist = returnlist + self.getaddress()
+
+        elif self.field[self.pos] == '<':
+            # Address is a phrase then a route addr
+            routeaddr = self.getrouteaddr()
+
+            if self.commentlist:
+                returnlist = [(SPACE.join(plist) + ' (' +
+                               ' '.join(self.commentlist) + ')', routeaddr)]
+            else:
+                returnlist = [(SPACE.join(plist), routeaddr)]
+
+        else:
+            if plist:
+                returnlist = [(SPACE.join(self.commentlist), plist[0])]
+            elif self.field[self.pos] in self.specials:
+                # Step over a stray special so that parsing makes progress.
+                self.pos += 1
+
+        self.gotonext()
+        # Consume the comma separating this address from the next one.
+        if self.pos < len(self.field) and self.field[self.pos] == ',':
+            self.pos += 1
+        return returnlist
+
+    def getrouteaddr(self):
+        """Parse a route address (Return-path value).
+
+        This method just skips all the route stuff and returns the addrspec.
+        Returns None when the cursor is not on '<', and '' when no
+        addr-spec is found.
+        """
+        if self.field[self.pos] != '<':
+            return
+
+        expectroute = False
+        self.pos += 1
+        self.gotonext()
+        adlist = ''
+        while self.pos < len(self.field):
+            if expectroute:
+                # The domain after an '@' of the route is read and dropped.
+                self.getdomain()
+                expectroute = False
+            elif self.field[self.pos] == '>':
+                self.pos += 1
+                break
+            elif self.field[self.pos] == '@':
+                self.pos += 1
+                expectroute = True
+            elif self.field[self.pos] == ':':
+                self.pos += 1
+            else:
+                adlist = self.getaddrspec()
+                self.pos += 1
+                break
+            self.gotonext()
+
+        return adlist
+
+    def getaddrspec(self):
+        """Parse an RFC 2822 addr-spec.
+
+        Returns the local part alone when no '@' follows it, otherwise
+        the local part, '@' and the domain joined together.
+        """
+        aslist = []
+
+        self.gotonext()
+        while self.pos < len(self.field):
+            if self.field[self.pos] == '.':
+                aslist.append('.')
+                self.pos += 1
+            elif self.field[self.pos] == '"':
+                # Re-quote the quoted string that getquote() unescaped.
+                aslist.append('"%s"' % quote(self.getquote()))
+            elif self.field[self.pos] in self.atomends:
+                break
+            else:
+                aslist.append(self.getatom())
+            self.gotonext()
+
+        if self.pos >= len(self.field) or self.field[self.pos] != '@':
+            return EMPTYSTRING.join(aslist)
+
+        aslist.append('@')
+        self.pos += 1
+        self.gotonext()
+        return EMPTYSTRING.join(aslist) + self.getdomain()
+
+    def getdomain(self):
+        """Get the complete domain name from an address."""
+        sdlist = []
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS:
+                self.pos += 1
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            elif self.field[self.pos] == '[':
+                sdlist.append(self.getdomainliteral())
+            elif self.field[self.pos] == '.':
+                self.pos += 1
+                sdlist.append('.')
+            elif self.field[self.pos] in self.atomends:
+                break
+            else:
+                sdlist.append(self.getatom())
+        return EMPTYSTRING.join(sdlist)
+
+    def getdelimited(self, beginchar, endchars, allowcomments=True):
+        """Parse a header fragment delimited by special characters.
+
+        `beginchar' is the start character for the fragment.
+        If self is not looking at an instance of `beginchar' then
+        getdelimited returns the empty string.
+
+        `endchars' is a sequence of allowable end-delimiting characters.
+        Parsing stops when one of these is encountered.
+
+        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
+        within the parsed fragment.
+
+        The returned text has the delimiters removed and each backslash
+        escape replaced by the character that followed the backslash.
+        """
+        if self.field[self.pos] != beginchar:
+            return ''
+
+        slist = ['']
+        # True right after a backslash: the next character is taken
+        # literally, even when it is one of endchars.
+        quote = False
+        self.pos += 1
+        while self.pos < len(self.field):
+            if quote:
+                slist.append(self.field[self.pos])
+                quote = False
+            elif self.field[self.pos] in endchars:
+                self.pos += 1
+                break
+            elif allowcomments and self.field[self.pos] == '(':
+                slist.append(self.getcomment())
+                continue        # have already advanced pos from getcomment
+            elif self.field[self.pos] == '\\':
+                quote = True
+            else:
+                slist.append(self.field[self.pos])
+            self.pos += 1
+
+        return EMPTYSTRING.join(slist)
+
+    def getquote(self):
+        """Get a quote-delimited fragment from self's field."""
+        return self.getdelimited('"', '"\r', False)
+
+    def getcomment(self):
+        """Get a parenthesis-delimited fragment from self's field."""
+        return self.getdelimited('(', ')\r', True)
+
+    def getdomainliteral(self):
+        """Parse an RFC 2822 domain-literal."""
+        return '[%s]' % self.getdelimited('[', ']\r', False)
+
+    def getatom(self, atomends=None):
+        """Parse an RFC 2822 atom.
+
+        Optional atomends specifies a different set of end token delimiters
+        (the default is to use self.atomends).  This is used e.g. in
+        getphraselist() since phrase endings must not include the `.' (which
+        is legal in phrases)."""
+        atomlist = ['']
+        if atomends is None:
+            atomends = self.atomends
+
+        while self.pos < len(self.field):
+            if self.field[self.pos] in atomends:
+                break
+            else:
+                atomlist.append(self.field[self.pos])
+            self.pos += 1
+
+        return EMPTYSTRING.join(atomlist)
+
+    def getphraselist(self):
+        """Parse a sequence of RFC 2822 phrases.
+
+        A phrase is a sequence of words, which are in turn either RFC 2822
+        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
+        runs of continuous whitespace into one space.
+
+        Returns the list of words; comments met on the way go to
+        self.commentlist instead.
+        """
+        plist = []
+
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.FWS:
+                self.pos += 1
+            elif self.field[self.pos] == '"':
+                plist.append(self.getquote())
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            elif self.field[self.pos] in self.phraseends:
+                break
+            else:
+                plist.append(self.getatom(self.phraseends))
+
+        return plist
+
+class AddressList(AddrlistClass):
+    """An AddressList encapsulates a list of parsed RFC 2822 addresses.
+
+    The parsed addresses are kept in self.addresslist.  The operators +
+    and - build a new AddressList (union and difference); += and -=
+    change the left operand in place.
+    """
+    def __init__(self, field):
+        AddrlistClass.__init__(self, field)
+        # An empty or None field is not parsed at all.
+        self.addresslist = self.getaddrlist() if field else []
+
+    def __len__(self):
+        return len(self.addresslist)
+
+    def __add__(self, other):
+        # Set union: everything from self, then the entries of other
+        # that self does not already have.
+        union = AddressList(None)
+        extra = [addr for addr in other.addresslist
+                 if addr not in self.addresslist]
+        union.addresslist = self.addresslist + extra
+        return union
+
+    def __iadd__(self, other):
+        # Set union, in-place.  Membership is tested against the growing
+        # list, so this has to stay a loop.
+        for addr in other.addresslist:
+            if addr not in self.addresslist:
+                self.addresslist.append(addr)
+        return self
+
+    def __sub__(self, other):
+        # Set difference: the entries of self that other does not have.
+        difference = AddressList(None)
+        difference.addresslist = [addr for addr in self.addresslist
+                                  if addr not in other.addresslist]
+        return difference
+
+    def __isub__(self, other):
+        # Set difference, in-place: each entry of other removes at most
+        # one matching entry from self.
+        for addr in other.addresslist:
+            if addr in self.addresslist:
+                self.addresslist.remove(addr)
+        return self
+
+    def __getitem__(self, index):
+        # Make indexing, slices, and 'in' work
+        return self.addresslist[index]
diff --git a/cashew/Lib/email/base64mime.py b/cashew/Lib/email/base64mime.py
new file mode 100644
index 0000000..61eba2b
--- /dev/null
+++ b/cashew/Lib/email/base64mime.py
@@ -0,0 +1,183 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Ben Gertzfield
+# Contact: email-sig@python.org
+
+"""Base64 content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
+characters encoding known as Base64.
+
+It is used in the MIME standards for email to attach images, audio, and text
+using some 8-bit character sets to messages.
+
+This module provides an interface to encode and decode both headers and bodies
+with Base64 encoding.
+
+RFC 2047 defines a method for including character set information in an
+`encoded-word' in a header. This method is commonly used for 8-bit real names
+in To:, From:, Cc:, etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character conversion
+necessary for proper internationalized headers; it only does dumb encoding and
+decoding. To deal with the various line wrapping issues, use the email.header
+module.
+"""
+
+__all__ = [
+ 'base64_len',
+ 'body_decode',
+ 'body_encode',
+ 'decode',
+ 'decodestring',
+ 'encode',
+ 'encodestring',
+ 'header_encode',
+ ]
+
+
+from binascii import b2a_base64, a2b_base64
+from email.utils import fix_eols
+
+CRLF = '\r\n'
+NL = '\n'
+EMPTYSTRING = ''
+
+# See also Charset.py
+MISC_LEN = 7
+
+
+
+# Helpers
+def base64_len(s):
+    """Return the number of characters base64-encoding s would produce."""
+    full_groups, remainder = divmod(len(s), 3)
+    # Every complete 3-byte group becomes 4 output characters.  A
+    # trailing partial group (1 or 2 bytes) is padded, so it also
+    # costs a full 4 characters.
+    if remainder:
+        return (full_groups + 1) * 4
+    return full_groups * 4
+
+
+
+def header_encode(header, charset='iso-8859-1', keep_eols=False,
+                  maxlinelen=76, eol=NL):
+    """Encode a single header line with Base64 encoding in a given charset.
+
+    Defined in RFC 2045, this Base64 encoding is identical to normal Base64
+    encoding, except that each line must be intelligently wrapped (respecting
+    the Base64 encoding), and subsequent lines must start with a space.
+
+    charset names the character set to use to encode the header.  It defaults
+    to iso-8859-1.
+
+    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
+    to the canonical email line separator \\r\\n unless the keep_eols
+    parameter is True (the default is False).
+
+    Each line of the header will be terminated in the value of eol, which
+    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
+    this function directly in email.
+
+    The resulting string will be in the form:
+
+    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
+     =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="
+
+    with each line wrapped at, at most, maxlinelen characters (defaults to 76
+    characters).
+    """
+    # An empty header has nothing to encode; hand it back untouched.
+    if not header:
+        return header
+
+    if not keep_eols:
+        header = fix_eols(header)
+
+    # Work out how many raw characters fit on one line once the RFC chrome
+    # ("=?charset?b?" ... "?=") and the 4-for-3 base64 growth are paid for.
+    max_encoded = maxlinelen - len(charset) - MISC_LEN
+    max_unencoded = max_encoded * 3 // 4
+
+    # Encode the header slice by slice so that every output line is a
+    # complete encoded-word of its own.
+    lines = []
+    for start in range(0, len(header), max_unencoded):
+        chunk = b2a_base64(header[start:start + max_unencoded])
+        # Ignore the newline b2a_base64() tacks onto its result.
+        if chunk.endswith(NL):
+            chunk = chunk[:-1]
+        # Add the chrome
+        lines.append('=?%s?b?%s?=' % (charset, chunk))
+
+    # Glue the lines together; each continuation line starts with a space.
+    # BAW: should we be able to specify the leading whitespace in the
+    # joiner?
+    joiner = eol + ' '
+    return joiner.join(lines)
+
+
+
+def encode(s, binary=True, maxlinelen=76, eol=NL):
+    """Encode a string with base64.
+
+    Each line will be wrapped at, at most, maxlinelen characters (defaults to
+    76 characters).
+
+    If binary is False, end-of-line characters will be converted to the
+    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
+    verbatim (this is the default).
+
+    Each line of encoded text will end with eol, which defaults to "\\n".  Set
+    this to "\\r\\n" if you will be using the result of this function directly
+    in an email.
+    """
+    if not s:
+        return s
+
+    if not binary:
+        s = fix_eols(s)
+
+    # maxlinelen output characters carry three quarters as many input bytes.
+    step = maxlinelen * 3 // 4
+    pieces = []
+    for offset in range(0, len(s), step):
+        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
+        # adding a newline to the encoded string?
+        piece = b2a_base64(s[offset:offset + step])
+        swap_eol = eol != NL and piece.endswith(NL)
+        pieces.append((piece[:-1] + eol) if swap_eol else piece)
+    return EMPTYSTRING.join(pieces)
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_encode = encode
+encodestring = encode
+
+
+
+def decode(s, convert_eols=None):
+    """Decode a raw base64 string.
+
+    If convert_eols is set to a string value, all canonical email linefeeds,
+    e.g. "\\r\\n", in the decoded text will be converted to the value of
+    convert_eols.  os.linesep is a good choice for convert_eols if you are
+    decoding a text attachment.
+
+    This function does not parse a full MIME header value encoded with
+    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
+    level email.header class for that functionality.
+    """
+    # Empty input decodes to itself.
+    if not s:
+        return s
+    decoded = a2b_base64(s)
+    if not convert_eols:
+        return decoded
+    return decoded.replace(CRLF, convert_eols)
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_decode = decode
+decodestring = decode
diff --git a/cashew/Lib/email/charset.py b/cashew/Lib/email/charset.py
new file mode 100644
index 0000000..30a13ff
--- /dev/null
+++ b/cashew/Lib/email/charset.py
@@ -0,0 +1,397 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Ben Gertzfield, Barry Warsaw
+# Contact: email-sig@python.org
+
+__all__ = [
+ 'Charset',
+ 'add_alias',
+ 'add_charset',
+ 'add_codec',
+ ]
+
+import codecs
+import email.base64mime
+import email.quoprimime
+
+from email import errors
+from email.encoders import encode_7or8bit
+
+
+
+# Flags for types of header encodings
+QP = 1 # Quoted-Printable
+BASE64 = 2 # Base64
+SHORTEST = 3 # the shorter of QP and base64, but only for headers
+
+# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
+MISC_LEN = 7
+
+DEFAULT_CHARSET = 'us-ascii'
+
+
+
+# Defaults
+CHARSETS = {
+ # input header enc body enc output conv
+ 'iso-8859-1': (QP, QP, None),
+ 'iso-8859-2': (QP, QP, None),
+ 'iso-8859-3': (QP, QP, None),
+ 'iso-8859-4': (QP, QP, None),
+ # iso-8859-5 is Cyrillic, and not especially used
+ # iso-8859-6 is Arabic, also not particularly used
+ # iso-8859-7 is Greek, QP will not make it readable
+ # iso-8859-8 is Hebrew, QP will not make it readable
+ 'iso-8859-9': (QP, QP, None),
+ 'iso-8859-10': (QP, QP, None),
+ # iso-8859-11 is Thai, QP will not make it readable
+ 'iso-8859-13': (QP, QP, None),
+ 'iso-8859-14': (QP, QP, None),
+ 'iso-8859-15': (QP, QP, None),
+ 'iso-8859-16': (QP, QP, None),
+ 'windows-1252':(QP, QP, None),
+ 'viscii': (QP, QP, None),
+ 'us-ascii': (None, None, None),
+ 'big5': (BASE64, BASE64, None),
+ 'gb2312': (BASE64, BASE64, None),
+ 'euc-jp': (BASE64, None, 'iso-2022-jp'),
+ 'shift_jis': (BASE64, None, 'iso-2022-jp'),
+ 'iso-2022-jp': (BASE64, None, None),
+ 'koi8-r': (BASE64, BASE64, None),
+ 'utf-8': (SHORTEST, BASE64, 'utf-8'),
+ # We're making this one up to represent raw unencoded 8-bit
+ '8bit': (None, BASE64, 'utf-8'),
+ }
+
+# Aliases for other commonly-used names for character sets. Map
+# them to the real ones used in email.
+ALIASES = {
+ 'latin_1': 'iso-8859-1',
+ 'latin-1': 'iso-8859-1',
+ 'latin_2': 'iso-8859-2',
+ 'latin-2': 'iso-8859-2',
+ 'latin_3': 'iso-8859-3',
+ 'latin-3': 'iso-8859-3',
+ 'latin_4': 'iso-8859-4',
+ 'latin-4': 'iso-8859-4',
+ 'latin_5': 'iso-8859-9',
+ 'latin-5': 'iso-8859-9',
+ 'latin_6': 'iso-8859-10',
+ 'latin-6': 'iso-8859-10',
+ 'latin_7': 'iso-8859-13',
+ 'latin-7': 'iso-8859-13',
+ 'latin_8': 'iso-8859-14',
+ 'latin-8': 'iso-8859-14',
+ 'latin_9': 'iso-8859-15',
+ 'latin-9': 'iso-8859-15',
+ 'latin_10':'iso-8859-16',
+ 'latin-10':'iso-8859-16',
+ 'cp949': 'ks_c_5601-1987',
+ 'euc_jp': 'euc-jp',
+ 'euc_kr': 'euc-kr',
+ 'ascii': 'us-ascii',
+ }
+
+
+# Map charsets to their Unicode codec strings.
+CODEC_MAP = {
+ 'gb2312': 'eucgb2312_cn',
+ 'big5': 'big5_tw',
+ # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
+ # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
+ # Let that stuff pass through without conversion to/from Unicode.
+ 'us-ascii': None,
+ }
+
+
+
+# Convenience functions for extending the above mappings
+def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
+    """Register the email properties of a character set.
+
+    charset is the input character set, given by its canonical name.
+
+    header_enc and body_enc say how message headers and message bodies in
+    that charset are to be encoded.  Each is Charset.QP for
+    quoted-printable, Charset.BASE64 for base64, or None for no encoding
+    (the default).  header_enc may also be Charset.SHORTEST, meaning the
+    shorter of the two; passing SHORTEST as body_enc raises ValueError.
+
+    output_charset is the character set the output should be in.
+    Conversions proceed from the input charset, to Unicode, to the output
+    charset when Charset.convert() is called.  The default is to output in
+    the same character set as the input.
+
+    Both charset and output_charset must have Unicode codec entries in the
+    module's charset-to-codec mapping; use add_codec(charset, codecname) to
+    add codecs the module does not know about.  See the codecs module's
+    documentation for more information.
+    """
+    if body_enc == SHORTEST:
+        raise ValueError('SHORTEST not allowed for body_enc')
+    # Stored in the same (header enc, body enc, output conv) order as the
+    # built-in CHARSETS entries.
+    CHARSETS[charset] = header_enc, body_enc, output_charset
+
+
+def add_alias(alias, canonical):
+    """Register alias as another name for a character set.
+
+    alias is the alternative name, e.g. latin-1
+    canonical is the character set's canonical name, e.g. iso-8859-1
+    """
+    ALIASES[alias] = canonical
+
+
+def add_codec(charset, codecname):
+    """Register the codec that maps the given charset to/from Unicode.
+
+    charset is the canonical name of a character set.  codecname is the name
+    of a Python codec, as accepted by the second argument to the unicode()
+    built-in, or by the encode() method of a Unicode string.
+    """
+    CODEC_MAP[charset] = codecname
+
+
+
+class Charset:
+    """Map character sets to their email properties.
+
+    This class provides information about the requirements imposed on email
+    for a specific character set.  It also provides convenience routines for
+    converting between character sets, given the availability of the
+    applicable codecs.  Given a character set, it will do its best to provide
+    information on how to use that character set in an email in an
+    RFC-compliant way.
+
+    Certain character sets must be encoded with quoted-printable or base64
+    when used in email headers or bodies.  Certain character sets must be
+    converted outright, and are not allowed in email.  Instances of this
+    class expose the following information about a character set:
+
+    input_charset: The initial character set specified.  Common aliases
+                   are converted to their `official' email names (e.g. latin_1
+                   is converted to iso-8859-1).  Defaults to 7-bit us-ascii.
+
+    header_encoding: If the character set must be encoded before it can be
+                     used in an email header, this attribute will be set to
+                     Charset.QP (for quoted-printable), Charset.BASE64 (for
+                     base64 encoding), or Charset.SHORTEST for the shortest of
+                     QP or BASE64 encoding.  Otherwise, it will be None.
+
+    body_encoding: Same as header_encoding, but describes the encoding for the
+                   mail message's body, which indeed may be different than the
+                   header encoding.  Charset.SHORTEST is not allowed for
+                   body_encoding.
+
+    output_charset: Some character sets must be converted before they can be
+                    used in email headers or bodies.  If the input_charset is
+                    one of them, this attribute will contain the name of the
+                    charset output will be converted to.  Otherwise, it will
+                    be None.
+
+    input_codec: The name of the Python codec used to convert the
+                 input_charset to Unicode.  If no conversion codec is
+                 necessary, this attribute will be None.
+
+    output_codec: The name of the Python codec used to convert Unicode
+                  to the output_charset.  If no conversion codec is necessary,
+                  this attribute will have the same value as the input_codec.
+    """
+    def __init__(self, input_charset=DEFAULT_CHARSET):
+        # RFC 2046, $4.1.2 says charsets are not case sensitive.  We coerce to
+        # unicode because its .lower() is locale insensitive.  If the argument
+        # is already a unicode, we leave it at that, but ensure that the
+        # charset is ASCII, as the standard (RFC XXX) requires.
+        try:
+            if isinstance(input_charset, unicode):
+                input_charset.encode('ascii')
+            else:
+                input_charset = unicode(input_charset, 'ascii')
+        except UnicodeError:
+            raise errors.CharsetError(input_charset)
+        input_charset = input_charset.lower().encode('ascii')
+        # Set the input charset after filtering through the aliases and/or codecs
+        if not (input_charset in ALIASES or input_charset in CHARSETS):
+            try:
+                input_charset = codecs.lookup(input_charset).name
+            except LookupError:
+                pass
+        self.input_charset = ALIASES.get(input_charset, input_charset)
+        # Look the charset up in the CHARSETS registry to find its encodings
+        # and output conversion.  Unregistered charsets fall back to SHORTEST
+        # for headers, BASE64 for bodies, and no conversion.
+        henc, benc, conv = CHARSETS.get(self.input_charset,
+                                        (SHORTEST, BASE64, None))
+        if not conv:
+            conv = self.input_charset
+        # Set the attributes from the registry entry (or the fallback above).
+        self.header_encoding = henc
+        self.body_encoding = benc
+        self.output_charset = ALIASES.get(conv, conv)
+        # Now set the codecs.  If one isn't defined for input_charset,
+        # guess and try a Unicode codec with the same name as input_codec.
+        self.input_codec = CODEC_MAP.get(self.input_charset,
+                                         self.input_charset)
+        self.output_codec = CODEC_MAP.get(self.output_charset,
+                                          self.output_charset)
+
+    def __str__(self):
+        return self.input_charset.lower()
+
+    __repr__ = __str__
+
+    def __eq__(self, other):
+        return str(self) == str(other).lower()
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def get_body_encoding(self):
+        """Return the content-transfer-encoding used for body encoding.
+
+        This is either the string `quoted-printable' or `base64' depending on
+        the encoding used, or it is a function in which case you should call
+        the function with a single argument, the Message object being
+        encoded.  The function should then set the Content-Transfer-Encoding
+        header itself to whatever is appropriate.
+
+        Returns "quoted-printable" if self.body_encoding is QP.
+        Returns "base64" if self.body_encoding is BASE64.
+        Returns the encode_7or8bit function otherwise.
+        """
+        assert self.body_encoding != SHORTEST
+        if self.body_encoding == QP:
+            return 'quoted-printable'
+        elif self.body_encoding == BASE64:
+            return 'base64'
+        else:
+            return encode_7or8bit
+
+    def convert(self, s):
+        """Convert a string from the input_codec to the output_codec."""
+        if self.input_codec != self.output_codec:
+            return unicode(s, self.input_codec).encode(self.output_codec)
+        else:
+            return s
+
+    def to_splittable(self, s):
+        """Convert a possibly multibyte string to a safely splittable format.
+
+        Uses the input_codec to try and convert the string to Unicode, so it
+        can be safely split on character boundaries (even for multibyte
+        characters).
+
+        Returns the string as-is if it isn't known how to convert it to
+        Unicode with the input_charset.
+
+        Characters that could not be converted to Unicode will be replaced
+        with the Unicode replacement character U+FFFD.
+        """
+        if isinstance(s, unicode) or self.input_codec is None:
+            return s
+        try:
+            return unicode(s, self.input_codec, 'replace')
+        except LookupError:
+            # Input codec not installed on system, so return the original
+            # string unchanged.
+            return s
+
+    def from_splittable(self, ustr, to_output=True):
+        """Convert a splittable string back into an encoded string.
+
+        Uses the proper codec to try and convert the string from Unicode back
+        into an encoded format.  Return the string as-is if it is not Unicode,
+        or if it could not be converted from Unicode.
+
+        Characters that could not be converted from Unicode will be replaced
+        with an appropriate character (usually '?').
+
+        If to_output is True (the default), uses output_codec to convert to an
+        encoded format.  If to_output is False, uses input_codec.
+        """
+        if to_output:
+            codec = self.output_codec
+        else:
+            codec = self.input_codec
+        if not isinstance(ustr, unicode) or codec is None:
+            return ustr
+        try:
+            return ustr.encode(codec, 'replace')
+        except LookupError:
+            # Output codec not installed
+            return ustr
+
+    def get_output_charset(self):
+        """Return the output character set.
+
+        This is self.output_charset if that is not None, otherwise it is
+        self.input_charset.
+        """
+        return self.output_charset or self.input_charset
+
+    def encoded_header_len(self, s):
+        """Return the length of the encoded header string."""
+        cset = self.get_output_charset()
+        # The len(s) of a 7bit encoding is len(s)
+        if self.header_encoding == BASE64:
+            return email.base64mime.base64_len(s) + len(cset) + MISC_LEN
+        elif self.header_encoding == QP:
+            return email.quoprimime.header_quopri_len(s) + len(cset) + MISC_LEN
+        elif self.header_encoding == SHORTEST:
+            lenb64 = email.base64mime.base64_len(s)
+            lenqp = email.quoprimime.header_quopri_len(s)
+            return min(lenb64, lenqp) + len(cset) + MISC_LEN
+        else:
+            return len(s)
+
+    def header_encode(self, s, convert=False):
+        """Header-encode a string, optionally converting it to output_charset.
+
+        If convert is True, the string will be converted from the input
+        charset to the output charset automatically.  This is not useful for
+        multibyte character sets, which have line length issues (multibyte
+        characters must be split on a character, not a byte boundary); use the
+        high-level Header class to deal with these issues.  convert defaults
+        to False.
+
+        The type of encoding (base64 or quoted-printable) will be based on
+        self.header_encoding.
+        """
+        cset = self.get_output_charset()
+        if convert:
+            s = self.convert(s)
+        # 7bit/8bit encodings return the string unchanged (modulo conversions)
+        if self.header_encoding == BASE64:
+            return email.base64mime.header_encode(s, cset)
+        elif self.header_encoding == QP:
+            return email.quoprimime.header_encode(s, cset, maxlinelen=None)
+        elif self.header_encoding == SHORTEST:
+            lenb64 = email.base64mime.base64_len(s)
+            lenqp = email.quoprimime.header_quopri_len(s)
+            if lenb64 < lenqp:
+                return email.base64mime.header_encode(s, cset)
+            else:
+                return email.quoprimime.header_encode(s, cset, maxlinelen=None)
+        else:
+            return s
+
+    def body_encode(self, s, convert=True):
+        """Body-encode a string and convert it to output_charset.
+
+        If convert is True (the default), the string will be converted from
+        the input charset to output charset automatically.  Unlike
+        header_encode(), there are no issues with byte boundaries and
+        multibyte charsets in email bodies, so this is usually pretty safe.
+
+        The type of encoding (base64 or quoted-printable) will be based on
+        self.body_encoding.
+        """
+        if convert:
+            s = self.convert(s)
+        # 7bit/8bit encodings return the string unchanged (modulo conversions)
+        if self.body_encoding == BASE64:
+            return email.base64mime.body_encode(s)
+        elif self.body_encoding == QP:
+            return email.quoprimime.body_encode(s)
+        else:
+            return s
diff --git a/cashew/Lib/email/encoders.py b/cashew/Lib/email/encoders.py
new file mode 100644
index 0000000..af45e62
--- /dev/null
+++ b/cashew/Lib/email/encoders.py
@@ -0,0 +1,82 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Encodings and related functions."""
+
+__all__ = [
+ 'encode_7or8bit',
+ 'encode_base64',
+ 'encode_noop',
+ 'encode_quopri',
+ ]
+
+import base64
+
+from quopri import encodestring as _encodestring
+
+
+
+def _qencode(s):
+    # quopri.encodestring() leaves spaces alone, so encode them by hand.
+    encoded = _encodestring(s, quotetabs=True)
+    return encoded.replace(' ', '=20')
+
+
+def _bencode(s):
+    # base64.encodestring() tacks a "courtesy newline" onto its output.
+    # Keep it only when the input itself ended with a newline.
+    if not s:
+        return s
+    ends_with_nl = s[-1] == '\n'
+    encoded = base64.encodestring(s)
+    if ends_with_nl or encoded[-1] != '\n':
+        return encoded
+    return encoded[:-1]
+
+
+
+def encode_base64(msg):
+    """Replace the payload of msg with its Base64 encoding.
+
+    A Content-Transfer-Encoding header naming base64 is set on msg as well.
+    """
+    payload = msg.get_payload()
+    encoded = _bencode(payload)
+    msg.set_payload(encoded)
+    msg['Content-Transfer-Encoding'] = 'base64'
+
+
+
+def encode_quopri(msg):
+    """Replace the payload of msg with its quoted-printable encoding.
+
+    A Content-Transfer-Encoding: quoted-printable header is set as well.
+    """
+    payload = msg.get_payload()
+    encoded = _qencode(payload)
+    msg.set_payload(encoded)
+    msg['Content-Transfer-Encoding'] = 'quoted-printable'
+
+
+
+def encode_7or8bit(msg):
+    """Label msg as 7bit or 8bit via its Content-Transfer-Encoding header."""
+    payload = msg.get_payload()
+    # A missing payload is labelled 7bit for backwards compatibility.
+    cte = '7bit'
+    if payload is not None:
+        # Fast check: if the payload encodes to ASCII it must be 7bit data;
+        # any UnicodeError means it has to be treated as 8bit.
+        try:
+            payload.encode('ascii')
+        except UnicodeError:
+            cte = '8bit'
+
+    # Only the header is set here; set_payload() is never called.
+    msg['Content-Transfer-Encoding'] = cte
+
+
+
+def encode_noop(msg):
+    """Do nothing; msg is left exactly as it was passed in."""
diff --git a/cashew/Lib/email/errors.py b/cashew/Lib/email/errors.py
new file mode 100644
index 0000000..d52a624
--- /dev/null
+++ b/cashew/Lib/email/errors.py
@@ -0,0 +1,57 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""email package exception classes."""
+
+
+
+class MessageError(Exception):
+    """Base class for errors in the email package."""
+
+
+class MessageParseError(MessageError):
+    """Base class for message parsing errors."""
+
+
+class HeaderParseError(MessageParseError):
+    """Error while parsing headers."""
+
+
+class BoundaryError(MessageParseError):
+    """Couldn't find terminating boundary."""
+
+
+class MultipartConversionError(MessageError, TypeError):
+    """Conversion to a multipart is prohibited; also a TypeError subclass."""
+
+
+class CharsetError(MessageError):
+    """An illegal charset was given."""
+
+
+
+# These are parsing defects which the parser was able to work around.
+class MessageDefect:
+    """Base class for a message defect."""
+    # line is stored unchanged on the instance; it defaults to None.
+    def __init__(self, line=None):
+        self.line = line
+
+class NoBoundaryInMultipartDefect(MessageDefect):
+    """A message claimed to be a multipart but had no boundary parameter."""
+
+class StartBoundaryNotFoundDefect(MessageDefect):
+    """The claimed start boundary was never found."""
+
+class FirstHeaderLineIsContinuationDefect(MessageDefect):
+    """A message had a continuation line as its first header line."""
+
+class MisplacedEnvelopeHeaderDefect(MessageDefect):
+    """A 'Unix-from' header was found in the middle of a header block."""
+
+class MalformedHeaderDefect(MessageDefect):
+    """Found a header that was missing a colon, or was otherwise malformed."""
+
+class MultipartInvariantViolationDefect(MessageDefect):
+    """A message claimed to be a multipart but no subparts were found."""
diff --git a/cashew/Lib/email/feedparser.py b/cashew/Lib/email/feedparser.py
new file mode 100644
index 0000000..8031ca6
--- /dev/null
+++ b/cashew/Lib/email/feedparser.py
@@ -0,0 +1,505 @@
+# Copyright (C) 2004-2006 Python Software Foundation
+# Authors: Baxter, Wouters and Warsaw
+# Contact: email-sig@python.org
+
+"""FeedParser - An email feed parser.
+
+The feed parser implements an interface for incrementally parsing an email
+message, line by line. This has advantages for certain applications, such as
+those reading email messages off a socket.
+
+FeedParser.feed() is the primary interface for pushing new data into the
+parser. It returns when there's nothing more it can do with the available
+data. When you have no more data to push into the parser, call .close().
+This completes the parsing and returns the root message object.
+
+The other advantage of this parser is that it will never raise a parsing
+exception. Instead, when it finds something unexpected, it adds a 'defect' to
+the current message. Defects are just instances that live on the message
+object's .defects attribute.
+"""
+
+__all__ = ['FeedParser']
+
+import re
+
+from email import errors
+from email import message
+
+NLCRE = re.compile('\r\n|\r|\n')
+NLCRE_bol = re.compile('(\r\n|\r|\n)')
+NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
+NLCRE_crack = re.compile('(\r\n|\r|\n)')
+# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
+# except controls, SP, and ":".
+headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
+EMPTYSTRING = ''
+NL = '\n'
+
+NeedMoreData = object()
+
+
+
+class BufferedSubFile(object):
+    """A file-ish object that can have new data loaded into it.
+
+    You can also push and pop line-matching predicates onto a stack.  When the
+    current predicate matches the current line, a false EOF response
+    (i.e. empty string) is returned instead.  This lets the parser adhere to a
+    simple abstraction -- it parses until EOF closes the current message.
+    """
+    def __init__(self):
+        # Chunks of the last partial line pushed into this object.
+        self._partial = []
+        # The list of full, pushed lines, in reverse order
+        self._lines = []
+        # The stack of false-EOF checking predicates.
+        self._eofstack = []
+        # A flag indicating whether the file has been closed or not.
+        self._closed = False
+
+    def push_eof_matcher(self, pred):
+        """Push pred, a callable taking a line, onto the false-EOF stack."""
+        self._eofstack.append(pred)
+
+    def pop_eof_matcher(self):
+        """Remove and return the most recently pushed false-EOF predicate."""
+        return self._eofstack.pop()
+
+    def close(self):
+        """Flush any buffered partial line and mark the file as closed."""
+        # Don't forget any trailing partial line.
+        self.pushlines(''.join(self._partial).splitlines(True))
+        self._partial = []
+        self._closed = True
+
+    def readline(self):
+        """Return the next complete line.
+
+        Returns NeedMoreData when no line is buffered and the file is still
+        open, and '' at EOF -- either the real one (closed and drained) or a
+        false one, when a pushed predicate matches the line.  In the latter
+        case the matching line stays in the buffer.
+        """
+        if not self._lines:
+            if self._closed:
+                return ''
+            return NeedMoreData
+        # Pop the line off the stack and see if it matches the current
+        # false-EOF predicate.
+        line = self._lines.pop()
+        # RFC 2046, section 5.1.2 requires us to recognize outer level
+        # boundaries at any level of inner nesting.  Do this, but be sure it's
+        # in the order of most to least nested.
+        for ateof in self._eofstack[::-1]:
+            if ateof(line):
+                # We're at the false EOF.  But push the last line back first.
+                self._lines.append(line)
+                return ''
+        return line
+
+    def unreadline(self, line):
+        # Let the consumer push a line back into the buffer.
+        assert line is not NeedMoreData
+        self._lines.append(line)
+
+    def push(self, data):
+        """Push some new data into this object.
+
+        Complete lines become available to readline(); a trailing line not
+        yet terminated by '\\n' is held back until more data arrives.
+        """
+        # Crack into lines, but preserve the linesep characters on the end of each
+        parts = data.splitlines(True)
+
+        if not parts or not parts[0].endswith(('\n', '\r')):
+            # No new complete lines, so just accumulate partials
+            self._partial += parts
+            return
+
+        if self._partial:
+            # If there are previous leftovers, complete them now
+            self._partial.append(parts[0])
+            parts[0:1] = ''.join(self._partial).splitlines(True)
+            del self._partial[:]
+
+        # If the last element of the list does not end in a newline, then treat
+        # it as a partial line.  We only check for '\n' here because a line
+        # ending with '\r' might be a line that was split in the middle of a
+        # '\r\n' sequence (see bugs 1555570 and 1721862).
+        if not parts[-1].endswith('\n'):
+            self._partial = [parts.pop()]
+        self.pushlines(parts)
+
+    def pushlines(self, lines):
+        """Make lines, an ordered list of complete lines, available to read."""
+        # self._lines is kept in reverse order so that readline() can pop()
+        # from the end: reverse the new lines and insert them at the front.
+        self._lines[:0] = lines[::-1]
+
+    def is_closed(self):
+        """Return True once close() has been called."""
+        return self._closed
+
+    def __iter__(self):
+        # The object is its own iterator; next() does the work.
+        return self
+
+    def next(self):
+        """Return the next readline() result, ending iteration on ''."""
+        # NeedMoreData is handed to the caller; only '' stops the iteration.
+        line = self.readline()
+        if line == '':
+            raise StopIteration
+        return line
+
+
+
+class FeedParser:
+ """A feed-style parser of email."""
+
+ def __init__(self, _factory=message.Message):
+ """_factory is called with no arguments to create a new message obj"""
+ self._factory = _factory
+ self._input = BufferedSubFile()
+ self._msgstack = []
+ self._parse = self._parsegen().next
+ self._cur = None
+ self._last = None
+ self._headersonly = False
+
+ # Non-public interface for supporting Parser's headersonly flag
+ def _set_headersonly(self):
+ self._headersonly = True
+
+ def feed(self, data):
+ """Push more data into the parser."""
+ self._input.push(data)
+ self._call_parse()
+
+ def _call_parse(self):
+ try:
+ self._parse()
+ except StopIteration:
+ pass
+
+ def close(self):
+ """Parse all remaining data and return the root message object."""
+ self._input.close()
+ self._call_parse()
+ root = self._pop_message()
+ assert not self._msgstack
+ # Look for final set of defects
+ if root.get_content_maintype() == 'multipart' \
+ and not root.is_multipart():
+ root.defects.append(errors.MultipartInvariantViolationDefect())
+ return root
+
+ def _new_message(self):
+ msg = self._factory()
+ if self._cur and self._cur.get_content_type() == 'multipart/digest':
+ msg.set_default_type('message/rfc822')
+ if self._msgstack:
+ self._msgstack[-1].attach(msg)
+ self._msgstack.append(msg)
+ self._cur = msg
+ self._last = msg
+
+ def _pop_message(self):
+ retval = self._msgstack.pop()
+ if self._msgstack:
+ self._cur = self._msgstack[-1]
+ else:
+ self._cur = None
+ return retval
+
+ def _parsegen(self):
+ # Create a new message and start by parsing headers.
+ self._new_message()
+ headers = []
+ # Collect the headers, searching for a line that doesn't match the RFC
+ # 2822 header or continuation pattern (including an empty line).
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if not headerRE.match(line):
+ # If we saw the RFC defined header/body separator
+ # (i.e. newline), just throw it away. Otherwise the line is
+ # part of the body so push it back.
+ if not NLCRE.match(line):
+ self._input.unreadline(line)
+ break
+ headers.append(line)
+ # Done with the headers, so parse them and figure out what we're
+ # supposed to see in the body of the message.
+ self._parse_headers(headers)
+ # Headers-only parsing is a backwards compatibility hack, which was
+ # necessary in the older parser, which could raise errors. All
+ # remaining lines in the input are thrown into the message body.
+ if self._headersonly:
+ lines = []
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if line == '':
+ break
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+ return
+ if self._cur.get_content_type() == 'message/delivery-status':
+ # message/delivery-status contains blocks of headers separated by
+ # a blank line. We'll represent each header block as a separate
+ # nested message object, but the processing is a bit different
+ # than standard message/* types because there is no body for the
+ # nested messages. A blank line separates the subparts.
+ while True:
+ self._input.push_eof_matcher(NLCRE.match)
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ msg = self._pop_message()
+ # We need to pop the EOF matcher in order to tell if we're at
+ # the end of the current file, not the end of the last block
+ # of message headers.
+ self._input.pop_eof_matcher()
+ # The input stream must be sitting at the newline or at the
+ # EOF. We want to see if we're at the end of this subpart, so
+ # first consume the blank line, then test the next line to see
+ # if we're at this subpart's EOF.
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ if line == '':
+ break
+ # Not at EOF so this is a line we're going to need.
+ self._input.unreadline(line)
+ return
+ if self._cur.get_content_maintype() == 'message':
+ # The message claims to be a message/* type, then what follows is
+ # another RFC 2822 message.
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ self._pop_message()
+ return
+ if self._cur.get_content_maintype() == 'multipart':
+ boundary = self._cur.get_boundary()
+ if boundary is None:
+ # The message /claims/ to be a multipart but it has not
+ # defined a boundary. That's a problem which we'll handle by
+ # reading everything until the EOF and marking the message as
+ # defective.
+ self._cur.defects.append(errors.NoBoundaryInMultipartDefect())
+ lines = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+ return
+ # Create a line match predicate which matches the inter-part
+ # boundary as well as the end-of-multipart boundary. Don't push
+ # this onto the input stream until we've scanned past the
+ # preamble.
+ separator = '--' + boundary
+ boundaryre = re.compile(
+ '(?P<sep>' + re.escape(separator) +
+ r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
+ capturing_preamble = True
+ preamble = []
+ linesep = False
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if line == '':
+ break
+ mo = boundaryre.match(line)
+ if mo:
+ # If we're looking at the end boundary, we're done with
+ # this multipart. If there was a newline at the end of
+ # the closing boundary, then we need to initialize the
+ # epilogue with the empty string (see below).
+ if mo.group('end'):
+ linesep = mo.group('linesep')
+ break
+ # We saw an inter-part boundary. Were we in the preamble?
+ if capturing_preamble:
+ if preamble:
+ # According to RFC 2046, the last newline belongs
+ # to the boundary.
+ lastline = preamble[-1]
+ eolmo = NLCRE_eol.search(lastline)
+ if eolmo:
+ preamble[-1] = lastline[:-len(eolmo.group(0))]
+ self._cur.preamble = EMPTYSTRING.join(preamble)
+ capturing_preamble = False
+ self._input.unreadline(line)
+ continue
+ # We saw a boundary separating two parts. Consume any
+ # multiple boundary lines that may be following. Our
+ # interpretation of RFC 2046 BNF grammar does not produce
+ # body parts within such double boundaries.
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ mo = boundaryre.match(line)
+ if not mo:
+ self._input.unreadline(line)
+ break
+ # Recurse to parse this subpart; the input stream points
+ # at the subpart's first line.
+ self._input.push_eof_matcher(boundaryre.match)
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ # Because of RFC 2046, the newline preceding the boundary
+ # separator actually belongs to the boundary, not the
+ # previous subpart's payload (or epilogue if the previous
+ # part is a multipart).
+ if self._last.get_content_maintype() == 'multipart':
+ epilogue = self._last.epilogue
+ if epilogue == '':
+ self._last.epilogue = None
+ elif epilogue is not None:
+ mo = NLCRE_eol.search(epilogue)
+ if mo:
+ end = len(mo.group(0))
+ self._last.epilogue = epilogue[:-end]
+ else:
+ payload = self._last.get_payload()
+ if isinstance(payload, basestring):
+ mo = NLCRE_eol.search(payload)
+ if mo:
+ payload = payload[:-len(mo.group(0))]
+ self._last.set_payload(payload)
+ self._input.pop_eof_matcher()
+ self._pop_message()
+ # Set the multipart up for newline cleansing, which will
+ # happen if we're in a nested multipart.
+ self._last = self._cur
+ else:
+ # I think we must be in the preamble
+ assert capturing_preamble
+ preamble.append(line)
+ # We've seen either the EOF or the end boundary. If we're still
+ # capturing the preamble, we never saw the start boundary. Note
+ # that as a defect and store the captured text as the payload.
+ # Everything from here to the EOF is epilogue.
+ if capturing_preamble:
+ self._cur.defects.append(errors.StartBoundaryNotFoundDefect())
+ self._cur.set_payload(EMPTYSTRING.join(preamble))
+ epilogue = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ self._cur.epilogue = EMPTYSTRING.join(epilogue)
+ return
+ # If the end boundary ended in a newline, we'll need to make sure
+ # the epilogue isn't None
+ if linesep:
+ epilogue = ['']
+ else:
+ epilogue = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ epilogue.append(line)
+ # Any CRLF at the front of the epilogue is not technically part of
+ # the epilogue. Also, watch out for an empty string epilogue,
+ # which means a single newline.
+ if epilogue:
+ firstline = epilogue[0]
+ bolmo = NLCRE_bol.match(firstline)
+ if bolmo:
+ epilogue[0] = firstline[len(bolmo.group(0)):]
+ self._cur.epilogue = EMPTYSTRING.join(epilogue)
+ return
+ # Otherwise, it's some non-multipart type, so the entire rest of the
+ # file contents becomes the payload.
+ lines = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+
+ def _parse_headers(self, lines):
+ # Passed a list of lines that make up the headers for the current msg
+ lastheader = ''
+ lastvalue = []
+ for lineno, line in enumerate(lines):
+ # Check for continuation
+ if line[0] in ' \t':
+ if not lastheader:
+ # The first line of the headers was a continuation. This
+ # is illegal, so let's note the defect, store the illegal
+ # line, and ignore it for purposes of headers.
+ defect = errors.FirstHeaderLineIsContinuationDefect(line)
+ self._cur.defects.append(defect)
+ continue
+ lastvalue.append(line)
+ continue
+ if lastheader:
+ # XXX reconsider the joining of folded lines
+ lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
+ self._cur[lastheader] = lhdr
+ lastheader, lastvalue = '', []
+ # Check for envelope header, i.e. unix-from
+ if line.startswith('From '):
+ if lineno == 0:
+ # Strip off the trailing newline
+ mo = NLCRE_eol.search(line)
+ if mo:
+ line = line[:-len(mo.group(0))]
+ self._cur.set_unixfrom(line)
+ continue
+ elif lineno == len(lines) - 1:
+ # Something looking like a unix-from at the end - it's
+ # probably the first line of the body, so push back the
+ # line and stop.
+ self._input.unreadline(line)
+ return
+ else:
+ # Weirdly placed unix-from line. Note this as a defect
+ # and ignore it.
+ defect = errors.MisplacedEnvelopeHeaderDefect(line)
+ self._cur.defects.append(defect)
+ continue
+ # Split the line on the colon separating field name from value.
+ i = line.find(':')
+ if i < 0:
+ defect = errors.MalformedHeaderDefect(line)
+ self._cur.defects.append(defect)
+ continue
+ lastheader = line[:i]
+ lastvalue = [line[i+1:].lstrip()]
+ # Done with all the lines, so handle the last header.
+ if lastheader:
+ # XXX reconsider the joining of folded lines
+ self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
diff --git a/cashew/Lib/email/generator.py b/cashew/Lib/email/generator.py
new file mode 100644
index 0000000..e50f912
--- /dev/null
+++ b/cashew/Lib/email/generator.py
@@ -0,0 +1,371 @@
+# Copyright (C) 2001-2010 Python Software Foundation
+# Contact: email-sig@python.org
+
+"""Classes to generate plain text from a message object tree."""
+
+__all__ = ['Generator', 'DecodedGenerator']
+
+import re
+import sys
+import time
+import random
+import warnings
+
+from cStringIO import StringIO
+from email.header import Header
+
+UNDERSCORE = '_'
+NL = '\n'
+
+fcre = re.compile(r'^From ', re.MULTILINE)
+
+def _is8bitstring(s):
+ if isinstance(s, str):
+ try:
+ unicode(s, 'us-ascii')
+ except UnicodeError:
+ return True
+ return False
+
+
+
+class Generator:
+ """Generates output from a Message object tree.
+
+ This basic generator writes the message to the given file object as plain
+ text.
+ """
+ #
+ # Public interface
+ #
+
+ def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
+ """Create the generator for message flattening.
+
+ outfp is the output file-like object for writing the message to. It
+ must have a write() method.
+
+ Optional mangle_from_ is a flag that, when True (the default), escapes
+ From_ lines in the body of the message by putting a `>' in front of
+ them.
+
+ Optional maxheaderlen specifies the longest length for a non-continued
+ header. When a header line is longer (in characters, with tabs
+ expanded to 8 spaces) than maxheaderlen, the header will split as
+ defined in the Header class. Set maxheaderlen to zero to disable
+ header wrapping. The default is 78, as recommended (but not required)
+ by RFC 2822.
+ """
+ self._fp = outfp
+ self._mangle_from_ = mangle_from_
+ self._maxheaderlen = maxheaderlen
+
+ def write(self, s):
+ # Just delegate to the file object
+ self._fp.write(s)
+
+ def flatten(self, msg, unixfrom=False):
+ """Print the message object tree rooted at msg to the output file
+ specified when the Generator instance was created.
+
+ unixfrom is a flag that forces the printing of a Unix From_ delimiter
+ before the first object in the message tree. If the original message
+ has no From_ delimiter, a `standard' one is crafted. By default, this
+ is False to inhibit the printing of any From_ delimiter.
+
+ Note that for subobjects, no From_ line is printed.
+ """
+ if unixfrom:
+ ufrom = msg.get_unixfrom()
+ if not ufrom:
+ ufrom = 'From nobody ' + time.ctime(time.time())
+ print >> self._fp, ufrom
+ self._write(msg)
+
+ def clone(self, fp):
+ """Clone this generator with the exact same options."""
+ return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
+
+ #
+ # Protected interface - undocumented ;/
+ #
+
+ def _write(self, msg):
+ # We can't write the headers yet because of the following scenario:
+ # say a multipart message includes the boundary string somewhere in
+ # its body. We'd have to calculate the new boundary /before/ we write
+ # the headers so that we can write the correct Content-Type:
+ # parameter.
+ #
+ # The way we do this, so as to make the _handle_*() methods simpler,
+ # is to cache any subpart writes into a StringIO. Then we write the
+ # headers and the StringIO contents. That way, subpart handlers can
+ # Do The Right Thing, and can still modify the Content-Type: header if
+ # necessary.
+ oldfp = self._fp
+ try:
+ self._fp = sfp = StringIO()
+ self._dispatch(msg)
+ finally:
+ self._fp = oldfp
+ # Write the headers. First we see if the message object wants to
+ # handle that itself. If not, we'll do it generically.
+ meth = getattr(msg, '_write_headers', None)
+ if meth is None:
+ self._write_headers(msg)
+ else:
+ meth(self)
+ self._fp.write(sfp.getvalue())
+
+ def _dispatch(self, msg):
+ # Get the Content-Type: for the message, then try to dispatch to
+ # self._handle_<maintype>_<subtype>(). If there's no handler for the
+ # full MIME type, then dispatch to self._handle_<maintype>(). If
+ # that's missing too, then dispatch to self._writeBody().
+ main = msg.get_content_maintype()
+ sub = msg.get_content_subtype()
+ specific = UNDERSCORE.join((main, sub)).replace('-', '_')
+ meth = getattr(self, '_handle_' + specific, None)
+ if meth is None:
+ generic = main.replace('-', '_')
+ meth = getattr(self, '_handle_' + generic, None)
+ if meth is None:
+ meth = self._writeBody
+ meth(msg)
+
+ #
+ # Default handlers
+ #
+
+ def _write_headers(self, msg):
+ for h, v in msg.items():
+ print >> self._fp, '%s:' % h,
+ if self._maxheaderlen == 0:
+ # Explicit no-wrapping
+ print >> self._fp, v
+ elif isinstance(v, Header):
+ # Header instances know what to do
+ print >> self._fp, v.encode()
+ elif _is8bitstring(v):
+ # If we have raw 8bit data in a byte string, we have no idea
+ # what the encoding is. There is no safe way to split this
+ # string. If it's ascii-subset, then we could do a normal
+ # ascii split, but if it's multibyte then we could break the
+ # string. There's no way to know so the least harm seems to
+ # be to not split the string and risk it being too long.
+ print >> self._fp, v
+ else:
+ # Header's got lots of smarts, so use it. Note that this is
+ # fundamentally broken though because we lose idempotency when
+ # the header string is continued with tabs. It will now be
+ # continued with spaces. This was reversedly broken before we
+ # fixed bug 1974. Either way, we lose.
+ print >> self._fp, Header(
+ v, maxlinelen=self._maxheaderlen, header_name=h).encode()
+ # A blank line always separates headers from body
+ print >> self._fp
+
+ #
+ # Handlers for writing types and subtypes
+ #
+
+ def _handle_text(self, msg):
+ payload = msg.get_payload()
+ if payload is None:
+ return
+ if not isinstance(payload, basestring):
+ raise TypeError('string payload expected: %s' % type(payload))
+ if self._mangle_from_:
+ payload = fcre.sub('>From ', payload)
+ self._fp.write(payload)
+
+ # Default body handler
+ _writeBody = _handle_text
+
+ def _handle_multipart(self, msg):
+ # The trick here is to write out each part separately, merge them all
+ # together, and then make sure that the boundary we've chosen isn't
+ # present in the payload.
+ msgtexts = []
+ subparts = msg.get_payload()
+ if subparts is None:
+ subparts = []
+ elif isinstance(subparts, basestring):
+ # e.g. a non-strict parse of a message with no starting boundary.
+ self._fp.write(subparts)
+ return
+ elif not isinstance(subparts, list):
+ # Scalar payload
+ subparts = [subparts]
+ for part in subparts:
+ s = StringIO()
+ g = self.clone(s)
+ g.flatten(part, unixfrom=False)
+ msgtexts.append(s.getvalue())
+ # BAW: What about boundaries that are wrapped in double-quotes?
+ boundary = msg.get_boundary()
+ if not boundary:
+ # Create a boundary that doesn't appear in any of the
+ # message texts.
+ alltext = NL.join(msgtexts)
+ boundary = _make_boundary(alltext)
+ msg.set_boundary(boundary)
+ # If there's a preamble, write it out, with a trailing CRLF
+ if msg.preamble is not None:
+ if self._mangle_from_:
+ preamble = fcre.sub('>From ', msg.preamble)
+ else:
+ preamble = msg.preamble
+ print >> self._fp, preamble
+ # dash-boundary transport-padding CRLF
+ print >> self._fp, '--' + boundary
+ # body-part
+ if msgtexts:
+ self._fp.write(msgtexts.pop(0))
+ # *encapsulation
+ # --> delimiter transport-padding
+ # --> CRLF body-part
+ for body_part in msgtexts:
+ # delimiter transport-padding CRLF
+ print >> self._fp, '\n--' + boundary
+ # body-part
+ self._fp.write(body_part)
+ # close-delimiter transport-padding
+ self._fp.write('\n--' + boundary + '--' + NL)
+ if msg.epilogue is not None:
+ if self._mangle_from_:
+ epilogue = fcre.sub('>From ', msg.epilogue)
+ else:
+ epilogue = msg.epilogue
+ self._fp.write(epilogue)
+
+ def _handle_multipart_signed(self, msg):
+ # The contents of signed parts has to stay unmodified in order to keep
+ # the signature intact per RFC1847 2.1, so we disable header wrapping.
+ # RDM: This isn't enough to completely preserve the part, but it helps.
+ old_maxheaderlen = self._maxheaderlen
+ try:
+ self._maxheaderlen = 0
+ self._handle_multipart(msg)
+ finally:
+ self._maxheaderlen = old_maxheaderlen
+
+ def _handle_message_delivery_status(self, msg):
+ # We can't just write the headers directly to self's file object
+ # because this will leave an extra newline between the last header
+ # block and the boundary. Sigh.
+ blocks = []
+ for part in msg.get_payload():
+ s = StringIO()
+ g = self.clone(s)
+ g.flatten(part, unixfrom=False)
+ text = s.getvalue()
+ lines = text.split('\n')
+ # Strip off the unnecessary trailing empty line
+ if lines and lines[-1] == '':
+ blocks.append(NL.join(lines[:-1]))
+ else:
+ blocks.append(text)
+ # Now join all the blocks with an empty line. This has the lovely
+ # effect of separating each block with an empty line, but not adding
+ # an extra one after the last one.
+ self._fp.write(NL.join(blocks))
+
+ def _handle_message(self, msg):
+ s = StringIO()
+ g = self.clone(s)
+ # The payload of a message/rfc822 part should be a multipart sequence
+ # of length 1. The zeroth element of the list should be the Message
+ # object for the subpart. Extract that object, stringify it, and
+ # write it out.
+ # Except, it turns out, when it's a string instead, which happens when
+ # and only when HeaderParser is used on a message of mime type
+ # message/rfc822. Such messages are generated by, for example,
+ # Groupwise when forwarding unadorned messages. (Issue 7970.) So
+ # in that case we just emit the string body.
+ payload = msg.get_payload()
+ if isinstance(payload, list):
+ g.flatten(msg.get_payload(0), unixfrom=False)
+ payload = s.getvalue()
+ self._fp.write(payload)
+
+
+
+_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
+
+class DecodedGenerator(Generator):
+ """Generates a text representation of a message.
+
+ Like the Generator base class, except that non-text parts are substituted
+ with a format string representing the part.
+ """
+ def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
+ """Like Generator.__init__() except that an additional optional
+ argument is allowed.
+
+ Walks through all subparts of a message. If the subpart is of main
+ type `text', then it prints the decoded payload of the subpart.
+
+ Otherwise, fmt is a format string that is used instead of the message
+ payload. fmt is expanded with the following keywords (in
+ %(keyword)s format):
+
+ type : Full MIME type of the non-text part
+ maintype : Main MIME type of the non-text part
+ subtype : Sub-MIME type of the non-text part
+ filename : Filename of the non-text part
+ description: Description associated with the non-text part
+ encoding : Content transfer encoding of the non-text part
+
+ The default value for fmt is None, meaning
+
+ [Non-text (%(type)s) part of message omitted, filename %(filename)s]
+ """
+ Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
+ if fmt is None:
+ self._fmt = _FMT
+ else:
+ self._fmt = fmt
+
+ def _dispatch(self, msg):
+ for part in msg.walk():
+ maintype = part.get_content_maintype()
+ if maintype == 'text':
+ print >> self, part.get_payload(decode=True)
+ elif maintype == 'multipart':
+ # Just skip this
+ pass
+ else:
+ print >> self, self._fmt % {
+ 'type' : part.get_content_type(),
+ 'maintype' : part.get_content_maintype(),
+ 'subtype' : part.get_content_subtype(),
+ 'filename' : part.get_filename('[no filename]'),
+ 'description': part.get('Content-Description',
+ '[no description]'),
+ 'encoding' : part.get('Content-Transfer-Encoding',
+ '[no encoding]'),
+ }
+
+
+
+# Helper
+_width = len(repr(sys.maxint-1))
+_fmt = '%%0%dd' % _width
+
+def _make_boundary(text=None):
+ # Craft a random boundary. If text is given, ensure that the chosen
+ # boundary doesn't appear in the text.
+ token = random.randrange(sys.maxint)
+ boundary = ('=' * 15) + (_fmt % token) + '=='
+ if text is None:
+ return boundary
+ b = boundary
+ counter = 0
+ while True:
+ cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
+ if not cre.search(text):
+ break
+ b = boundary + '.' + str(counter)
+ counter += 1
+ return b
diff --git a/cashew/Lib/email/header.py b/cashew/Lib/email/header.py
new file mode 100644
index 0000000..2cf870f
--- /dev/null
+++ b/cashew/Lib/email/header.py
@@ -0,0 +1,514 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Ben Gertzfield, Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Header encoding and decoding functionality."""
+
+__all__ = [
+ 'Header',
+ 'decode_header',
+ 'make_header',
+ ]
+
+import re
+import binascii
+
+import email.quoprimime
+import email.base64mime
+
+from email.errors import HeaderParseError
+from email.charset import Charset
+
+NL = '\n'
+SPACE = ' '
+USPACE = u' '
+SPACE8 = ' ' * 8
+UEMPTYSTRING = u''
+
+MAXLINELEN = 76
+
+USASCII = Charset('us-ascii')
+UTF8 = Charset('utf-8')
+
+# Match encoded-word strings in the form =?charset?q?Hello_World?=
+ecre = re.compile(r'''
+ =\? # literal =?
+ (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
+ \? # literal ?
+ (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
+ \? # literal ?
+ (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
+ \?= # literal ?=
+ (?=[ \t]|$) # whitespace or the end of the string
+ ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
+
+# Field name regexp, including trailing colon, but not separating whitespace,
+# according to RFC 2822. Character range is from exclamation mark to tilde.
+# For use with .match()
+fcre = re.compile(r'[\041-\176]+:$')
+
+# Find a header embedded in a putative header value. Used to check for
+# header injection attack.
+_embeded_header = re.compile(r'\n[^ \t]+:')
+
+
+
+# Helpers
+_max_append = email.quoprimime._max_append
+
+
+
+def decode_header(header):
+ """Decode a message header value without converting charset.
+
+ Returns a list of (decoded_string, charset) pairs containing each of the
+ decoded parts of the header. Charset is None for non-encoded parts of the
+ header, otherwise a lower-case string containing the name of the character
+ set specified in the encoded string.
+
+ An email.errors.HeaderParseError may be raised when certain decoding errors
+ occur (e.g. a base64 decoding exception).
+ """
+ # If no encoding, just return the header
+ header = str(header)
+ if not ecre.search(header):
+ return [(header, None)]
+ decoded = []
+ dec = ''
+ for line in header.splitlines():
+ # This line might not have an encoding in it
+ if not ecre.search(line):
+ decoded.append((line, None))
+ continue
+ parts = ecre.split(line)
+ while parts:
+ unenc = parts.pop(0).strip()
+ if unenc:
+ # Should we continue a long line?
+ if decoded and decoded[-1][1] is None:
+ decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
+ else:
+ decoded.append((unenc, None))
+ if parts:
+ charset, encoding = [s.lower() for s in parts[0:2]]
+ encoded = parts[2]
+ dec = None
+ if encoding == 'q':
+ dec = email.quoprimime.header_decode(encoded)
+ elif encoding == 'b':
+ paderr = len(encoded) % 4 # Postel's law: add missing padding
+ if paderr:
+ encoded += '==='[:4 - paderr]
+ try:
+ dec = email.base64mime.decode(encoded)
+ except binascii.Error:
+ # Turn this into a higher level exception. BAW: Right
+ # now we throw the lower level exception away but
+ # when/if we get exception chaining, we'll preserve it.
+ raise HeaderParseError
+ if dec is None:
+ dec = encoded
+
+ if decoded and decoded[-1][1] == charset:
+ decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
+ else:
+ decoded.append((dec, charset))
+ del parts[0:3]
+ return decoded
+
+
+
+def make_header(decoded_seq, maxlinelen=None, header_name=None,
+ continuation_ws=' '):
+ """Create a Header from a sequence of pairs as returned by decode_header()
+
+ decode_header() takes a header value string and returns a sequence of
+ pairs of the format (decoded_string, charset) where charset is the string
+ name of the character set.
+
+ This function takes one of those sequence of pairs and returns a Header
+ instance. Optional maxlinelen, header_name, and continuation_ws are as in
+ the Header constructor.
+ """
+ h = Header(maxlinelen=maxlinelen, header_name=header_name,
+ continuation_ws=continuation_ws)
+ for s, charset in decoded_seq:
+ # None means us-ascii but we can simply pass it on to h.append()
+ if charset is not None and not isinstance(charset, Charset):
+ charset = Charset(charset)
+ h.append(s, charset)
+ return h
+
+
+
+class Header:
+ def __init__(self, s=None, charset=None,
+ maxlinelen=None, header_name=None,
+ continuation_ws=' ', errors='strict'):
+ """Create a MIME-compliant header that can contain many character sets.
+
+ Optional s is the initial header value. If None, the initial header
+ value is not set. You can later append to the header with .append()
+ method calls. s may be a byte string or a Unicode string, but see the
+ .append() documentation for semantics.
+
+ Optional charset serves two purposes: it has the same meaning as the
+ charset argument to the .append() method. It also sets the default
+ character set for all subsequent .append() calls that omit the charset
+ argument. If charset is not provided in the constructor, the us-ascii
+ charset is used both as s's initial charset and as the default for
+ subsequent .append() calls.
+
+ The maximum line length can be specified explicitly via maxlinelen. For
+ splitting the first line to a shorter value (to account for the field
+ header which isn't included in s, e.g. `Subject') pass in the name of
+ the field in header_name. The default maxlinelen is 76.
+
+ continuation_ws must be RFC 2822 compliant folding whitespace (usually
+ either a space or a hard tab) which will be prepended to continuation
+ lines.
+
+ errors is passed through to the .append() call.
+ """
+ if charset is None:
+ charset = USASCII
+ if not isinstance(charset, Charset):
+ charset = Charset(charset)
+ self._charset = charset
+ self._continuation_ws = continuation_ws
+ cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
+ # BAW: I believe `chunks' and `maxlinelen' should be non-public.
+ self._chunks = []
+ if s is not None:
+ self.append(s, charset, errors)
+ if maxlinelen is None:
+ maxlinelen = MAXLINELEN
+ if header_name is None:
+ # We don't know anything about the field header so the first line
+ # is the same length as subsequent lines.
+ self._firstlinelen = maxlinelen
+ else:
+ # The first line should be shorter to take into account the field
+ # header. Also subtract off 2 extra for the colon and space.
+ self._firstlinelen = maxlinelen - len(header_name) - 2
+ # Second and subsequent lines should subtract off the length in
+ # columns of the continuation whitespace prefix.
+ self._maxlinelen = maxlinelen - cws_expanded_len
+
+ def __str__(self):
+ """A synonym for self.encode()."""
+ return self.encode()
+
+ def __unicode__(self):
+ """Helper for the built-in unicode function."""
+ uchunks = []
+ lastcs = None
+ for s, charset in self._chunks:
+ # We must preserve spaces between encoded and non-encoded word
+ # boundaries, which means for us we need to add a space when we go
+ # from a charset to None/us-ascii, or from None/us-ascii to a
+ # charset. Only do this for the second and subsequent chunks.
+ nextcs = charset
+ if uchunks:
+ if lastcs not in (None, 'us-ascii'):
+ if nextcs in (None, 'us-ascii'):
+ uchunks.append(USPACE)
+ nextcs = None
+ elif nextcs not in (None, 'us-ascii'):
+ uchunks.append(USPACE)
+ lastcs = nextcs
+ uchunks.append(unicode(s, str(charset)))
+ return UEMPTYSTRING.join(uchunks)
+
+ # Rich comparison operators for equality only. BAW: does it make sense to
+ # have or explicitly disable <, <=, >, >= operators?
+ def __eq__(self, other):
+ # other may be a Header or a string. Both are fine so coerce
+ # ourselves to a string, swap the args and do another comparison.
+ return other == self.encode()
+
+ def __ne__(self, other):
+ return not self == other
+
+ def append(self, s, charset=None, errors='strict'):
+ """Append a string to the MIME header.
+
+ Optional charset, if given, should be a Charset instance or the name
+ of a character set (which will be converted to a Charset instance). A
+ value of None (the default) means that the charset given in the
+ constructor is used.
+
+ s may be a byte string or a Unicode string. If it is a byte string
+ (i.e. isinstance(s, str) is true), then charset is the encoding of
+ that byte string, and a UnicodeError will be raised if the string
+ cannot be decoded with that charset. If s is a Unicode string, then
+ charset is a hint specifying the character set of the characters in
+ the string. In this case, when producing an RFC 2822 compliant header
+ using RFC 2047 rules, the Unicode string will be encoded using the
+ following charsets in order: us-ascii, the charset hint, utf-8. The
+ first character set not to provoke a UnicodeError is used.
+
+ Optional `errors' is passed as the third argument to any unicode() or
+ ustr.encode() call.
+ """
+ if charset is None:
+ charset = self._charset
+ elif not isinstance(charset, Charset):
+ charset = Charset(charset)
+ # If the charset is our faux 8bit charset, leave the string unchanged
+ if charset != '8bit':
+ # We need to test that the string can be converted to unicode and
+ # back to a byte string, given the input and output codecs of the
+ # charset.
+ if isinstance(s, str):
+ # Possibly raise UnicodeError if the byte string can't be
+ # converted to a unicode with the input codec of the charset.
+ incodec = charset.input_codec or 'us-ascii'
+ ustr = unicode(s, incodec, errors)
+ # Now make sure that the unicode could be converted back to a
+ # byte string with the output codec, which may be different
+ # than the input codec. Still, use the original byte string.
+ outcodec = charset.output_codec or 'us-ascii'
+ ustr.encode(outcodec, errors)
+ elif isinstance(s, unicode):
+ # Now we have to be sure the unicode string can be converted
+ # to a byte string with a reasonable output codec. We want to
+ # use the byte string in the chunk.
+ for charset in USASCII, charset, UTF8:
+ try:
+ outcodec = charset.output_codec or 'us-ascii'
+ s = s.encode(outcodec, errors)
+ break
+ except UnicodeError:
+ pass
+ else:
+ assert False, 'utf-8 conversion failed'
+ self._chunks.append((s, charset))
+
+ def _split(self, s, charset, maxlinelen, splitchars):
+ # Split up a header safely for use with encode_chunks.
+ splittable = charset.to_splittable(s)
+ encoded = charset.from_splittable(splittable, True)
+ elen = charset.encoded_header_len(encoded)
+ # If the line's encoded length fits, just return it
+ if elen <= maxlinelen:
+ return [(encoded, charset)]
+ # If we have undetermined raw 8bit characters sitting in a byte
+ # string, we really don't know what the right thing to do is. We
+ # can't really split it because it might be multibyte data which we
+ # could break if we split it between pairs. The least harm seems to
+ # be to not split the header at all, but that means they could go out
+ # longer than maxlinelen.
+ if charset == '8bit':
+ return [(s, charset)]
+ # BAW: I'm not sure what the right test here is. What we're trying to
+ # do is be faithful to RFC 2822's recommendation that ($2.2.3):
+ #
+ # "Note: Though structured field bodies are defined in such a way that
+ # folding can take place between many of the lexical tokens (and even
+ # within some of the lexical tokens), folding SHOULD be limited to
+ # placing the CRLF at higher-level syntactic breaks."
+ #
+ # For now, I can only imagine doing this when the charset is us-ascii,
+ # although it's possible that other charsets may also benefit from the
+ # higher-level syntactic breaks.
+ elif charset == 'us-ascii':
+ return self._split_ascii(s, charset, maxlinelen, splitchars)
+ # BAW: should we use encoded?
+ elif elen == len(s):
+ # We can split on _maxlinelen boundaries because we know that the
+ # encoding won't change the size of the string
+ splitpnt = maxlinelen
+ first = charset.from_splittable(splittable[:splitpnt], False)
+ last = charset.from_splittable(splittable[splitpnt:], False)
+ else:
+ # Binary search for split point
+ first, last = _binsplit(splittable, charset, maxlinelen)
+ # first is of the proper length so just wrap it in the appropriate
+ # chrome. last must be recursively split.
+ fsplittable = charset.to_splittable(first)
+ fencoded = charset.from_splittable(fsplittable, True)
+ chunk = [(fencoded, charset)]
+ return chunk + self._split(last, charset, self._maxlinelen, splitchars)
+
+ def _split_ascii(self, s, charset, firstlen, splitchars):
+ # Delegate to the module-level _split_ascii() and pair every resulting
+ # line with `charset', producing the (string, charset) pairs that
+ # _split() returns. Lines after the first are limited by
+ # self._maxlinelen.
+ chunks = _split_ascii(s, firstlen, self._maxlinelen,
+ self._continuation_ws, splitchars)
+ return zip(chunks, [charset]*len(chunks))
+
+ def _encode_chunks(self, newchunks, maxlinelen):
+ # MIME-encode a header with many different charsets and/or encodings.
+ #
+ # Given a list of pairs (string, charset), return a MIME-encoded
+ # string suitable for use in a header field. Each pair may have
+ # different charsets and/or encodings, and the resulting header will
+ # accurately reflect each setting.
+ #
+ # Each encoding can be email.utils.QP (quoted-printable, for
+ # ASCII-like character sets like iso-8859-1), email.utils.BASE64
+ # (Base64, for non-ASCII like character sets like KOI8-R and
+ # iso-2022-jp), or None (no encoding).
+ #
+ # Each pair will be represented on a separate line; the resulting
+ # string will be in the format:
+ #
+ # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
+ # =?charset2?b?SvxyZ2VuIEL2aW5n?="
+ #
+ # The accumulated lines are joined with NL + self._continuation_ws.
+ chunks = []
+ for header, charset in newchunks:
+ # Empty strings contribute nothing to the output.
+ if not header:
+ continue
+ # No charset, or a charset without a header encoding: emit the text
+ # as-is instead of wrapping it in an encoded word.
+ if charset is None or charset.header_encoding is None:
+ s = header
+ else:
+ s = charset.header_encode(header)
+ # Don't add more folding whitespace than necessary
+ if chunks and chunks[-1].endswith(' '):
+ extra = ''
+ else:
+ extra = ' '
+ # NOTE(review): _max_append is defined outside this view; presumably it
+ # appends s to the last line if it fits in maxlinelen, else starts a
+ # new line -- confirm.
+ _max_append(chunks, s, maxlinelen, extra)
+ joiner = NL + self._continuation_ws
+ return joiner.join(chunks)
+
+ def encode(self, splitchars=';, '):
+ """Encode a message header into an RFC-compliant format.
+
+ There are many issues involved in converting a given string for use in
+ an email header. Only certain character sets are readable in most
+ email clients, and as header strings can only contain a subset of
+ 7-bit ASCII, care must be taken to properly convert and encode (with
+ Base64 or quoted-printable) header strings. In addition, there is a
+ 75-character length limit on any given encoded header field, so
+ line-wrapping must be performed, even with double-byte character sets.
+
+ This method will do its best to convert the string to the correct
+ character set used in email, and encode and line wrap it safely with
+ the appropriate scheme for that character set.
+
+ If the given charset is not known or an error occurs during
+ conversion, this function will return the header untouched.
+
+ Optional splitchars is a string containing characters to split long
+ ASCII lines on, in rough support of RFC 2822's `highest level
+ syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
+
+ Raises HeaderParseError if the encoded result appears to contain an
+ embedded header.
+ """
+ newchunks = []
+ maxlinelen = self._firstlinelen
+ # Encoded length of the most recently produced piece; used to work out
+ # how much room is left on the line for the next chunk.
+ lastlen = 0
+ for s, charset in self._chunks:
+ # The first bit of the next chunk should be just long enough to
+ # fill the next line. Don't forget the space separating the
+ # encoded words.
+ targetlen = maxlinelen - lastlen - 1
+ # encoded_header_len('') is the length of an encoded word with no
+ # content; if even that does not fit, no part of this chunk can share
+ # the current line.
+ if targetlen < charset.encoded_header_len(''):
+ # Stick it on the next line
+ targetlen = maxlinelen
+ newchunks += self._split(s, charset, targetlen, splitchars)
+ lastchunk, lastcharset = newchunks[-1]
+ lastlen = lastcharset.encoded_header_len(lastchunk)
+ value = self._encode_chunks(newchunks, maxlinelen)
+ # Refuse to hand back a value that matches the _embeded_header pattern
+ # (defined elsewhere in this module).
+ if _embeded_header.search(value):
+ raise HeaderParseError("header value appears to contain "
+ "an embedded header: {!r}".format(value))
+ return value
+
+
+
+def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars):
+ # Split the ASCII string s into a list of lines and return that list.
+ #
+ # `firstlen' is the length limit applied until the first line has been
+ # emitted; after that `restlen' is used. The characters of `splitchars'
+ # are tried in order and the first one present in a line becomes the
+ # break character for that line. `continuation_ws' is only measured
+ # here (tabs counted as SPACE8); it is never inserted into the lines.
+ lines = []
+ maxlen = firstlen
+ for line in s.splitlines():
+ # Ignore any leading whitespace (i.e. continuation whitespace) already
+ # on the line, since we'll be adding our own.
+ line = line.lstrip()
+ if len(line) < maxlen:
+ lines.append(line)
+ maxlen = restlen
+ continue
+ # Attempt to split the line at the highest-level syntactic break
+ # possible. Note that we don't have a lot of smarts about field
+ # syntax; we just try to break on semi-colons, then commas, then
+ # whitespace.
+ for ch in splitchars:
+ if ch in line:
+ break
+ else:
+ # There's nothing useful to split the line on, not even spaces, so
+ # just append this line unchanged
+ lines.append(line)
+ maxlen = restlen
+ continue
+ # Now split the line on the character plus trailing whitespace
+ # NOTE(review): ch is interpolated into the pattern unescaped, so a
+ # regex metacharacter in splitchars (e.g. '.') would change what is
+ # matched; re.escape(ch) would be safer.
+ cre = re.compile(r'%s\s*' % ch)
+ # ';' and ',' are kept at the end of the line they terminate; a space
+ # used as the break character is simply dropped.
+ if ch in ';,':
+ eol = ch
+ else:
+ eol = ''
+ joiner = eol + ' '
+ joinlen = len(joiner)
+ wslen = len(continuation_ws.replace('\t', SPACE8))
+ # `this' collects the parts of the output line being built; linelen is
+ # the summed length of those parts, excluding joiners.
+ this = []
+ linelen = 0
+ for part in cre.split(line):
+ curlen = linelen + max(0, len(this)-1) * joinlen
+ partlen = len(part)
+ onfirstline = not lines
+ # We don't want to split after the field name, if we're on the
+ # first line and the field name is present in the header string.
+ if ch == ' ' and onfirstline and \
+ len(this) == 1 and fcre.match(this[0]):
+ this.append(part)
+ linelen += partlen
+ elif curlen + partlen > maxlen:
+ if this:
+ lines.append(joiner.join(this) + eol)
+ # If this part is longer than maxlen and we aren't already
+ # splitting on whitespace, try to recursively split this line
+ # on whitespace.
+ if partlen > maxlen and ch != ' ':
+ subl = _split_ascii(part, maxlen, restlen,
+ continuation_ws, ' ')
+ lines.extend(subl[:-1])
+ this = [subl[-1]]
+ else:
+ this = [part]
+ # A new output line starts here, so its length budget includes the
+ # width of the continuation whitespace.
+ linelen = wslen + len(this[-1])
+ maxlen = restlen
+ else:
+ this.append(part)
+ linelen += partlen
+ # Put any left over parts on a line by themselves
+ if this:
+ lines.append(joiner.join(this))
+ return lines
+
+
+
+def _binsplit(splittable, charset, maxlinelen):
+ # Binary-search for the longest prefix of `splittable' whose encoded
+ # header length (charset.encoded_header_len) is <= maxlinelen.
+ #
+ # Returns (first, last): the prefix and the remainder, each converted
+ # with charset.from_splittable(..., False). The True/False argument is
+ # whatever from_splittable defines it to be; during the search the
+ # candidate is converted with True before being measured.
+ i = 0
+ j = len(splittable)
+ while i < j:
+ # Invariants:
+ # 1. splittable[:k] fits for all k <= i (note that we *assume*,
+ # at the start, that splittable[:0] fits).
+ # 2. splittable[:k] does not fit for any k > j (at the start,
+ # this means we shouldn't look at any k > len(splittable)).
+ # 3. We don't know about splittable[:k] for k in i+1..j.
+ # 4. We want to set i to the largest k that fits, with i <= k <= j.
+ #
+ m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j
+ chunk = charset.from_splittable(splittable[:m], True)
+ chunklen = charset.encoded_header_len(chunk)
+ if chunklen <= maxlinelen:
+ # m is acceptable, so is a new lower bound.
+ i = m
+ else:
+ # m is not acceptable, so final i must be < m.
+ j = m - 1
+ # i == j. Invariant #1 implies that splittable[:i] fits, and
+ # invariant #2 implies that splittable[:i+1] does not fit, so i
+ # is what we're looking for.
+ first = charset.from_splittable(splittable[:i], False)
+ last = charset.from_splittable(splittable[i:], False)
+ return first, last
diff --git a/cashew/Lib/email/iterators.py b/cashew/Lib/email/iterators.py
new file mode 100644
index 0000000..e99f228
--- /dev/null
+++ b/cashew/Lib/email/iterators.py
@@ -0,0 +1,73 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Various types of useful iterators and generators."""
+
+__all__ = [
+ 'body_line_iterator',
+ 'typed_subpart_iterator',
+ 'walk',
+ # Do not include _structure() since it's part of the debugging API.
+ ]
+
+import sys
+from cStringIO import StringIO
+
+
+
+# This function will become a method of the Message class
+def walk(self):
+ """Walk over the message tree, yielding each subpart.
+
+ The walk is performed in depth-first order. This method is a
+ generator. The message itself is yielded first, before any of its
+ subparts.
+ """
+ yield self
+ if self.is_multipart():
+ # Recursion goes through subpart.walk(), so every element of the
+ # payload list must provide walk() itself.
+ for subpart in self.get_payload():
+ for subsubpart in subpart.walk():
+ yield subsubpart
+
+
+
+# These two functions are imported into the Iterators.py interface module.
+def body_line_iterator(msg, decode=False):
+ """Iterate over the parts, returning string payloads line-by-line.
+
+ Optional decode (default False) is passed through to .get_payload().
+ Parts whose payload is not a string are skipped.
+ """
+ for subpart in msg.walk():
+ payload = subpart.get_payload(decode=decode)
+ # Only string payloads have lines to yield; anything else (such as a
+ # list of subparts, or None) is silently ignored.
+ if isinstance(payload, basestring):
+ # Wrapping in StringIO lets the payload be iterated line by line.
+ for line in StringIO(payload):
+ yield line
+
+
+def typed_subpart_iterator(msg, maintype='text', subtype=None):
+ """Iterate over the subparts with a given MIME type.
+
+ Use `maintype' as the main MIME type to match against; this defaults to
+ "text". Optional `subtype' is the MIME subtype to match against; if
+ omitted, only the main type is matched.
+ """
+ for subpart in msg.walk():
+ # Matching is plain string equality against the values returned by
+ # get_content_maintype() / get_content_subtype(); the arguments are not
+ # case-folded here.
+ if subpart.get_content_maintype() == maintype:
+ if subtype is None or subpart.get_content_subtype() == subtype:
+ yield subpart
+
+
+
+def _structure(msg, fp=None, level=0, include_default=False):
+ """A handy debugging aid
+
+ Print the content type of msg and, recursively, of each of its
+ subparts, one per line, to fp (sys.stdout when fp is None). `level'
+ is the nesting depth and controls the indentation of the line. When
+ include_default is true the default type is printed in brackets after
+ the content type.
+ """
+ if fp is None:
+ fp = sys.stdout
+ tab = ' ' * (level * 4)
+ # The trailing comma keeps the output on the same line so the optional
+ # default type (or just the newline) can follow.
+ print >> fp, tab + msg.get_content_type(),
+ if include_default:
+ print >> fp, '[%s]' % msg.get_default_type()
+ else:
+ print >> fp
+ if msg.is_multipart():
+ for subpart in msg.get_payload():
+ _structure(subpart, fp, level+1, include_default)
diff --git a/cashew/Lib/email/message.py b/cashew/Lib/email/message.py
new file mode 100644
index 0000000..d7358cd
--- /dev/null
+++ b/cashew/Lib/email/message.py
@@ -0,0 +1,797 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Basic message object for the email package object model."""
+
+__all__ = ['Message']
+
+import re
+import uu
+import binascii
+import warnings
+from cStringIO import StringIO
+
+# Intrapackage imports
+import email.charset
+from email import utils
+from email import errors
+
+# Separator placed between a header value and each of its parameters when
+# a header string is assembled.
+SEMISPACE = '; '
+
+# Regular expression that matches `special' characters in parameters, the
+# existence of which forces quoting of the parameter value.
+tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
+
+
+# Helper functions
+def _splitparam(param):
+ # Split header parameters. BAW: this may be too simple. It isn't
+ # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
+ # found in the wild. We may eventually need a full fledged parser.
+ #
+ # Returns a 2-tuple: the text before the first ';' and the text after
+ # it, both stripped. The second item is None when there is no ';'.
+ a, sep, b = param.partition(';')
+ if not sep:
+ return a.strip(), None
+ return a.strip(), b.strip()
+
+def _formatparam(param, value=None, quote=True):
+ """Convenience function to format and return a key=value pair.
+
+ This will quote the value if needed or if quote is true. If value is a
+ three tuple (charset, language, value), it will be encoded according
+ to RFC2231 rules.
+
+ If value is None or empty, param alone is returned.
+ """
+ if value is not None and len(value) > 0:
+ # A tuple is used for RFC 2231 encoded parameter values where items
+ # are (charset, language, value). charset is a string, not a Charset
+ # instance.
+ if isinstance(value, tuple):
+ # Encode as per RFC 2231
+ # The trailing '*' on the name marks the parameter as RFC 2231
+ # encoded.
+ param += '*'
+ value = utils.encode_rfc2231(value[2], value[0], value[1])
+ # BAW: Please check this. I think that if quote is set it should
+ # force quoting even if not necessary.
+ if quote or tspecials.search(value):
+ return '%s="%s"' % (param, utils.quote(value))
+ else:
+ return '%s=%s' % (param, value)
+ else:
+ return param
+
+def _parseparam(s):
+ # Split a parameter string into a list of items.
+ #
+ # `s' must start with ';' for anything to be consumed; each item runs
+ # up to the next ';' that is not inside a double-quoted string. For
+ # items of the form name=value the name is stripped and lower-cased and
+ # the value is stripped; bare items are just stripped.
+ plist = []
+ while s[:1] == ';':
+ s = s[1:]
+ end = s.find(';')
+ # An odd number of unescaped double quotes before `end' means this ';'
+ # sits inside a quoted string, so keep looking for the next one.
+ while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
+ end = s.find(';', end + 1)
+ # No terminating ';': the item extends to the end of the string.
+ if end < 0:
+ end = len(s)
+ f = s[:end]
+ if '=' in f:
+ i = f.index('=')
+ f = f[:i].strip().lower() + '=' + f[i+1:].strip()
+ plist.append(f.strip())
+ s = s[end:]
+ return plist
+
+
+def _unquotevalue(value):
+ # This is different than utils.collapse_rfc2231_value() because it doesn't
+ # try to convert the value to a unicode. Message.get_param() and
+ # Message.get_params() are both currently defined to return the tuple in
+ # the face of RFC 2231 parameters.
+ #
+ # For a tuple only the third item (the value) is unquoted; the first two
+ # items are passed through unchanged.
+ if isinstance(value, tuple):
+ return value[0], value[1], utils.unquote(value[2])
+ else:
+ return utils.unquote(value)
+
+
+
+class Message:
+ """Basic message object.
+
+ A message object is defined as something that has a bunch of RFC 2822
+ headers and a payload. It may optionally have an envelope header
+ (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
+ multipart or a message/rfc822), then the payload is a list of Message
+ objects, otherwise it is a string.
+
+ Message objects implement part of the `mapping' interface, which assumes
+ there is exactly one occurrence of the header per message. Some headers
+ do in fact appear multiple times (e.g. Received) and for those headers,
+ you must use the explicit API to set or get all the headers. Not all of
+ the mapping methods are implemented.
+ """
+ def __init__(self):
+ # Create an empty message: no headers, no payload, no charset.
+ # Headers are kept as a list of (name, value) tuples, in insertion order.
+ self._headers = []
+ # Envelope (Unix From_) line, or None.
+ self._unixfrom = None
+ # None until set; afterwards a list (multipart) or a scalar payload.
+ self._payload = None
+ self._charset = None
+ # Defaults for multipart messages
+ self.preamble = self.epilogue = None
+ self.defects = []
+ # Default content type
+ self._default_type = 'text/plain'
+
+ def __str__(self):
+ """Return the entire formatted message as a string.
+ This includes the headers, body, and envelope header.
+ """
+ # Unlike as_string()'s default, the Unix From_ line is requested here.
+ return self.as_string(unixfrom=True)
+
+ def as_string(self, unixfrom=False):
+ """Return the entire formatted message as a string.
+ Optional `unixfrom' when True, means include the Unix From_ envelope
+ header.
+
+ This is a convenience method and may not generate the message exactly
+ as you intend because by default it mangles lines that begin with
+ "From ". For more flexibility, use the flatten() method of a
+ Generator instance.
+ """
+ # NOTE(review): imported at call time rather than at module level,
+ # presumably to avoid a circular import with email.generator -- confirm.
+ from email.generator import Generator
+ # Flatten into an in-memory buffer and return its contents.
+ fp = StringIO()
+ g = Generator(fp)
+ g.flatten(self, unixfrom=unixfrom)
+ return fp.getvalue()
+
+ def is_multipart(self):
+ """Return True if the message consists of multiple parts."""
+ # Decided purely by the payload's type: a list payload means multipart,
+ # regardless of what the Content-Type header says.
+ return isinstance(self._payload, list)
+
+ #
+ # Unix From_ line
+ #
+ def set_unixfrom(self, unixfrom):
+ # Store the Unix From_ envelope line as given; no validation is done.
+ self._unixfrom = unixfrom
+
+ def get_unixfrom(self):
+ # Return the stored Unix From_ envelope line (None if never set).
+ return self._unixfrom
+
+ #
+ # Payload manipulation.
+ #
+ def attach(self, payload):
+ """Add the given payload to the current payload.
+
+ The current payload will always be a list of objects after this method
+ is called. If you want to set the payload to a scalar object, use
+ set_payload() instead.
+ """
+ # NOTE(review): this assumes self._payload is either None or a list; a
+ # scalar payload set via set_payload() (e.g. a string) has no append().
+ if self._payload is None:
+ self._payload = [payload]
+ else:
+ self._payload.append(payload)
+
+ def get_payload(self, i=None, decode=False):
+ """Return a reference to the payload.
+
+ The payload will either be a list object or a string. If you mutate
+ the list object, you modify the message's payload in place. Optional
+ i returns that index into the payload.
+
+ Optional decode is a flag indicating whether the payload should be
+ decoded or not, according to the Content-Transfer-Encoding header
+ (default is False).
+
+ When True and the message is not a multipart, the payload will be
+ decoded if this header's value is `quoted-printable' or `base64'. If
+ some other encoding is used, or the header is missing, or if the
+ payload has bogus data (i.e. bogus base64 or uuencoded data), the
+ payload is returned as-is.
+
+ If the message is a multipart and the decode flag is True, then None
+ is returned.
+
+ A TypeError is raised when i is given but the payload is not a list.
+ """
+ if i is None:
+ payload = self._payload
+ elif not isinstance(self._payload, list):
+ raise TypeError('Expected list, got %s' % type(self._payload))
+ else:
+ payload = self._payload[i]
+ if decode:
+ if self.is_multipart():
+ return None
+ # The header value is lower-cased before comparison, so the encoding
+ # name is matched case-insensitively; a missing header yields ''.
+ cte = self.get('content-transfer-encoding', '').lower()
+ if cte == 'quoted-printable':
+ return utils._qdecode(payload)
+ elif cte == 'base64':
+ try:
+ return utils._bdecode(payload)
+ except binascii.Error:
+ # Incorrect padding
+ return payload
+ elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
+ sfp = StringIO()
+ try:
+ # NOTE(review): a newline is appended before decoding, presumably so
+ # the decoder always sees a terminated last line -- confirm.
+ uu.decode(StringIO(payload+'\n'), sfp, quiet=True)
+ payload = sfp.getvalue()
+ except uu.Error:
+ # Some decoding problem
+ return payload
+ # Everything else, including encodings with 8bit or 7bit are returned
+ # unchanged.
+ return payload
+
+ def set_payload(self, payload, charset=None):
+ """Set the payload to the given value.
+
+ Optional charset sets the message's default character set. See
+ set_charset() for details.
+ """
+ # The payload is replaced before the charset is applied, so
+ # set_charset() operates on the new payload.
+ self._payload = payload
+ if charset is not None:
+ self.set_charset(charset)
+
+ def set_charset(self, charset):
+ """Set the charset of the payload to a given character set.
+
+ charset can be a Charset instance, a string naming a character set, or
+ None. If it is a string it will be converted to a Charset instance.
+ If charset is None, the charset parameter will be removed from the
+ Content-Type field. Anything else will generate a TypeError.
+
+ The message will be assumed to be of type text/* encoded with
+ charset.input_charset. It will be converted to charset.output_charset
+ and encoded properly, if needed, when generating the plain text
+ representation of the message. MIME headers (MIME-Version,
+ Content-Type, Content-Transfer-Encoding) will be added as needed.
+
+ """
+ if charset is None:
+ self.del_param('charset')
+ self._charset = None
+ return
+ if isinstance(charset, basestring):
+ charset = email.charset.Charset(charset)
+ if not isinstance(charset, email.charset.Charset):
+ raise TypeError(charset)
+ # BAW: should we accept strings that can serve as arguments to the
+ # Charset constructor?
+ self._charset = charset
+ # Existing MIME-Version / Content-Type headers are kept; only missing
+ # ones are added, and an existing Content-Type just gets its charset
+ # parameter set.
+ if 'MIME-Version' not in self:
+ self.add_header('MIME-Version', '1.0')
+ if 'Content-Type' not in self:
+ self.add_header('Content-Type', 'text/plain',
+ charset=charset.get_output_charset())
+ else:
+ self.set_param('charset', charset.get_output_charset())
+ # A unicode payload is turned into a byte string in the output charset.
+ if isinstance(self._payload, unicode):
+ self._payload = self._payload.encode(charset.output_charset)
+ # When the charset's string form differs from its output charset the
+ # payload is passed through charset.body_encode().
+ if str(charset) != charset.get_output_charset():
+ self._payload = charset.body_encode(self._payload)
+ if 'Content-Transfer-Encoding' not in self:
+ cte = charset.get_body_encoding()
+ # NOTE(review): cte is first tried as a callable taking the message;
+ # the TypeError path presumably covers the case where it is a plain
+ # string naming the encoding, which is then added as the header value
+ # -- confirm against Charset.get_body_encoding().
+ try:
+ cte(self)
+ except TypeError:
+ self._payload = charset.body_encode(self._payload)
+ self.add_header('Content-Transfer-Encoding', cte)
+
+ def get_charset(self):
+ """Return the Charset instance associated with the message's payload.
+
+ This is None when no charset has been set.
+ """
+ return self._charset
+
+ #
+ # MAPPING INTERFACE (partial)
+ #
+ def __len__(self):
+ """Return the total number of headers, including duplicates."""
+ # Counts header entries, not the size of the payload.
+ return len(self._headers)
+
+ def __getitem__(self, name):
+ """Get a header value.
+
+ Return None if the header is missing instead of raising an exception.
+
+ Note that if the header appeared multiple times, exactly which
+ occurrence gets returned is undefined. Use get_all() to get all
+ the values matching a header field name.
+ """
+ # Delegates to get(), whose default failobj is None; that is why a
+ # missing header does not raise KeyError.
+ return self.get(name)
+
+ def __setitem__(self, name, val):
+ """Set the value of a header.
+
+ Note: this does not overwrite an existing header with the same field
+ name. Use __delitem__() first to delete any existing headers.
+ """
+ # Always appends, so assigning the same name twice yields two headers.
+ self._headers.append((name, val))
+
+ def __delitem__(self, name):
+ """Delete all occurrences of a header, if present.
+
+ Does not raise an exception if the header is missing.
+ """
+ # Names are compared case-insensitively. A new list is built and then
+ # swapped in, rather than removing entries from the list in place.
+ name = name.lower()
+ newheaders = []
+ for k, v in self._headers:
+ if k.lower() != name:
+ newheaders.append((k, v))
+ self._headers = newheaders
+
+ def __contains__(self, name):
+ # Case-insensitive test for the presence of a header field name.
+ # NOTE(review): a full list of lower-cased names is built on every call.
+ return name.lower() in [k.lower() for k, v in self._headers]
+
+ def has_key(self, name):
+ """Return true if the message contains the header."""
+ # A private sentinel is used as the default so that a header whose
+ # stored value is None is still reported as present.
+ missing = object()
+ return self.get(name, missing) is not missing
+
+ def keys(self):
+ """Return a list of all the message's header field names.
+
+ These will be sorted in the order they appeared in the original
+ message, or were added to the message, and may contain duplicates.
+ Any fields deleted and re-inserted are always appended to the header
+ list.
+ """
+ # Names are returned exactly as stored (no case normalization).
+ return [k for k, v in self._headers]
+
+ def values(self):
+ """Return a list of all the message's header values.
+
+ These will be sorted in the order they appeared in the original
+ message, or were added to the message, and may contain duplicates.
+ Any fields deleted and re-inserted are always appended to the header
+ list.
+ """
+ # Same order as keys(), since both walk self._headers front to back.
+ return [v for k, v in self._headers]
+
+ def items(self):
+ """Get all the message's header fields and values.
+
+ These will be sorted in the order they appeared in the original
+ message, or were added to the message, and may contain duplicates.
+ Any fields deleted and re-inserted are always appended to the header
+ list.
+ """
+ # A shallow copy is returned, so changes to the returned list do not
+ # affect the message's own header list.
+ return self._headers[:]
+
+ def get(self, name, failobj=None):
+ """Get a header value.
+
+ Like __getitem__() but return failobj instead of None when the field
+ is missing.
+ """
+ # Case-insensitive lookup; the first matching header in list order wins.
+ name = name.lower()
+ for k, v in self._headers:
+ if k.lower() == name:
+ return v
+ return failobj
+
+ #
+ # Additional useful stuff
+ #
+
+ def get_all(self, name, failobj=None):
+ """Return a list of all the values for the named field.
+
+ These will be sorted in the order they appeared in the original
+ message, and may contain duplicates. Any fields deleted and
+ re-inserted are always appended to the header list.
+
+ If no such fields exist, failobj is returned (defaults to None).
+ """
+ values = []
+ # Field names are matched case-insensitively.
+ name = name.lower()
+ for k, v in self._headers:
+ if k.lower() == name:
+ values.append(v)
+ # An empty result is reported as failobj, never as an empty list.
+ if not values:
+ return failobj
+ return values
+
+ def add_header(self, _name, _value, **_params):
+ """Extended header setting.
+
+ name is the header field to add. keyword arguments can be used to set
+ additional parameters for the header field, with underscores converted
+ to dashes. Normally the parameter will be added as key="value" unless
+ value is None, in which case only the key will be added. If a
+ parameter value contains non-ASCII characters it must be specified as a
+ three-tuple of (charset, language, value), in which case it will be
+ encoded according to RFC2231 rules.
+
+ Example:
+
+ msg.add_header('content-disposition', 'attachment', filename='bud.gif')
+ """
+ parts = []
+ # NOTE(review): parameters are emitted in _params.items() order, which
+ # for a plain dict of keyword arguments is not guaranteed to be the
+ # order they were written in the call.
+ for k, v in _params.items():
+ if v is None:
+ parts.append(k.replace('_', '-'))
+ else:
+ parts.append(_formatparam(k.replace('_', '-'), v))
+ # The main value goes first; with _value None the header consists of
+ # the parameters only.
+ if _value is not None:
+ parts.insert(0, _value)
+ self._headers.append((_name, SEMISPACE.join(parts)))
+
+ def replace_header(self, _name, _value):
+ """Replace a header.
+
+ Replace the first matching header found in the message, retaining
+ header order and case. If no matching header was found, a KeyError is
+ raised.
+ """
+ _name = _name.lower()
+ # NOTE(review): zip(range(len(...)), ...) is a hand-rolled enumerate().
+ for i, (k, v) in zip(range(len(self._headers)), self._headers):
+ if k.lower() == _name:
+ # Keep the stored spelling of the field name (k); only the value
+ # changes.
+ self._headers[i] = (k, _value)
+ break
+ else:
+ # The KeyError carries the lower-cased name, not the caller's spelling.
+ raise KeyError(_name)
+
+ #
+ # Use these three methods instead of the three above.
+ #
+
+ def get_content_type(self):
+ """Return the message's content type.
+
+ The returned string is coerced to lower case of the form
+ `maintype/subtype'. If there was no Content-Type header in the
+ message, the default type as given by get_default_type() will be
+ returned. Since according to RFC 2045, messages always have a default
+ type this will always return a value.
+
+ RFC 2045 defines a message's default type to be text/plain unless it
+ appears inside a multipart/digest container, in which case it would be
+ message/rfc822.
+ """
+ missing = object()
+ value = self.get('content-type', missing)
+ if value is missing:
+ # This should have no parameters
+ return self.get_default_type()
+ # Drop any parameters: only the text before the first ';' is the type.
+ ctype = _splitparam(value)[0].lower()
+ # RFC 2045, section 5.2 says if it's invalid, use text/plain
+ if ctype.count('/') != 1:
+ return 'text/plain'
+ return ctype
+
+ def get_content_maintype(self):
+ """Return the message's main content type.
+
+ This is the `maintype' part of the string returned by
+ get_content_type().
+ """
+ ctype = self.get_content_type()
+ # Everything before the '/' separator.
+ return ctype.split('/')[0]
+
+ def get_content_subtype(self):
+ """Returns the message's sub-content type.
+
+ This is the `subtype' part of the string returned by
+ get_content_type().
+ """
+ ctype = self.get_content_type()
+ # NOTE(review): indexing [1] assumes the string contains a '/'.
+ # get_content_type() guarantees that for header values, but a default
+ # type set via set_default_type() is not validated.
+ return ctype.split('/')[1]
+
+ def get_default_type(self):
+ """Return the `default' content type.
+
+ Most messages have a default content type of text/plain, except for
+ messages that are subparts of multipart/digest containers. Such
+ subparts have a default content type of message/rfc822.
+ """
+ # Returns whatever was stored; the value is not derived from headers.
+ return self._default_type
+
+ def set_default_type(self, ctype):
+ """Set the `default' content type.
+
+ ctype should be either "text/plain" or "message/rfc822", although this
+ is not enforced. The default content type is not stored in the
+ Content-Type header.
+ """
+ # Stored as given: no validation and no lower-casing.
+ self._default_type = ctype
+
+ def _get_params_preserve(self, failobj, header):
+ # Like get_params() but preserves the quoting of values. BAW:
+ # should this be part of the public interface?
+ #
+ # Returns failobj when `header' is missing, otherwise a list of
+ # (name, value) pairs as post-processed by utils.decode_params().
+ missing = object()
+ value = self.get(header, missing)
+ if value is missing:
+ return failobj
+ params = []
+ # A ';' is prepended because _parseparam() only consumes items that
+ # start with ';'; this makes the leading main value the first item.
+ for p in _parseparam(';' + value):
+ try:
+ name, val = p.split('=', 1)
+ name = name.strip()
+ val = val.strip()
+ except ValueError:
+ # Must have been a bare attribute
+ name = p.strip()
+ val = ''
+ params.append((name, val))
+ params = utils.decode_params(params)
+ return params
+
+ def get_params(self, failobj=None, header='content-type', unquote=True):
+ """Return the message's Content-Type parameters, as a list.
+
+ The elements of the returned list are 2-tuples of key/value pairs, as
+ split on the `=' sign. The left hand side of the `=' is the key,
+ while the right hand side is the value. If there is no `=' sign in
+ the parameter the value is the empty string. The value is as
+ described in the get_param() method.
+
+ Optional failobj is the object to return if there is no Content-Type
+ header. Optional header is the header to search instead of
+ Content-Type. If unquote is True, the value is unquoted.
+ """
+ # A private sentinel distinguishes "header missing" from any value the
+ # caller might pass as failobj.
+ missing = object()
+ params = self._get_params_preserve(missing, header)
+ if params is missing:
+ return failobj
+ if unquote:
+ return [(k, _unquotevalue(v)) for k, v in params]
+ else:
+ return params
+
+ def get_param(self, param, failobj=None, header='content-type',
+ unquote=True):
+ """Return the parameter value if found in the Content-Type header.
+
+ Optional failobj is the object to return if there is no Content-Type
+ header, or the Content-Type header has no such parameter. Optional
+ header is the header to search instead of Content-Type.
+
+ Parameter keys are always compared case insensitively. The return
+ value can either be a string, or a 3-tuple if the parameter was RFC
+ 2231 encoded. When it's a 3-tuple, the elements of the value are of
+ the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
+ LANGUAGE can be None, in which case you should consider VALUE to be
+ encoded in the us-ascii charset. You can usually ignore LANGUAGE.
+
+ Your application should be prepared to deal with 3-tuple return
+ values, and can convert the parameter to a Unicode string like so:
+
+ param = msg.get_param('foo')
+ if isinstance(param, tuple):
+ param = unicode(param[2], param[0] or 'us-ascii')
+
+ In any case, the parameter value (either the returned string, or the
+ VALUE item in the 3-tuple) is always unquoted, unless unquote is set
+ to False.
+ """
+ # Guard first: with the header present, _get_params_preserve() below
+ # always returns a list, so iterating its result is safe.
+ if header not in self:
+ return failobj
+ for k, v in self._get_params_preserve(failobj, header):
+ # The first parameter whose name matches is returned.
+ if k.lower() == param.lower():
+ if unquote:
+ return _unquotevalue(v)
+ else:
+ return v
+ return failobj
+
+ def set_param(self, param, value, header='Content-Type', requote=True,
+ charset=None, language=''):
+ """Set a parameter in the Content-Type header.
+
+ If the parameter already exists in the header, its value will be
+ replaced with the new value.
+
+ If header is Content-Type and has not yet been defined for this
+ message, it will be set to "text/plain" and the new parameter and
+ value will be appended as per RFC 2045.
+
+ An alternate header can be specified in the header argument, and all
+ parameters will be quoted as necessary unless requote is False.
+
+ If charset is specified, the parameter will be encoded according to RFC
+ 2231. Optional language specifies the RFC 2231 language, defaulting
+ to the empty string. Both charset and language should be strings.
+ """
+ # Promote a plain value to the (charset, language, value) form that
+ # _formatparam() encodes per RFC 2231.
+ if not isinstance(value, tuple) and charset:
+ value = (charset, language, value)
+
+ if header not in self and header.lower() == 'content-type':
+ ctype = 'text/plain'
+ else:
+ ctype = self.get(header)
+ # NOTE(review): this is a truthiness test, so a parameter that exists
+ # with an empty value is treated like a missing one and appended.
+ if not self.get_param(param, header=header):
+ if not ctype:
+ ctype = _formatparam(param, value, requote)
+ else:
+ ctype = SEMISPACE.join(
+ [ctype, _formatparam(param, value, requote)])
+ else:
+ # Rebuild the whole header value, swapping in the new value for the
+ # matching parameter and re-formatting all the others.
+ ctype = ''
+ for old_param, old_value in self.get_params(header=header,
+ unquote=requote):
+ append_param = ''
+ if old_param.lower() == param.lower():
+ append_param = _formatparam(param, value, requote)
+ else:
+ append_param = _formatparam(old_param, old_value, requote)
+ if not ctype:
+ ctype = append_param
+ else:
+ ctype = SEMISPACE.join([ctype, append_param])
+ # Deleting and re-adding moves the header to the end of the header
+ # list; nothing is touched when the value is unchanged.
+ if ctype != self.get(header):
+ del self[header]
+ self[header] = ctype
+
+ def del_param(self, param, header='content-type', requote=True):
+ """Remove the given parameter completely from the Content-Type header.
+
+ The header will be re-written in place without the parameter or its
+ value. All values will be quoted as necessary unless requote is
+ False. Optional header specifies an alternative to the Content-Type
+ header.
+ """
+ if header not in self:
+ return
+ # Rebuild the header value from every parameter except the one being
+ # removed (names compared case-insensitively).
+ new_ctype = ''
+ for p, v in self.get_params(header=header, unquote=requote):
+ if p.lower() != param.lower():
+ if not new_ctype:
+ new_ctype = _formatparam(p, v, requote)
+ else:
+ new_ctype = SEMISPACE.join([new_ctype,
+ _formatparam(p, v, requote)])
+ # NOTE(review): the rewrite is done by deleting and re-adding, which
+ # appends the header at the end of the header list rather than keeping
+ # its original position.
+ if new_ctype != self.get(header):
+ del self[header]
+ self[header] = new_ctype
+
+ def set_type(self, type, header='Content-Type', requote=True):
+ """Set the main type and subtype for the Content-Type header.
+
+ type must be a string in the form "maintype/subtype", otherwise a
+ ValueError is raised.
+
+ This method replaces the Content-Type header, keeping all the
+ parameters in place. If requote is False, this leaves the existing
+ header's quoting as is. Otherwise, the parameters will be quoted (the
+ default).
+
+ An alternative header can be specified in the header argument. When
+ the Content-Type header is set, we'll always also add a MIME-Version
+ header.
+ """
+ # BAW: should we be strict?
+ # NOTE(review): the ValueError is raised without a message.
+ if not type.count('/') == 1:
+ raise ValueError
+ # Set the Content-Type, you get a MIME-Version
+ if header.lower() == 'content-type':
+ # Any existing MIME-Version header is dropped first so exactly one
+ # remains afterwards.
+ del self['mime-version']
+ self['MIME-Version'] = '1.0'
+ if header not in self:
+ self[header] = type
+ return
+ # Capture the old parameters before the header is deleted, then
+ # re-apply them one by one on top of the new type.
+ params = self.get_params(header=header, unquote=requote)
+ del self[header]
+ self[header] = type
+ # Skip the first param; it's the old type.
+ for p, v in params[1:]:
+ self.set_param(p, v, header, requote)
+
+ def get_filename(self, failobj=None):
+ """Return the filename associated with the payload if present.
+
+ The filename is extracted from the Content-Disposition header's
+ `filename' parameter, and it is unquoted. If that header is missing
+ the `filename' parameter, this method falls back to looking for the
+ `name' parameter.
+
+ The fallback `name' parameter is read from the Content-Type header.
+ failobj is returned when neither parameter is found.
+ """
+ missing = object()
+ filename = self.get_param('filename', missing, 'content-disposition')
+ if filename is missing:
+ filename = self.get_param('name', missing, 'content-type')
+ if filename is missing:
+ return failobj
+ # Surrounding whitespace is stripped from the collapsed value.
+ return utils.collapse_rfc2231_value(filename).strip()
+
+ def get_boundary(self, failobj=None):
+ """Return the boundary associated with the payload if present.
+
+ The boundary is extracted from the Content-Type header's `boundary'
+ parameter, and it is unquoted.
+
+ failobj is returned when there is no such parameter.
+ """
+ missing = object()
+ boundary = self.get_param('boundary', missing)
+ if boundary is missing:
+ return failobj
+ # RFC 2046 says that boundaries may begin but not end in w/s
+ # Hence rstrip() rather than strip(): leading whitespace is kept.
+ return utils.collapse_rfc2231_value(boundary).rstrip()
+
+ def set_boundary(self, boundary):
+ """Set the boundary parameter in Content-Type to 'boundary'.
+
+ This is subtly different than deleting the Content-Type header and
+ adding a new one with a new boundary parameter via add_header(). The
+ main difference is that using the set_boundary() method preserves the
+ order of the Content-Type header in the original message.
+
+ HeaderParseError is raised if the message has no Content-Type header.
+ """
+ missing = object()
+ params = self._get_params_preserve(missing, 'content-type')
+ if params is missing:
+ # There was no Content-Type header, and we don't know what type
+ # to set it to, so raise an exception.
+ raise errors.HeaderParseError('No Content-Type header found')
+ newparams = []
+ foundp = False
+ for pk, pv in params:
+ if pk.lower() == 'boundary':
+ # The new boundary is always written double-quoted, as given.
+ newparams.append(('boundary', '"%s"' % boundary))
+ foundp = True
+ else:
+ newparams.append((pk, pv))
+ if not foundp:
+ # The original Content-Type header had no boundary attribute.
+ # Tack one on the end. BAW: should we raise an exception
+ # instead???
+ newparams.append(('boundary', '"%s"' % boundary))
+ # Replace the existing Content-Type header with the new value
+ # The header list is rebuilt in order, so every Content-Type entry is
+ # rewritten where it stands and all other headers are copied through.
+ newheaders = []
+ for h, v in self._headers:
+ if h.lower() == 'content-type':
+ parts = []
+ # NOTE(review): this inner loop rebinds `v', shadowing the outer
+ # loop variable; it looks harmless because the outer `v' is not
+ # used again in this branch, but it is easy to misread.
+ for k, v in newparams:
+ if v == '':
+ parts.append(k)
+ else:
+ parts.append('%s=%s' % (k, v))
+ newheaders.append((h, SEMISPACE.join(parts)))
+
+ else:
+ newheaders.append((h, v))
+ self._headers = newheaders
+
+ def get_content_charset(self, failobj=None):
+ """Return the charset parameter of the Content-Type header.
+
+ The returned string is always coerced to lower case. If there is no
+ Content-Type header, or if that header has no charset parameter,
+ failobj is returned.
+
+ failobj is also returned when the charset name contains characters
+ outside the us-ascii range.
+ """
+ missing = object()
+ charset = self.get_param('charset', missing)
+ if charset is missing:
+ return failobj
+ if isinstance(charset, tuple):
+ # RFC 2231 encoded, so decode it, and it better end up as ascii.
+ pcharset = charset[0] or 'us-ascii'
+ try:
+ # LookupError will be raised if the charset isn't known to
+ # Python. UnicodeError will be raised if the encoded text
+ # contains a character not in the charset.
+ charset = unicode(charset[2], pcharset).encode('us-ascii')
+ except (LookupError, UnicodeError):
+ # Fall back to the raw value; the ascii check below still applies.
+ charset = charset[2]
+ # charset character must be in us-ascii range
+ try:
+ # Decode-then-encode as us-ascii: either step raises UnicodeError if
+ # a non-ascii character is present.
+ if isinstance(charset, str):
+ charset = unicode(charset, 'us-ascii')
+ charset = charset.encode('us-ascii')
+ except UnicodeError:
+ return failobj
+ # RFC 2046, $4.1.2 says charsets are not case sensitive
+ return charset.lower()
+
+ def get_charsets(self, failobj=None):
+ """Return a list containing the charset(s) used in this message.
+
+ The returned list of items describes the Content-Type headers'
+ charset parameter for this message and all the subparts in its
+ payload.
+
+ Each item will either be a string (the value of the charset parameter
+ in the Content-Type header of that part) or the value of the
+ 'failobj' parameter (defaults to None), if the part does not have a
+ main MIME type of "text", or the charset is not defined.
+
+ The list will contain one string for each part of the message, plus
+ one for the container message (i.e. self), so that a non-multipart
+ message will still return a list of length 1.
+ """
+ # Items follow the order in which walk() yields the parts, with this
+ # message itself first.
+ return [part.get_content_charset(failobj) for part in self.walk()]
+
+ # I.e. def walk(self): ...
+ from email.iterators import walk
diff --git a/cashew/Lib/email/mime/__init__.py b/cashew/Lib/email/mime/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cashew/Lib/email/mime/application.py b/cashew/Lib/email/mime/application.py
new file mode 100644
index 0000000..f5c5905
--- /dev/null
+++ b/cashew/Lib/email/mime/application.py
@@ -0,0 +1,36 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Keith Dart
+# Contact: email-sig@python.org
+
+"""Class representing application/* type MIME documents."""
+
+__all__ = ["MIMEApplication"]
+
+from email import encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+class MIMEApplication(MIMENonMultipart):
+ """Class for generating application/* MIME documents."""
+
+ def __init__(self, _data, _subtype='octet-stream',
+ _encoder=encoders.encode_base64, **_params):
+ """Create an application/* type MIME document.
+
+ _data is a string containing the raw application data.
+
+ _subtype is the MIME content type subtype, defaulting to
+ 'octet-stream'.
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the application data, defaulting to base64 encoding.
+
+ Any additional keyword arguments are passed to the base class
+ constructor, which turns them into parameters on the Content-Type
+ header.
+ """
+ if _subtype is None:
+ raise TypeError('Invalid application MIME subtype')
+ MIMENonMultipart.__init__(self, 'application', _subtype, **_params)
+ self.set_payload(_data)
+ _encoder(self)
diff --git a/cashew/Lib/email/mime/audio.py b/cashew/Lib/email/mime/audio.py
new file mode 100644
index 0000000..c7290c4
--- /dev/null
+++ b/cashew/Lib/email/mime/audio.py
@@ -0,0 +1,73 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Anthony Baxter
+# Contact: email-sig@python.org
+
+"""Class representing audio/* type MIME documents."""
+
+__all__ = ['MIMEAudio']
+
+import sndhdr
+
+from cStringIO import StringIO
+from email import encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+_sndhdr_MIMEmap = {'au' : 'basic',
+ 'wav' :'x-wav',
+ 'aiff':'x-aiff',
+ 'aifc':'x-aiff',
+ }
+
+# There are others in sndhdr that don't have MIME types. :(
+# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma??
+def _whatsnd(data):
+ """Try to identify a sound file type.
+
+ sndhdr.what() has a pretty cruddy interface, unfortunately. This is why
+ we re-do it here. It would be easier to reverse engineer the Unix 'file'
+ command and use the standard 'magic' file, as shipped with a modern Unix.
+ """
+ hdr = data[:512]
+ fakefile = StringIO(hdr)
+ for testfn in sndhdr.tests:
+ res = testfn(hdr, fakefile)
+ if res is not None:
+ return _sndhdr_MIMEmap.get(res[0])
+ return None
+
+
+
+class MIMEAudio(MIMENonMultipart):
+ """Class for generating audio/* MIME documents."""
+
+ def __init__(self, _audiodata, _subtype=None,
+ _encoder=encoders.encode_base64, **_params):
+ """Create an audio/* type MIME document.
+
+ _audiodata is a string containing the raw audio data. If this data
+ can be decoded by the standard Python `sndhdr' module, then the
+ subtype will be automatically included in the Content-Type header.
+ Otherwise, you can specify the specific audio subtype via the
+ _subtype parameter. If _subtype is not given, and no subtype can be
+ guessed, a TypeError is raised.
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the audio data. It takes one argument, which is this
+ MIMEAudio instance. It should use get_payload() and set_payload() to
+ change the payload to the encoded form. It should also add any
+ Content-Transfer-Encoding or other headers to the message as
+ necessary. The default encoding is Base64.
+
+ Any additional keyword arguments are passed to the base class
+ constructor, which turns them into parameters on the Content-Type
+ header.
+ """
+ if _subtype is None:
+ _subtype = _whatsnd(_audiodata)
+ if _subtype is None:
+ raise TypeError('Could not find audio MIME subtype')
+ MIMENonMultipart.__init__(self, 'audio', _subtype, **_params)
+ self.set_payload(_audiodata)
+ _encoder(self)
diff --git a/cashew/Lib/email/mime/base.py b/cashew/Lib/email/mime/base.py
new file mode 100644
index 0000000..ac91925
--- /dev/null
+++ b/cashew/Lib/email/mime/base.py
@@ -0,0 +1,26 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME specializations."""
+
+__all__ = ['MIMEBase']
+
+from email import message
+
+
+
+class MIMEBase(message.Message):
+ """Base class for MIME specializations."""
+
+ def __init__(self, _maintype, _subtype, **_params):
+ """This constructor adds a Content-Type: and a MIME-Version: header.
+
+ The Content-Type: header is taken from the _maintype and _subtype
+ arguments. Additional parameters for this header are taken from the
+ keyword arguments.
+ """
+ message.Message.__init__(self)
+ ctype = '%s/%s' % (_maintype, _subtype)
+ self.add_header('Content-Type', ctype, **_params)
+ self['MIME-Version'] = '1.0'
diff --git a/cashew/Lib/email/mime/image.py b/cashew/Lib/email/mime/image.py
new file mode 100644
index 0000000..5563823
--- /dev/null
+++ b/cashew/Lib/email/mime/image.py
@@ -0,0 +1,46 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing image/* type MIME documents."""
+
+__all__ = ['MIMEImage']
+
+import imghdr
+
+from email import encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEImage(MIMENonMultipart):
+ """Class for generating image/* type MIME documents."""
+
+ def __init__(self, _imagedata, _subtype=None,
+ _encoder=encoders.encode_base64, **_params):
+ """Create an image/* type MIME document.
+
+ _imagedata is a string containing the raw image data. If this data
+ can be decoded by the standard Python `imghdr' module, then the
+ subtype will be automatically included in the Content-Type header.
+ Otherwise, you can specify the specific image subtype via the _subtype
+ parameter.
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the image data. It takes one argument, which is this
+ Image instance. It should use get_payload() and set_payload() to
+ change the payload to the encoded form. It should also add any
+ Content-Transfer-Encoding or other headers to the message as
+ necessary. The default encoding is Base64.
+
+ Any additional keyword arguments are passed to the base class
+ constructor, which turns them into parameters on the Content-Type
+ header.
+ """
+ if _subtype is None:
+ _subtype = imghdr.what(None, _imagedata)
+ if _subtype is None:
+ raise TypeError('Could not guess image MIME subtype')
+ MIMENonMultipart.__init__(self, 'image', _subtype, **_params)
+ self.set_payload(_imagedata)
+ _encoder(self)
diff --git a/cashew/Lib/email/mime/message.py b/cashew/Lib/email/mime/message.py
new file mode 100644
index 0000000..275dbfd
--- /dev/null
+++ b/cashew/Lib/email/mime/message.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing message/* MIME documents."""
+
+__all__ = ['MIMEMessage']
+
+from email import message
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEMessage(MIMENonMultipart):
+ """Class representing message/* MIME documents."""
+
+ def __init__(self, _msg, _subtype='rfc822'):
+ """Create a message/* type MIME document.
+
+ _msg is a message object and must be an instance of Message, or a
+ derived class of Message, otherwise a TypeError is raised.
+
+ Optional _subtype defines the subtype of the contained message. The
+ default is "rfc822" (this is defined by the MIME standard, even though
+ the term "rfc822" is technically outdated by RFC 2822).
+ """
+ MIMENonMultipart.__init__(self, 'message', _subtype)
+ if not isinstance(_msg, message.Message):
+ raise TypeError('Argument is not an instance of Message')
+ # It's convenient to use this base class method. We need to do it
+ # this way or we'll get an exception
+ message.Message.attach(self, _msg)
+ # And be sure our default type is set correctly
+ self.set_default_type('message/rfc822')
diff --git a/cashew/Lib/email/mime/multipart.py b/cashew/Lib/email/mime/multipart.py
new file mode 100644
index 0000000..9661865
--- /dev/null
+++ b/cashew/Lib/email/mime/multipart.py
@@ -0,0 +1,47 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME multipart/* type messages."""
+
+__all__ = ['MIMEMultipart']
+
+from email.mime.base import MIMEBase
+
+
+
+class MIMEMultipart(MIMEBase):
+ """Base class for MIME multipart/* type messages."""
+
+ def __init__(self, _subtype='mixed', boundary=None, _subparts=None,
+ **_params):
+ """Creates a multipart/* type message.
+
+ By default, creates a multipart/mixed message, with proper
+ Content-Type and MIME-Version headers.
+
+ _subtype is the subtype of the multipart content type, defaulting to
+ `mixed'.
+
+ boundary is the multipart boundary string. By default it is
+ calculated as needed.
+
+ _subparts is a sequence of initial subparts for the payload. It
+ must be an iterable object, such as a list. You can always
+ attach new subparts to the message by using the attach() method.
+
+ Additional parameters for the Content-Type header are taken from the
+ keyword arguments (or passed into the _params argument).
+ """
+ MIMEBase.__init__(self, 'multipart', _subtype, **_params)
+
+ # Initialise _payload to an empty list as the Message superclass's
+ # implementation of is_multipart assumes that _payload is a list for
+ # multipart messages.
+ self._payload = []
+
+ if _subparts:
+ for p in _subparts:
+ self.attach(p)
+ if boundary:
+ self.set_boundary(boundary)
diff --git a/cashew/Lib/email/mime/nonmultipart.py b/cashew/Lib/email/mime/nonmultipart.py
new file mode 100644
index 0000000..e1f5196
--- /dev/null
+++ b/cashew/Lib/email/mime/nonmultipart.py
@@ -0,0 +1,22 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME type messages that are not multipart."""
+
+__all__ = ['MIMENonMultipart']
+
+from email import errors
+from email.mime.base import MIMEBase
+
+
+
+class MIMENonMultipart(MIMEBase):
+ """Base class for MIME non-multipart type messages."""
+
+ def attach(self, payload):
+ # The public API prohibits attaching multiple subparts to MIMEBase
+ # derived subtypes since none of them are, by definition, of content
+ # type multipart/*
+ raise errors.MultipartConversionError(
+ 'Cannot attach additional subparts to non-multipart/*')
diff --git a/cashew/Lib/email/mime/text.py b/cashew/Lib/email/mime/text.py
new file mode 100644
index 0000000..5747db5
--- /dev/null
+++ b/cashew/Lib/email/mime/text.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing text/* type MIME documents."""
+
+__all__ = ['MIMEText']
+
+from email.encoders import encode_7or8bit
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEText(MIMENonMultipart):
+ """Class for generating text/* type MIME documents."""
+
+ def __init__(self, _text, _subtype='plain', _charset='us-ascii'):
+ """Create a text/* type MIME document.
+
+ _text is the string for this message object.
+
+ _subtype is the MIME sub content type, defaulting to "plain".
+
+ _charset is the character set parameter added to the Content-Type
+ header. This defaults to "us-ascii". Note that as a side-effect, the
+ Content-Transfer-Encoding header will also be set.
+ """
+ MIMENonMultipart.__init__(self, 'text', _subtype,
+ **{'charset': _charset})
+ self.set_payload(_text, _charset)
diff --git a/cashew/Lib/email/parser.py b/cashew/Lib/email/parser.py
new file mode 100644
index 0000000..6dad32a
--- /dev/null
+++ b/cashew/Lib/email/parser.py
@@ -0,0 +1,91 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
+# Contact: email-sig@python.org
+
+"""A parser of RFC 2822 and MIME email messages."""
+
+__all__ = ['Parser', 'HeaderParser']
+
+import warnings
+from cStringIO import StringIO
+
+from email.feedparser import FeedParser
+from email.message import Message
+
+
+
+class Parser:
+ def __init__(self, *args, **kws):
+ """Parser of RFC 2822 and MIME email messages.
+
+ Creates an in-memory object tree representing the email message, which
+ can then be manipulated and turned over to a Generator to return the
+ textual representation of the message.
+
+ The string must be formatted as a block of RFC 2822 headers and header
+ continuation lines, optionally preceded by a `Unix-from' header. The
+ header block is terminated either by the end of the string or by a
+ blank line.
+
+ _class is the class to instantiate for new message objects when they
+ must be created. This class must have a constructor that can take
+ zero arguments. Default is Message.Message.
+ """
+ if len(args) >= 1:
+ if '_class' in kws:
+ raise TypeError("Multiple values for keyword arg '_class'")
+ kws['_class'] = args[0]
+ if len(args) == 2:
+ if 'strict' in kws:
+ raise TypeError("Multiple values for keyword arg 'strict'")
+ kws['strict'] = args[1]
+ if len(args) > 2:
+ raise TypeError('Too many arguments')
+ if '_class' in kws:
+ self._class = kws['_class']
+ del kws['_class']
+ else:
+ self._class = Message
+ if 'strict' in kws:
+ warnings.warn("'strict' argument is deprecated (and ignored)",
+ DeprecationWarning, 2)
+ del kws['strict']
+ if kws:
+ raise TypeError('Unexpected keyword arguments')
+
+ def parse(self, fp, headersonly=False):
+ """Create a message structure from the data in a file.
+
+ Reads all the data from the file and returns the root of the message
+ structure. Optional headersonly is a flag specifying whether to stop
+ parsing after reading the headers or not. The default is False,
+ meaning it parses the entire contents of the file.
+ """
+ feedparser = FeedParser(self._class)
+ if headersonly:
+ feedparser._set_headersonly()
+ while True:
+ data = fp.read(8192)
+ if not data:
+ break
+ feedparser.feed(data)
+ return feedparser.close()
+
+ def parsestr(self, text, headersonly=False):
+ """Create a message structure from a string.
+
+ Returns the root of the message structure. Optional headersonly is a
+ flag specifying whether to stop parsing after reading the headers or
+ not. The default is False, meaning it parses the entire contents of
+ the file.
+ """
+ return self.parse(StringIO(text), headersonly=headersonly)
+
+
+
+class HeaderParser(Parser):
+ def parse(self, fp, headersonly=True):
+ return Parser.parse(self, fp, True)
+
+ def parsestr(self, text, headersonly=True):
+ return Parser.parsestr(self, text, True)
diff --git a/cashew/Lib/email/quoprimime.py b/cashew/Lib/email/quoprimime.py
new file mode 100644
index 0000000..cd818eb
--- /dev/null
+++ b/cashew/Lib/email/quoprimime.py
@@ -0,0 +1,336 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Ben Gertzfield
+# Contact: email-sig@python.org
+
+"""Quoted-printable content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
+safely encode text that is in a character set similar to the 7-bit US ASCII
+character set, but that includes some 8-bit characters that are normally not
+allowed in email bodies or headers.
+
+Quoted-printable is very space-inefficient for encoding binary files; use the
+email.base64mime module for that instead.
+
+This module provides an interface to encode and decode both headers and bodies
+with quoted-printable encoding.
+
+RFC 2047 defines a method for including character set information in an
+`encoded-word' in a header. This method is commonly used for 8-bit real names
+in To:/From:/Cc: etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character
+conversion necessary for proper internationalized headers; it only
+does dumb encoding and decoding. To deal with the various line
+wrapping issues, use the email.header module.
+"""
+
+__all__ = [
+ 'body_decode',
+ 'body_encode',
+ 'body_quopri_check',
+ 'body_quopri_len',
+ 'decode',
+ 'decodestring',
+ 'encode',
+ 'encodestring',
+ 'header_decode',
+ 'header_encode',
+ 'header_quopri_check',
+ 'header_quopri_len',
+ 'quote',
+ 'unquote',
+ ]
+
+import re
+
+from string import hexdigits
+from email.utils import fix_eols
+
+CRLF = '\r\n'
+NL = '\n'
+
+# See also Charset.py
+MISC_LEN = 7
+
+hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]')
+bqre = re.compile(r'[^ !-<>-~\t]')
+
+
+
+# Helpers
+def header_quopri_check(c):
+ """Return True if the character should be escaped with header quopri."""
+ return bool(hqre.match(c))
+
+
+def body_quopri_check(c):
+ """Return True if the character should be escaped with body quopri."""
+ return bool(bqre.match(c))
+
+
+def header_quopri_len(s):
+ """Return the length of str when it is encoded with header quopri."""
+ count = 0
+ for c in s:
+ if hqre.match(c):
+ count += 3
+ else:
+ count += 1
+ return count
+
+
+def body_quopri_len(str):
+ """Return the length of str when it is encoded with body quopri."""
+ count = 0
+ for c in str:
+ if bqre.match(c):
+ count += 3
+ else:
+ count += 1
+ return count
+
+
+def _max_append(L, s, maxlen, extra=''):
+ if not L:
+ L.append(s.lstrip())
+ elif len(L[-1]) + len(s) <= maxlen:
+ L[-1] += extra + s
+ else:
+ L.append(s.lstrip())
+
+
+def unquote(s):
+ """Turn a string in the form =AB to the ASCII character with value 0xab"""
+ return chr(int(s[1:3], 16))
+
+
+def quote(c):
+ return "=%02X" % ord(c)
+
+
+
+def header_encode(header, charset="iso-8859-1", keep_eols=False,
+ maxlinelen=76, eol=NL):
+ """Encode a single header line with quoted-printable (like) encoding.
+
+ Defined in RFC 2047, this `Q' encoding is similar to quoted-printable, but
+ used specifically for email header fields to allow charsets with mostly 7
+ bit characters (and some 8 bit) to remain more or less readable in non-RFC
+ 2045 aware mail clients.
+
+ charset names the character set to use to encode the header. It defaults
+ to iso-8859-1.
+
+ The resulting string will be in the form:
+
+ "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?\\n
+ =?charset?q?Silly_=C8nglish_Kn=EEghts?="
+
+ with each line wrapped safely at, at most, maxlinelen characters (defaults
+ to 76 characters). If maxlinelen is None, the entire string is encoded in
+ one chunk with no splitting.
+
+ End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
+ to the canonical email line separator \\r\\n unless the keep_eols
+ parameter is True (the default is False).
+
+ Each line of the header will be terminated in the value of eol, which
+ defaults to "\\n". Set this to "\\r\\n" if you are using the result of
+ this function directly in email.
+ """
+ # Return empty headers unchanged
+ if not header:
+ return header
+
+ if not keep_eols:
+ header = fix_eols(header)
+
+ # Quopri encode each line, in encoded chunks no greater than maxlinelen in
+ # length, after the RFC chrome is added in.
+ quoted = []
+ if maxlinelen is None:
+ # An obnoxiously large number that's good enough
+ max_encoded = 100000
+ else:
+ max_encoded = maxlinelen - len(charset) - MISC_LEN - 1
+
+ for c in header:
+ # Space may be represented as _ instead of =20 for readability
+ if c == ' ':
+ _max_append(quoted, '_', max_encoded)
+ # These characters can be included verbatim
+ elif not hqre.match(c):
+ _max_append(quoted, c, max_encoded)
+ # Otherwise, replace with hex value like =E2
+ else:
+ _max_append(quoted, "=%02X" % ord(c), max_encoded)
+
+ # Now add the RFC chrome to each encoded chunk and glue the chunks
+ # together. BAW: should we be able to specify the leading whitespace in
+ # the joiner?
+ joiner = eol + ' '
+ return joiner.join(['=?%s?q?%s?=' % (charset, line) for line in quoted])
+
+
+
+def encode(body, binary=False, maxlinelen=76, eol=NL):
+ """Encode with quoted-printable, wrapping at maxlinelen characters.
+
+ If binary is False (the default), end-of-line characters will be converted
+ to the canonical email end-of-line sequence \\r\\n. Otherwise they will
+ be left verbatim.
+
+ Each line of encoded text will end with eol, which defaults to "\\n". Set
+ this to "\\r\\n" if you will be using the result of this function directly
+ in an email.
+
+ Each line will be wrapped at, at most, maxlinelen characters (defaults to
+ 76 characters). Long lines will have the `soft linefeed' quoted-printable
+ character "=" appended to them, so the decoded text will be identical to
+ the original text.
+ """
+ if not body:
+ return body
+
+ if not binary:
+ body = fix_eols(body)
+
+ # BAW: We're accumulating the body text by string concatenation. That
+ # can't be very efficient, but I don't have time now to rewrite it. It
+ # just feels like this algorithm could be more efficient.
+ encoded_body = ''
+ lineno = -1
+ # Preserve line endings here so we can check later to see if an eol needs to
+ # be added to the output.
+ lines = body.splitlines(1)
+ for line in lines:
+ # But strip off line-endings for processing this line.
+ if line.endswith(CRLF):
+ line = line[:-2]
+ elif line[-1] in CRLF:
+ line = line[:-1]
+
+ lineno += 1
+ encoded_line = ''
+ prev = None
+ linelen = len(line)
+ # Now we need to examine every character to see if it needs to be
+ # quopri encoded. BAW: again, string concatenation is inefficient.
+ for j in range(linelen):
+ c = line[j]
+ prev = c
+ if bqre.match(c):
+ c = quote(c)
+ elif j+1 == linelen:
+ # Check for whitespace at end of line; special case
+ if c not in ' \t':
+ encoded_line += c
+ prev = c
+ continue
+ # Check to see if the line has reached its maximum length
+ if len(encoded_line) + len(c) >= maxlinelen:
+ encoded_body += encoded_line + '=' + eol
+ encoded_line = ''
+ encoded_line += c
+ # Now at end of line..
+ if prev and prev in ' \t':
+ # Special case for whitespace at end of file
+ if lineno + 1 == len(lines):
+ prev = quote(prev)
+ if len(encoded_line) + len(prev) > maxlinelen:
+ encoded_body += encoded_line + '=' + eol + prev
+ else:
+ encoded_body += encoded_line + prev
+ # Just normal whitespace at end of line
+ else:
+ encoded_body += encoded_line + prev + '=' + eol
+ encoded_line = ''
+ # Now look at the line we just finished and if it has a line ending, we
+ # need to add eol to the end of the line.
+ if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF:
+ encoded_body += encoded_line + eol
+ else:
+ encoded_body += encoded_line
+ encoded_line = ''
+ return encoded_body
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_encode = encode
+encodestring = encode
+
+
+
+# BAW: I'm not sure if the intent was for the signature of this function to be
+# the same as base64MIME.decode() or not...
+def decode(encoded, eol=NL):
+    """Decode a quoted-printable string.
+
+    Lines are separated with eol, which defaults to \\n.
+    """
+    if not encoded:
+        return encoded
+    # BAW: see comment in encode() above.  Again, we're building up the
+    # decoded string with string concatenation, which could be done much more
+    # efficiently.
+    decoded = ''
+
+    for line in encoded.splitlines():
+        line = line.rstrip()
+        if not line:
+            decoded += eol
+            continue
+
+        i = 0
+        n = len(line)
+        while i < n:
+            c = line[i]
+            if c != '=':
+                decoded += c
+                i += 1
+            # Otherwise, c == "=".  A trailing "=" is a soft line break: skip
+            # it and do not emit eol for this line.
+            elif i+1 == n:
+                i += 1
+                continue
+            # Decode if in form =AB
+            elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
+                decoded += unquote(line[i:i+3])
+                i += 3
+            # Otherwise, not in form =AB, pass literally
+            else:
+                decoded += c
+                i += 1
+
+            if i == n:
+                decoded += eol
+    # Input had no trailing eol: drop the whole eol we appended, not just 1 char
+    if not encoded.endswith(eol) and decoded.endswith(eol):
+        decoded = decoded[:-len(eol)]
+    return decoded
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_decode = decode
+decodestring = decode
+
+
+
+def _unquote_match(match):
+ """Turn a match in the form =AB to the ASCII character with value 0xab"""
+ s = match.group(0)
+ return unquote(s)
+
+
+# Header decoding is done a bit differently
+def header_decode(s):
+ """Decode a string encoded with RFC 2045 MIME header `Q' encoding.
+
+ This function does not parse a full MIME header value encoded with
+ quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
+ the high level email.header class for that functionality.
+ """
+ s = s.replace('_', ' ')
+ return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s)
diff --git a/cashew/Lib/email/utils.py b/cashew/Lib/email/utils.py
new file mode 100644
index 0000000..5b22521
--- /dev/null
+++ b/cashew/Lib/email/utils.py
@@ -0,0 +1,329 @@
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Miscellaneous utilities."""
+
+__all__ = [
+ 'collapse_rfc2231_value',
+ 'decode_params',
+ 'decode_rfc2231',
+ 'encode_rfc2231',
+ 'formataddr',
+ 'formatdate',
+ 'getaddresses',
+ 'make_msgid',
+ 'mktime_tz',
+ 'parseaddr',
+ 'parsedate',
+ 'parsedate_tz',
+ 'unquote',
+ ]
+
+import os
+import re
+import time
+import base64
+import random
+import socket
+import urllib
+import warnings
+
+from email._parseaddr import quote
+from email._parseaddr import AddressList as _AddressList
+from email._parseaddr import mktime_tz
+
+# We need workarounds for bugs in these methods in older Pythons (see below)
+from email._parseaddr import parsedate as _parsedate
+from email._parseaddr import parsedate_tz as _parsedate_tz
+
+from quopri import decodestring as _qdecode
+
+# Intrapackage imports
+from email.encoders import _bencode, _qencode
+
+COMMASPACE = ', '
+EMPTYSTRING = ''
+UEMPTYSTRING = u''
+CRLF = '\r\n'
+TICK = "'"
+
+specialsre = re.compile(r'[][\\()<>@,:;".]')
+escapesre = re.compile(r'[][\\()"]')
+
+
+
+# Helpers
+
+def _identity(s):
+ return s
+
+
+def _bdecode(s):
+ """Decodes a base64 string.
+
+ This function is equivalent to base64.decodestring and it's retained only
+ for backward compatibility. It used to remove the last \\n of the decoded
+ string, if it had any (see issue 7143).
+ """
+ if not s:
+ return s
+ return base64.decodestring(s)
+
+
+
+def fix_eols(s):
+    """Replace all line-ending characters with \\r\\n."""
+    # Fix newlines with no preceding carriage return
+    s = re.sub(r'(?<!\r)\n', CRLF, s)
+    # Fix carriage returns with no following newline
+    s = re.sub(r'\r(?!\n)', CRLF, s)
+    return s
+
+
+
+def formataddr(pair):
+    """The inverse of parseaddr(), this takes a 2-tuple of the form
+    (realname, email_address) and returns the string value suitable
+    for an RFC 2822 From, To or Cc header.
+
+    If the first element of pair is false, then the second element is
+    returned unmodified.
+    """
+    name, address = pair
+    if name:
+        quotes = ''
+        if specialsre.search(name):
+            quotes = '"'
+        name = escapesre.sub(r'\\\g<0>', name)
+        return '%s%s%s <%s>' % (quotes, name, quotes, address)
+    return address
+
+
+
+def getaddresses(fieldvalues):
+ """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+ all = COMMASPACE.join(fieldvalues)
+ a = _AddressList(all)
+ return a.addresslist
+
+
+
+ecre = re.compile(r'''
+ =\? # literal =?
+ (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
+ \? # literal ?
+ (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
+ \? # literal ?
+ (?P<atom>.*?) # non-greedy up to the next ?= is the atom
+ \?= # literal ?=
+ ''', re.VERBOSE | re.IGNORECASE)
+
+
+
+def formatdate(timeval=None, localtime=False, usegmt=False):
+ """Returns a date string as specified by RFC 2822, e.g.:
+
+ Fri, 09 Nov 2001 01:08:47 -0000
+
+ Optional timeval if given is a floating point time value as accepted by
+ gmtime() and localtime(), otherwise the current time is used.
+
+ Optional localtime is a flag that when True, interprets timeval, and
+ returns a date relative to the local timezone instead of UTC, properly
+ taking daylight savings time into account.
+
+ Optional argument usegmt means that the timezone is written out as
+ an ascii string, not numeric one (so "GMT" instead of "+0000"). This
+ is needed for HTTP, and is only used when localtime==False.
+ """
+ # Note: we cannot use strftime() because that honors the locale and RFC
+ # 2822 requires that day and month names be the English abbreviations.
+ if timeval is None:
+ timeval = time.time()
+ if localtime:
+ now = time.localtime(timeval)
+ # Calculate timezone offset, based on whether the local zone has
+ # daylight savings time, and whether DST is in effect.
+ if time.daylight and now[-1]:
+ offset = time.altzone
+ else:
+ offset = time.timezone
+ hours, minutes = divmod(abs(offset), 3600)
+ # Remember offset is in seconds west of UTC, but the timezone is in
+ # minutes east of UTC, so the signs differ.
+ if offset > 0:
+ sign = '-'
+ else:
+ sign = '+'
+ zone = '%s%02d%02d' % (sign, hours, minutes // 60)
+ else:
+ now = time.gmtime(timeval)
+ # Timezone offset is always -0000
+ if usegmt:
+ zone = 'GMT'
+ else:
+ zone = '-0000'
+ return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
+ ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
+ now[2],
+ ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+ 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
+ now[0], now[3], now[4], now[5],
+ zone)
+
+
+
+def make_msgid(idstring=None):
+ """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
+
+ <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com>
+
+ Optional idstring if given is a string used to strengthen the
+ uniqueness of the message id.
+ """
+ timeval = int(time.time()*100)
+ pid = os.getpid()
+ randint = random.getrandbits(64)
+ if idstring is None:
+ idstring = ''
+ else:
+ idstring = '.' + idstring
+ idhost = socket.getfqdn()
+ msgid = '<%d.%d.%d%s@%s>' % (timeval, pid, randint, idstring, idhost)
+ return msgid
+
+
+
+# These functions are in the standalone mimelib version only because they've
+# subsequently been fixed in the latest Python versions. We use this to work
+# around broken older Pythons.
+def parsedate(data):
+ if not data:
+ return None
+ return _parsedate(data)
+
+
+def parsedate_tz(data):
+ if not data:
+ return None
+ return _parsedate_tz(data)
+
+
+def parseaddr(addr):
+ """
+ Parse addr into its constituent realname and email address parts.
+
+ Return a tuple of realname and email address, unless the parse fails, in
+ which case return a 2-tuple of ('', '').
+ """
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return '', ''
+ return addrs[0]
+
+
+# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
+def unquote(str):
+ """Remove quotes from a string."""
+ if len(str) > 1:
+ if str.startswith('"') and str.endswith('"'):
+ return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
+ if str.startswith('<') and str.endswith('>'):
+ return str[1:-1]
+ return str
+
+
+
+# RFC2231-related functions - parameter encoding and decoding
+def decode_rfc2231(s):
+ """Decode string according to RFC 2231"""
+ parts = s.split(TICK, 2)
+ if len(parts) <= 2:
+ return None, None, s
+ return parts
+
+
+def encode_rfc2231(s, charset=None, language=None):
+ """Encode string according to RFC 2231.
+
+ If neither charset nor language is given, then s is returned as-is. If
+ charset is given but not language, the string is encoded using the empty
+ string for language.
+ """
+ import urllib
+ s = urllib.quote(s, safe='')
+ if charset is None and language is None:
+ return s
+ if language is None:
+ language = ''
+ return "%s'%s'%s" % (charset, language, s)
+
+
+rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
+
+def decode_params(params):
+ """Decode parameters list according to RFC 2231.
+
+ params is a sequence of 2-tuples containing (param name, string value).
+ """
+ # Copy params so we don't mess with the original
+ params = params[:]
+ new_params = []
+ # Map parameter's name to a list of continuations. The values are a
+ # 3-tuple of the continuation number, the string value, and a flag
+ # specifying whether a particular segment is %-encoded.
+ rfc2231_params = {}
+ name, value = params.pop(0)
+ new_params.append((name, value))
+ while params:
+ name, value = params.pop(0)
+ if name.endswith('*'):
+ encoded = True
+ else:
+ encoded = False
+ value = unquote(value)
+ mo = rfc2231_continuation.match(name)
+ if mo:
+ name, num = mo.group('name', 'num')
+ if num is not None:
+ num = int(num)
+ rfc2231_params.setdefault(name, []).append((num, value, encoded))
+ else:
+ new_params.append((name, '"%s"' % quote(value)))
+ if rfc2231_params:
+ for name, continuations in rfc2231_params.items():
+ value = []
+ extended = False
+ # Sort by number
+ continuations.sort()
+ # And now append all values in numerical order, converting
+ # %-encodings for the encoded segments. If any of the
+ # continuation names ends in a *, then the entire string, after
+ # decoding segments and concatenating, must have the charset and
+ # language specifiers at the beginning of the string.
+ for num, s, encoded in continuations:
+ if encoded:
+ s = urllib.unquote(s)
+ extended = True
+ value.append(s)
+ value = quote(EMPTYSTRING.join(value))
+ if extended:
+ charset, language, value = decode_rfc2231(value)
+ new_params.append((name, (charset, language, '"%s"' % value)))
+ else:
+ new_params.append((name, '"%s"' % value))
+ return new_params
+
+def collapse_rfc2231_value(value, errors='replace',
+ fallback_charset='us-ascii'):
+ if isinstance(value, tuple):
+ rawval = unquote(value[2])
+ charset = value[0] or 'us-ascii'
+ try:
+ return unicode(rawval, charset, errors)
+ except LookupError:
+ # XXX charset is unknown to Python.
+ return unicode(rawval, fallback_charset, errors)
+ else:
+ return unquote(value)
diff --git a/cashew/Lib/encodings/__init__.py b/cashew/Lib/encodings/__init__.py
new file mode 100644
index 0000000..b85ca82
--- /dev/null
+++ b/cashew/Lib/encodings/__init__.py
@@ -0,0 +1,157 @@
+""" Standard "encodings" Package
+
+ Standard Python encoding modules are stored in this package
+ directory.
+
+ Codec modules must have names corresponding to normalized encoding
+ names as defined in the normalize_encoding() function below, e.g.
+ 'utf-8' must be implemented by the module 'utf_8.py'.
+
+ Each codec module must export the following interface:
+
+ * getregentry() -> codecs.CodecInfo object
+ The getregentry() API must return a CodecInfo object with encoder, decoder,
+ incrementalencoder, incrementaldecoder, streamwriter and streamreader
+ attributes which adhere to the Python Codec Interface Standard.
+
+ In addition, a module may optionally also define the following
+ APIs which are then used by the package's codec search function:
+
+ * getaliases() -> sequence of encoding name strings to use as aliases
+
+ Alias names returned by getaliases() must be normalized encoding
+ names as defined by normalize_encoding().
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""#"
+
+import codecs
+from encodings import aliases
+import __builtin__
+
+_cache = {}  # encoding name -> registry entry, or None for a remembered failed lookup
+_unknown = '--unknown--'  # sentinel default for _cache.get(); None is a valid cached value
+_import_tail = ['*']  # passed as fromlist= to __import__() in search_function()
+_norm_encoding_map = (' . '
+ '0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
+ ' abcdefghijklmnopqrstuvwxyz '
+ ' '
+ ' '
+ ' ')  # translation table applied by normalize_encoding()
+_aliases = aliases.aliases  # the aliases.aliases dict itself; search_function() adds entries to it
+
+class CodecRegistryError(LookupError, SystemError):
+    """Raised by search_function() when a codec module's entry is unusable."""
+
+def normalize_encoding(encoding):
+    """Return *encoding* in the normalized form used for codec lookups.
+
+    Every run of characters other than ASCII letters, digits and the
+    dot (kept because it appears in Python package names) collapses
+    into a single underscore, e.g. ' -;#' becomes '_'; underscores at
+    either end are dropped.
+
+    Encoding names should be ASCII only; any non-ASCII characters
+    must at least be representable in Latin-1.
+    """
+    name = encoding
+    # str.translate() and unicode.translate() behave differently, so
+    # work on an 8-bit string.  .encode('latin-1') does *not* consult
+    # the codec registry, hence no recursion into the search function
+    # (see PyUnicode_AsEncodedString() in unicodeobject.c).
+    unicode_available = hasattr(__builtin__, "unicode")
+    if unicode_available and isinstance(name, unicode):
+        name = name.encode('latin-1')
+    words = name.translate(_norm_encoding_map).split()
+    return '_'.join(words)
+
+def search_function(encoding):
+    """Codec search function: return the CodecInfo for *encoding*, or None.
+
+    Results, including failed lookups, are remembered in _cache.  Aliases
+    announced by a codec module's getaliases() are added to _aliases.
+    Raises CodecRegistryError if getregentry() returns an unusable entry.
+    """
+    # Cache lookup
+    entry = _cache.get(encoding, _unknown)
+    if entry is not _unknown:
+        return entry
+
+    # Import the module: try the alias registered for the normalized name
+    # first, then the normalized name itself.
+    norm_encoding = normalize_encoding(encoding)
+    aliased_encoding = _aliases.get(norm_encoding) or \
+                       _aliases.get(norm_encoding.replace('.', '_'))
+    if aliased_encoding is not None:
+        modnames = [aliased_encoding,
+                    norm_encoding]
+    else:
+        modnames = [norm_encoding]
+    for modname in modnames:
+        if not modname or '.' in modname:
+            continue
+        try:
+            # Import is absolute to prevent the possibly malicious import of a
+            # module with side-effects that is not in the 'encodings' package.
+            mod = __import__('encodings.' + modname, fromlist=_import_tail,
+                             level=0)
+        except ImportError:
+            pass
+        else:
+            break
+    else:
+        mod = None
+
+    try:
+        getregentry = mod.getregentry
+    except AttributeError:
+        # Not a codec module
+        mod = None
+
+    if mod is None:
+        # Cache misses
+        _cache[encoding] = None
+        return None
+
+    # Now ask the module for the registry entry
+    entry = getregentry()
+    if not isinstance(entry, codecs.CodecInfo):
+        if not 4 <= len(entry) <= 7:
+            raise CodecRegistryError(
+                'module "%s" (%s) failed to register' %
+                (mod.__name__, mod.__file__))
+        if not hasattr(entry[0], '__call__') or \
+           not hasattr(entry[1], '__call__') or \
+           (entry[2] is not None and not hasattr(entry[2], '__call__')) or \
+           (entry[3] is not None and not hasattr(entry[3], '__call__')) or \
+           (len(entry) > 4 and entry[4] is not None and not hasattr(entry[4], '__call__')) or \
+           (len(entry) > 5 and entry[5] is not None and not hasattr(entry[5], '__call__')):
+            raise CodecRegistryError(
+                'incompatible codecs in module "%s" (%s)' %
+                (mod.__name__, mod.__file__))
+        if len(entry) < 7 or entry[6] is None:
+            # Put the module-derived name in slot 6, replacing a None there.
+            entry = (tuple(entry[:6]) + (None,) * (6 - len(entry)) +
+                     (mod.__name__.split(".", 1)[1],))
+        entry = codecs.CodecInfo(*entry)
+
+    # Cache the codec registry entry
+    _cache[encoding] = entry
+
+    # Register its aliases without overwriting previously registered ones
+    try:
+        codecaliases = mod.getaliases()
+    except AttributeError:
+        pass
+    else:
+        for alias in codecaliases:
+            if alias not in _aliases:
+                _aliases[alias] = modname
+
+    return entry
+
+# Register the search_function in the Python codec registry
+codecs.register(search_function)
diff --git a/cashew/Lib/encodings/aliases.py b/cashew/Lib/encodings/aliases.py
new file mode 100644
index 0000000..a54cf77
--- /dev/null
+++ b/cashew/Lib/encodings/aliases.py
@@ -0,0 +1,527 @@
+""" Encoding Aliases Support
+
+ This module is used by the encodings package search function to
+ map encoding names to module names.
+
+ Note that the search function normalizes the encoding names before
+ doing the lookup, so the mapping will have to map normalized
+ encoding names to module names.
+
+ Contents:
+
+ The following aliases dictionary contains mappings of all IANA
+ character set names for which the Python core library provides
+ codecs. In addition to these, a few Python specific codec
+ aliases have also been added.
+
+"""
+aliases = {
+
+ # Please keep this list sorted alphabetically by value !
+
+ # ascii codec
+ '646' : 'ascii',
+ 'ansi_x3.4_1968' : 'ascii',
+ 'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name
+ 'ansi_x3.4_1986' : 'ascii',
+ 'cp367' : 'ascii',
+ 'csascii' : 'ascii',
+ 'ibm367' : 'ascii',
+ 'iso646_us' : 'ascii',
+ 'iso_646.irv_1991' : 'ascii',
+ 'iso_ir_6' : 'ascii',
+ 'us' : 'ascii',
+ 'us_ascii' : 'ascii',
+
+ # base64_codec codec
+ 'base64' : 'base64_codec',
+ 'base_64' : 'base64_codec',
+
+ # big5 codec
+ 'big5_tw' : 'big5',
+ 'csbig5' : 'big5',
+
+ # big5hkscs codec
+ 'big5_hkscs' : 'big5hkscs',
+ 'hkscs' : 'big5hkscs',
+
+ # bz2_codec codec
+ 'bz2' : 'bz2_codec',
+
+ # cp037 codec
+ '037' : 'cp037',
+ 'csibm037' : 'cp037',
+ 'ebcdic_cp_ca' : 'cp037',
+ 'ebcdic_cp_nl' : 'cp037',
+ 'ebcdic_cp_us' : 'cp037',
+ 'ebcdic_cp_wt' : 'cp037',
+ 'ibm037' : 'cp037',
+ 'ibm039' : 'cp037',
+
+ # cp1026 codec
+ '1026' : 'cp1026',
+ 'csibm1026' : 'cp1026',
+ 'ibm1026' : 'cp1026',
+
+ # cp1140 codec
+ '1140' : 'cp1140',
+ 'ibm1140' : 'cp1140',
+
+ # cp1250 codec
+ '1250' : 'cp1250',
+ 'windows_1250' : 'cp1250',
+
+ # cp1251 codec
+ '1251' : 'cp1251',
+ 'windows_1251' : 'cp1251',
+
+ # cp1252 codec
+ '1252' : 'cp1252',
+ 'windows_1252' : 'cp1252',
+
+ # cp1253 codec
+ '1253' : 'cp1253',
+ 'windows_1253' : 'cp1253',
+
+ # cp1254 codec
+ '1254' : 'cp1254',
+ 'windows_1254' : 'cp1254',
+
+ # cp1255 codec
+ '1255' : 'cp1255',
+ 'windows_1255' : 'cp1255',
+
+ # cp1256 codec
+ '1256' : 'cp1256',
+ 'windows_1256' : 'cp1256',
+
+ # cp1257 codec
+ '1257' : 'cp1257',
+ 'windows_1257' : 'cp1257',
+
+ # cp1258 codec
+ '1258' : 'cp1258',
+ 'windows_1258' : 'cp1258',
+
+ # cp424 codec
+ '424' : 'cp424',
+ 'csibm424' : 'cp424',
+ 'ebcdic_cp_he' : 'cp424',
+ 'ibm424' : 'cp424',
+
+ # cp437 codec
+ '437' : 'cp437',
+ 'cspc8codepage437' : 'cp437',
+ 'ibm437' : 'cp437',
+
+ # cp500 codec
+ '500' : 'cp500',
+ 'csibm500' : 'cp500',
+ 'ebcdic_cp_be' : 'cp500',
+ 'ebcdic_cp_ch' : 'cp500',
+ 'ibm500' : 'cp500',
+
+ # cp775 codec
+ '775' : 'cp775',
+ 'cspc775baltic' : 'cp775',
+ 'ibm775' : 'cp775',
+
+ # cp850 codec
+ '850' : 'cp850',
+ 'cspc850multilingual' : 'cp850',
+ 'ibm850' : 'cp850',
+
+ # cp852 codec
+ '852' : 'cp852',
+ 'cspcp852' : 'cp852',
+ 'ibm852' : 'cp852',
+
+ # cp855 codec
+ '855' : 'cp855',
+ 'csibm855' : 'cp855',
+ 'ibm855' : 'cp855',
+
+ # cp857 codec
+ '857' : 'cp857',
+ 'csibm857' : 'cp857',
+ 'ibm857' : 'cp857',
+
+ # cp858 codec
+ '858' : 'cp858',
+ 'csibm858' : 'cp858',
+ 'ibm858' : 'cp858',
+
+ # cp860 codec
+ '860' : 'cp860',
+ 'csibm860' : 'cp860',
+ 'ibm860' : 'cp860',
+
+ # cp861 codec
+ '861' : 'cp861',
+ 'cp_is' : 'cp861',
+ 'csibm861' : 'cp861',
+ 'ibm861' : 'cp861',
+
+ # cp862 codec
+ '862' : 'cp862',
+ 'cspc862latinhebrew' : 'cp862',
+ 'ibm862' : 'cp862',
+
+ # cp863 codec
+ '863' : 'cp863',
+ 'csibm863' : 'cp863',
+ 'ibm863' : 'cp863',
+
+ # cp864 codec
+ '864' : 'cp864',
+ 'csibm864' : 'cp864',
+ 'ibm864' : 'cp864',
+
+ # cp865 codec
+ '865' : 'cp865',
+ 'csibm865' : 'cp865',
+ 'ibm865' : 'cp865',
+
+ # cp866 codec
+ '866' : 'cp866',
+ 'csibm866' : 'cp866',
+ 'ibm866' : 'cp866',
+
+ # cp869 codec
+ '869' : 'cp869',
+ 'cp_gr' : 'cp869',
+ 'csibm869' : 'cp869',
+ 'ibm869' : 'cp869',
+
+ # cp932 codec
+ '932' : 'cp932',
+ 'ms932' : 'cp932',
+ 'mskanji' : 'cp932',
+ 'ms_kanji' : 'cp932',
+
+ # cp949 codec
+ '949' : 'cp949',
+ 'ms949' : 'cp949',
+ 'uhc' : 'cp949',
+
+ # cp950 codec
+ '950' : 'cp950',
+ 'ms950' : 'cp950',
+
+ # euc_jis_2004 codec
+ 'jisx0213' : 'euc_jis_2004',
+ 'eucjis2004' : 'euc_jis_2004',
+ 'euc_jis2004' : 'euc_jis_2004',
+
+ # euc_jisx0213 codec
+ 'eucjisx0213' : 'euc_jisx0213',
+
+ # euc_jp codec
+ 'eucjp' : 'euc_jp',
+ 'ujis' : 'euc_jp',
+ 'u_jis' : 'euc_jp',
+
+ # euc_kr codec
+ 'euckr' : 'euc_kr',
+ 'korean' : 'euc_kr',
+ 'ksc5601' : 'euc_kr',
+ 'ks_c_5601' : 'euc_kr',
+ 'ks_c_5601_1987' : 'euc_kr',
+ 'ksx1001' : 'euc_kr',
+ 'ks_x_1001' : 'euc_kr',
+
+ # gb18030 codec
+ 'gb18030_2000' : 'gb18030',
+
+ # gb2312 codec
+ 'chinese' : 'gb2312',
+ 'csiso58gb231280' : 'gb2312',
+ 'euc_cn' : 'gb2312',
+ 'euccn' : 'gb2312',
+ 'eucgb2312_cn' : 'gb2312',
+ 'gb2312_1980' : 'gb2312',
+ 'gb2312_80' : 'gb2312',
+ 'iso_ir_58' : 'gb2312',
+
+ # gbk codec
+ '936' : 'gbk',
+ 'cp936' : 'gbk',
+ 'ms936' : 'gbk',
+
+ # hex_codec codec
+ 'hex' : 'hex_codec',
+
+ # hp_roman8 codec
+ 'roman8' : 'hp_roman8',
+ 'r8' : 'hp_roman8',
+ 'csHPRoman8' : 'hp_roman8',
+
+ # hz codec
+ 'hzgb' : 'hz',
+ 'hz_gb' : 'hz',
+ 'hz_gb_2312' : 'hz',
+
+ # iso2022_jp codec
+ 'csiso2022jp' : 'iso2022_jp',
+ 'iso2022jp' : 'iso2022_jp',
+ 'iso_2022_jp' : 'iso2022_jp',
+
+ # iso2022_jp_1 codec
+ 'iso2022jp_1' : 'iso2022_jp_1',
+ 'iso_2022_jp_1' : 'iso2022_jp_1',
+
+ # iso2022_jp_2 codec
+ 'iso2022jp_2' : 'iso2022_jp_2',
+ 'iso_2022_jp_2' : 'iso2022_jp_2',
+
+ # iso2022_jp_2004 codec
+ 'iso_2022_jp_2004' : 'iso2022_jp_2004',
+ 'iso2022jp_2004' : 'iso2022_jp_2004',
+
+ # iso2022_jp_3 codec
+ 'iso2022jp_3' : 'iso2022_jp_3',
+ 'iso_2022_jp_3' : 'iso2022_jp_3',
+
+ # iso2022_jp_ext codec
+ 'iso2022jp_ext' : 'iso2022_jp_ext',
+ 'iso_2022_jp_ext' : 'iso2022_jp_ext',
+
+ # iso2022_kr codec
+ 'csiso2022kr' : 'iso2022_kr',
+ 'iso2022kr' : 'iso2022_kr',
+ 'iso_2022_kr' : 'iso2022_kr',
+
+ # iso8859_10 codec
+ 'csisolatin6' : 'iso8859_10',
+ 'iso_8859_10' : 'iso8859_10',
+ 'iso_8859_10_1992' : 'iso8859_10',
+ 'iso_ir_157' : 'iso8859_10',
+ 'l6' : 'iso8859_10',
+ 'latin6' : 'iso8859_10',
+
+ # iso8859_11 codec
+ 'thai' : 'iso8859_11',
+ 'iso_8859_11' : 'iso8859_11',
+ 'iso_8859_11_2001' : 'iso8859_11',
+
+ # iso8859_13 codec
+ 'iso_8859_13' : 'iso8859_13',
+ 'l7' : 'iso8859_13',
+ 'latin7' : 'iso8859_13',
+
+ # iso8859_14 codec
+ 'iso_8859_14' : 'iso8859_14',
+ 'iso_8859_14_1998' : 'iso8859_14',
+ 'iso_celtic' : 'iso8859_14',
+ 'iso_ir_199' : 'iso8859_14',
+ 'l8' : 'iso8859_14',
+ 'latin8' : 'iso8859_14',
+
+ # iso8859_15 codec
+ 'iso_8859_15' : 'iso8859_15',
+ 'l9' : 'iso8859_15',
+ 'latin9' : 'iso8859_15',
+
+ # iso8859_16 codec
+ 'iso_8859_16' : 'iso8859_16',
+ 'iso_8859_16_2001' : 'iso8859_16',
+ 'iso_ir_226' : 'iso8859_16',
+ 'l10' : 'iso8859_16',
+ 'latin10' : 'iso8859_16',
+
+ # iso8859_2 codec
+ 'csisolatin2' : 'iso8859_2',
+ 'iso_8859_2' : 'iso8859_2',
+ 'iso_8859_2_1987' : 'iso8859_2',
+ 'iso_ir_101' : 'iso8859_2',
+ 'l2' : 'iso8859_2',
+ 'latin2' : 'iso8859_2',
+
+ # iso8859_3 codec
+ 'csisolatin3' : 'iso8859_3',
+ 'iso_8859_3' : 'iso8859_3',
+ 'iso_8859_3_1988' : 'iso8859_3',
+ 'iso_ir_109' : 'iso8859_3',
+ 'l3' : 'iso8859_3',
+ 'latin3' : 'iso8859_3',
+
+ # iso8859_4 codec
+ 'csisolatin4' : 'iso8859_4',
+ 'iso_8859_4' : 'iso8859_4',
+ 'iso_8859_4_1988' : 'iso8859_4',
+ 'iso_ir_110' : 'iso8859_4',
+ 'l4' : 'iso8859_4',
+ 'latin4' : 'iso8859_4',
+
+ # iso8859_5 codec
+ 'csisolatincyrillic' : 'iso8859_5',
+ 'cyrillic' : 'iso8859_5',
+ 'iso_8859_5' : 'iso8859_5',
+ 'iso_8859_5_1988' : 'iso8859_5',
+ 'iso_ir_144' : 'iso8859_5',
+
+ # iso8859_6 codec
+ 'arabic' : 'iso8859_6',
+ 'asmo_708' : 'iso8859_6',
+ 'csisolatinarabic' : 'iso8859_6',
+ 'ecma_114' : 'iso8859_6',
+ 'iso_8859_6' : 'iso8859_6',
+ 'iso_8859_6_1987' : 'iso8859_6',
+ 'iso_ir_127' : 'iso8859_6',
+
+ # iso8859_7 codec
+ 'csisolatingreek' : 'iso8859_7',
+ 'ecma_118' : 'iso8859_7',
+ 'elot_928' : 'iso8859_7',
+ 'greek' : 'iso8859_7',
+ 'greek8' : 'iso8859_7',
+ 'iso_8859_7' : 'iso8859_7',
+ 'iso_8859_7_1987' : 'iso8859_7',
+ 'iso_ir_126' : 'iso8859_7',
+
+ # iso8859_8 codec
+ 'csisolatinhebrew' : 'iso8859_8',
+ 'hebrew' : 'iso8859_8',
+ 'iso_8859_8' : 'iso8859_8',
+ 'iso_8859_8_1988' : 'iso8859_8',
+ 'iso_ir_138' : 'iso8859_8',
+
+ # iso8859_9 codec
+ 'csisolatin5' : 'iso8859_9',
+ 'iso_8859_9' : 'iso8859_9',
+ 'iso_8859_9_1989' : 'iso8859_9',
+ 'iso_ir_148' : 'iso8859_9',
+ 'l5' : 'iso8859_9',
+ 'latin5' : 'iso8859_9',
+
+ # johab codec
+ 'cp1361' : 'johab',
+ 'ms1361' : 'johab',
+
+ # koi8_r codec
+ 'cskoi8r' : 'koi8_r',
+
+ # latin_1 codec
+ #
+ # Note that the latin_1 codec is implemented internally in C and a
+ # lot faster than the charmap codec iso8859_1 which uses the same
+ # encoding. This is why we discourage the use of the iso8859_1
+ # codec and alias it to latin_1 instead.
+ #
+ '8859' : 'latin_1',
+ 'cp819' : 'latin_1',
+ 'csisolatin1' : 'latin_1',
+ 'ibm819' : 'latin_1',
+ 'iso8859' : 'latin_1',
+ 'iso8859_1' : 'latin_1',
+ 'iso_8859_1' : 'latin_1',
+ 'iso_8859_1_1987' : 'latin_1',
+ 'iso_ir_100' : 'latin_1',
+ 'l1' : 'latin_1',
+ 'latin' : 'latin_1',
+ 'latin1' : 'latin_1',
+
+ # mac_cyrillic codec
+ 'maccyrillic' : 'mac_cyrillic',
+
+ # mac_greek codec
+ 'macgreek' : 'mac_greek',
+
+ # mac_iceland codec
+ 'maciceland' : 'mac_iceland',
+
+ # mac_latin2 codec
+ 'maccentraleurope' : 'mac_latin2',
+ 'maclatin2' : 'mac_latin2',
+
+ # mac_roman codec
+ 'macroman' : 'mac_roman',
+
+ # mac_turkish codec
+ 'macturkish' : 'mac_turkish',
+
+ # mbcs codec
+ 'dbcs' : 'mbcs',
+
+ # ptcp154 codec
+ 'csptcp154' : 'ptcp154',
+ 'pt154' : 'ptcp154',
+ 'cp154' : 'ptcp154',
+ 'cyrillic_asian' : 'ptcp154',
+
+ # quopri_codec codec
+ 'quopri' : 'quopri_codec',
+ 'quoted_printable' : 'quopri_codec',
+ 'quotedprintable' : 'quopri_codec',
+
+ # rot_13 codec
+ 'rot13' : 'rot_13',
+
+ # shift_jis codec
+ 'csshiftjis' : 'shift_jis',
+ 'shiftjis' : 'shift_jis',
+ 'sjis' : 'shift_jis',
+ 's_jis' : 'shift_jis',
+
+ # shift_jis_2004 codec
+ 'shiftjis2004' : 'shift_jis_2004',
+ 'sjis_2004' : 'shift_jis_2004',
+ 's_jis_2004' : 'shift_jis_2004',
+
+ # shift_jisx0213 codec
+ 'shiftjisx0213' : 'shift_jisx0213',
+ 'sjisx0213' : 'shift_jisx0213',
+ 's_jisx0213' : 'shift_jisx0213',
+
+ # tactis codec
+ 'tis260' : 'tactis',
+
+ # tis_620 codec
+ 'tis620' : 'tis_620',
+ 'tis_620_0' : 'tis_620',
+ 'tis_620_2529_0' : 'tis_620',
+ 'tis_620_2529_1' : 'tis_620',
+ 'iso_ir_166' : 'tis_620',
+
+ # utf_16 codec
+ 'u16' : 'utf_16',
+ 'utf16' : 'utf_16',
+
+ # utf_16_be codec
+ 'unicodebigunmarked' : 'utf_16_be',
+ 'utf_16be' : 'utf_16_be',
+
+ # utf_16_le codec
+ 'unicodelittleunmarked' : 'utf_16_le',
+ 'utf_16le' : 'utf_16_le',
+
+ # utf_32 codec
+ 'u32' : 'utf_32',
+ 'utf32' : 'utf_32',
+
+ # utf_32_be codec
+ 'utf_32be' : 'utf_32_be',
+
+ # utf_32_le codec
+ 'utf_32le' : 'utf_32_le',
+
+ # utf_7 codec
+ 'u7' : 'utf_7',
+ 'utf7' : 'utf_7',
+ 'unicode_1_1_utf_7' : 'utf_7',
+
+ # utf_8 codec
+ 'u8' : 'utf_8',
+ 'utf' : 'utf_8',
+ 'utf8' : 'utf_8',
+ 'utf8_ucs2' : 'utf_8',
+ 'utf8_ucs4' : 'utf_8',
+
+ # uu_codec codec
+ 'uu' : 'uu_codec',
+
+ # zlib_codec codec
+ 'zip' : 'zlib_codec',
+ 'zlib' : 'zlib_codec',
+
+}
diff --git a/cashew/Lib/encodings/ascii.py b/cashew/Lib/encodings/ascii.py
new file mode 100644
index 0000000..2033cde
--- /dev/null
+++ b/cashew/Lib/encodings/ascii.py
@@ -0,0 +1,50 @@
+""" Python 'ascii' Codec
+
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    """Stateless ASCII codec bound directly to the C implementations."""
+    # C functions stored on a class are not converted to methods, so
+    # these are called without self.  This is intended.
+    decode = codecs.ascii_decode
+    encode = codecs.ascii_encode
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):  # no state is kept; final is not consulted
+ return codecs.ascii_encode(input, self.errors)[0]  # [0]: keep only the encoded data
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):  # no state is kept; final is not consulted
+ return codecs.ascii_decode(input, self.errors)[0]  # [0]: keep only the decoded data
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    """Combines Codec with codecs.StreamWriter; adds nothing of its own."""
+
+class StreamReader(Codec, codecs.StreamReader):
+    """Combines Codec with codecs.StreamReader; adds nothing of its own."""
+
+class StreamConverter(StreamWriter, StreamReader):
+    # Note the crossed binding: encode uses the decoder, decode the encoder.
+    decode = codecs.ascii_encode
+    encode = codecs.ascii_decode
+
+### encodings module API
+
+def getregentry():
+    """Return the CodecInfo entry that registers the 'ascii' codec."""
+    entry = codecs.CodecInfo(
+        name='ascii',
+        encode=Codec.encode, decode=Codec.decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter, streamreader=StreamReader,
+    )
+    return entry
diff --git a/cashew/Lib/encodings/base64_codec.py b/cashew/Lib/encodings/base64_codec.py
new file mode 100644
index 0000000..34ac555
--- /dev/null
+++ b/cashew/Lib/encodings/base64_codec.py
@@ -0,0 +1,80 @@
+""" Python 'base64_codec' Codec - base64 content transfer encoding
+
+ Unlike most of the other codecs which target Unicode, this codec
+ will return Python string objects for both encode and decode.
+
+ Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""
+import codecs, base64
+
+### Codec APIs
+
+def base64_encode(input, errors='strict'):
+    """Base64-encode *input*.
+
+    Returns the tuple (encoded string, len(input)).
+
+    errors selects the error handling scheme.  Only the default,
+    'strict', is supported by this codec; any other value trips the
+    assertion below.
+    """
+    assert errors == 'strict'
+    encoded = base64.encodestring(input)
+    consumed = len(input)
+    return (encoded, consumed)
+
+def base64_decode(input, errors='strict'):
+    """Base64-decode *input*.
+
+    Returns the tuple (decoded string, len(input)).
+
+    input must provide the bf_getreadbuf buffer slot; Python strings,
+    buffer objects and memory mapped files are examples of objects
+    that do.
+
+    errors selects the error handling scheme.  Only the default,
+    'strict', is supported by this codec; any other value trips the
+    assertion below.
+    """
+    assert errors == 'strict'
+    decoded = base64.decodestring(input)
+    consumed = len(input)
+    return (decoded, consumed)
+
+class Codec(codecs.Codec):
+ """Codec object whose methods delegate to base64_encode() and base64_decode()."""
+ def encode(self, input,errors='strict'):
+ return base64_encode(input,errors)
+ def decode(self, input,errors='strict'):
+ return base64_decode(input,errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):  # final is not consulted
+ assert self.errors == 'strict'  # only 'strict' error handling is supported
+ return base64.encodestring(input)  # NOTE(review): each chunk is encoded on its own; confirm callers pass whole inputs
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):  # final is not consulted
+ assert self.errors == 'strict'  # only 'strict' error handling is supported
+ return base64.decodestring(input)  # NOTE(review): each chunk is decoded on its own; confirm callers pass whole inputs
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    """Combines Codec with codecs.StreamWriter; adds nothing of its own."""
+
+class StreamReader(Codec, codecs.StreamReader):
+    """Combines Codec with codecs.StreamReader; adds nothing of its own."""
+
+### encodings module API
+
+def getregentry():
+    """Return the CodecInfo entry that registers this codec as 'base64'."""
+    entry = codecs.CodecInfo(
+        name='base64',
+        encode=base64_encode, decode=base64_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter, streamreader=StreamReader,
+        _is_text_encoding=False,
+    )
+    return entry
diff --git a/cashew/Lib/encodings/bz2_codec.py b/cashew/Lib/encodings/bz2_codec.py
new file mode 100644
index 0000000..136503a
--- /dev/null
+++ b/cashew/Lib/encodings/bz2_codec.py
@@ -0,0 +1,103 @@
+""" Python 'bz2_codec' Codec - bz2 compression encoding
+
+ Unlike most of the other codecs which target Unicode, this codec
+ will return Python string objects for both encode and decode.
+
+ Adapted by Raymond Hettinger from zlib_codec.py which was written
+ by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""
+import codecs
+import bz2 # this codec needs the optional bz2 module !
+
+### Codec APIs
+
+def bz2_encode(input, errors='strict'):
+    """Compress *input* with bz2.
+
+    Returns the tuple (compressed string, len(input)).
+
+    errors selects the error handling scheme.  Only the default,
+    'strict', is supported by this codec; any other value trips the
+    assertion below.
+    """
+    assert errors == 'strict'
+    compressed = bz2.compress(input)
+    consumed = len(input)
+    return (compressed, consumed)
+
+def bz2_decode(input, errors='strict'):
+    """Decompress bz2-compressed *input*.
+
+    Returns the tuple (decompressed string, len(input)).
+
+    input must provide the bf_getreadbuf buffer slot; Python strings,
+    buffer objects and memory mapped files are examples of objects
+    that do.
+
+    errors selects the error handling scheme.  Only the default,
+    'strict', is supported by this codec; any other value trips the
+    assertion below.
+    """
+    assert errors == 'strict'
+    decompressed = bz2.decompress(input)
+    consumed = len(input)
+    return (decompressed, consumed)
+
+class Codec(codecs.Codec):
+ """Codec object whose methods delegate to bz2_encode() and bz2_decode()."""
+ def encode(self, input, errors='strict'):
+ return bz2_encode(input, errors)
+ def decode(self, input, errors='strict'):
+ return bz2_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    """Incremental bz2 compressor wrapping one bz2.BZ2Compressor."""
+    def __init__(self, errors='strict'):
+        assert errors == 'strict'
+        self.errors = errors
+        self.compressobj = bz2.BZ2Compressor()
+
+    def encode(self, input, final=False):
+        data = self.compressobj.compress(input)
+        if final:  # last chunk: append what the compressor still buffers
+            data += self.compressobj.flush()
+        return data
+
+    def reset(self):
+        self.compressobj = bz2.BZ2Compressor()
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def __init__(self, errors='strict'):
+ assert errors == 'strict'  # only 'strict' error handling is supported
+ self.errors = errors
+ self.decompressobj = bz2.BZ2Decompressor()
+
+ def decode(self, input, final=False):  # final is not consulted
+ try:
+ return self.decompressobj.decompress(input)
+ except EOFError:
+ return ''  # an EOFError from the decompressor is reported as "no output"
+
+ def reset(self):
+ self.decompressobj = bz2.BZ2Decompressor()  # start over with a fresh decompressor
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    """Combines Codec with codecs.StreamWriter; adds nothing of its own."""
+
+class StreamReader(Codec, codecs.StreamReader):
+    """Combines Codec with codecs.StreamReader; adds nothing of its own."""
+
+### encodings module API
+
+def getregentry():
+    """Return the CodecInfo entry that registers this codec as 'bz2'."""
+    entry = codecs.CodecInfo(
+        name="bz2",
+        encode=bz2_encode, decode=bz2_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter, streamreader=StreamReader,
+        _is_text_encoding=False,
+    )
+    return entry
diff --git a/cashew/Lib/encodings/charmap.py b/cashew/Lib/encodings/charmap.py
new file mode 100644
index 0000000..81189b1
--- /dev/null
+++ b/cashew/Lib/encodings/charmap.py
@@ -0,0 +1,69 @@
+""" Generic Python Character Mapping Codec.
+
+ Use this codec directly rather than through the automatic
+ conversion mechanisms supplied by unicode() and .encode().
+
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    """Stateless charmap codec bound directly to the C implementations."""
+    # C functions stored on a class are not converted to methods, so
+    # these are called without self.  This is intended.
+    decode = codecs.charmap_decode
+    encode = codecs.charmap_encode
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def __init__(self, errors='strict', mapping=None):
+ codecs.IncrementalEncoder.__init__(self, errors)
+ self.mapping = mapping  # forwarded to codecs.charmap_encode() by encode()
+
+ def encode(self, input, final=False):  # final is not consulted
+ return codecs.charmap_encode(input, self.errors, self.mapping)[0]  # [0]: keep only the encoded data
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def __init__(self, errors='strict', mapping=None):
+ codecs.IncrementalDecoder.__init__(self, errors)
+ self.mapping = mapping  # forwarded to codecs.charmap_decode() by decode()
+
+ def decode(self, input, final=False):  # final is not consulted
+ return codecs.charmap_decode(input, self.errors, self.mapping)[0]  # [0]: keep only the decoded data
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ """Stream writer that encodes with the mapping given to the constructor."""
+ def __init__(self,stream,errors='strict',mapping=None):
+ codecs.StreamWriter.__init__(self,stream,errors)
+ self.mapping = mapping
+
+ def encode(self,input,errors='strict'):
+ return Codec.encode(input,errors,self.mapping)  # Codec.encode is the plain C function, so no self is passed
+
+class StreamReader(Codec,codecs.StreamReader):
+ """Stream reader that decodes with the mapping given to the constructor."""
+ def __init__(self,stream,errors='strict',mapping=None):
+ codecs.StreamReader.__init__(self,stream,errors)
+ self.mapping = mapping
+
+ def decode(self,input,errors='strict'):
+ return Codec.decode(input,errors,self.mapping)  # Codec.decode is the plain C function, so no self is passed
+
+### encodings module API
+
+def getregentry():
+    """Return the CodecInfo entry that registers the 'charmap' codec."""
+    entry = codecs.CodecInfo(
+        name='charmap',
+        encode=Codec.encode, decode=Codec.decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter, streamreader=StreamReader,
+    )
+    return entry
diff --git a/cashew/Lib/encodings/cp037.py b/cashew/Lib/encodings/cp037.py
new file mode 100644
index 0000000..c802b89
--- /dev/null
+++ b/cashew/Lib/encodings/cp037.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp037 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ """Charmap codec for cp037 using the module-level encoding_table/decoding_table."""
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):  # no state is kept; final is not consulted
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # [0]: keep only the encoded data
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):  # no state is kept; final is not consulted
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # [0]: keep only the decoded data
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    """Combines Codec with codecs.StreamWriter; adds nothing of its own."""
+
+class StreamReader(Codec, codecs.StreamReader):
+    """Combines Codec with codecs.StreamReader; adds nothing of its own."""
+
+### encodings module API
+
+def getregentry():
+    """Return the CodecInfo entry that registers the 'cp037' codec."""
+    entry = codecs.CodecInfo(
+        name='cp037',
+        encode=Codec().encode, decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader, streamwriter=StreamWriter,
+    )
+    return entry
+
+
+### Decoding Table
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x9c' # 0x04 -> CONTROL
+ u'\t' # 0x05 -> HORIZONTAL TABULATION
+ u'\x86' # 0x06 -> CONTROL
+ u'\x7f' # 0x07 -> DELETE
+ u'\x97' # 0x08 -> CONTROL
+ u'\x8d' # 0x09 -> CONTROL
+ u'\x8e' # 0x0A -> CONTROL
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x9d' # 0x14 -> CONTROL
+ u'\x85' # 0x15 -> CONTROL
+ u'\x08' # 0x16 -> BACKSPACE
+ u'\x87' # 0x17 -> CONTROL
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x92' # 0x1A -> CONTROL
+ u'\x8f' # 0x1B -> CONTROL
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u'\x80' # 0x20 -> CONTROL
+ u'\x81' # 0x21 -> CONTROL
+ u'\x82' # 0x22 -> CONTROL
+ u'\x83' # 0x23 -> CONTROL
+ u'\x84' # 0x24 -> CONTROL
+ u'\n' # 0x25 -> LINE FEED
+ u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ u'\x1b' # 0x27 -> ESCAPE
+ u'\x88' # 0x28 -> CONTROL
+ u'\x89' # 0x29 -> CONTROL
+ u'\x8a' # 0x2A -> CONTROL
+ u'\x8b' # 0x2B -> CONTROL
+ u'\x8c' # 0x2C -> CONTROL
+ u'\x05' # 0x2D -> ENQUIRY
+ u'\x06' # 0x2E -> ACKNOWLEDGE
+ u'\x07' # 0x2F -> BELL
+ u'\x90' # 0x30 -> CONTROL
+ u'\x91' # 0x31 -> CONTROL
+ u'\x16' # 0x32 -> SYNCHRONOUS IDLE
+ u'\x93' # 0x33 -> CONTROL
+ u'\x94' # 0x34 -> CONTROL
+ u'\x95' # 0x35 -> CONTROL
+ u'\x96' # 0x36 -> CONTROL
+ u'\x04' # 0x37 -> END OF TRANSMISSION
+ u'\x98' # 0x38 -> CONTROL
+ u'\x99' # 0x39 -> CONTROL
+ u'\x9a' # 0x3A -> CONTROL
+ u'\x9b' # 0x3B -> CONTROL
+ u'\x14' # 0x3C -> DEVICE CONTROL FOUR
+ u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ u'\x9e' # 0x3E -> CONTROL
+ u'\x1a' # 0x3F -> SUBSTITUTE
+ u' ' # 0x40 -> SPACE
+ u'\xa0' # 0x41 -> NO-BREAK SPACE
+ u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
+ u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xa2' # 0x4A -> CENT SIGN
+ u'.' # 0x4B -> FULL STOP
+ u'<' # 0x4C -> LESS-THAN SIGN
+ u'(' # 0x4D -> LEFT PARENTHESIS
+ u'+' # 0x4E -> PLUS SIGN
+ u'|' # 0x4F -> VERTICAL LINE
+ u'&' # 0x50 -> AMPERSAND
+ u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ u'!' # 0x5A -> EXCLAMATION MARK
+ u'$' # 0x5B -> DOLLAR SIGN
+ u'*' # 0x5C -> ASTERISK
+ u')' # 0x5D -> RIGHT PARENTHESIS
+ u';' # 0x5E -> SEMICOLON
+ u'\xac' # 0x5F -> NOT SIGN
+ u'-' # 0x60 -> HYPHEN-MINUS
+ u'/' # 0x61 -> SOLIDUS
+ u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+ u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xa6' # 0x6A -> BROKEN BAR
+ u',' # 0x6B -> COMMA
+ u'%' # 0x6C -> PERCENT SIGN
+ u'_' # 0x6D -> LOW LINE
+ u'>' # 0x6E -> GREATER-THAN SIGN
+ u'?' # 0x6F -> QUESTION MARK
+ u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
+ u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+ u'`' # 0x79 -> GRAVE ACCENT
+ u':' # 0x7A -> COLON
+ u'#' # 0x7B -> NUMBER SIGN
+ u'@' # 0x7C -> COMMERCIAL AT
+ u"'" # 0x7D -> APOSTROPHE
+ u'=' # 0x7E -> EQUALS SIGN
+ u'"' # 0x7F -> QUOTATION MARK
+ u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+ u'a' # 0x81 -> LATIN SMALL LETTER A
+ u'b' # 0x82 -> LATIN SMALL LETTER B
+ u'c' # 0x83 -> LATIN SMALL LETTER C
+ u'd' # 0x84 -> LATIN SMALL LETTER D
+ u'e' # 0x85 -> LATIN SMALL LETTER E
+ u'f' # 0x86 -> LATIN SMALL LETTER F
+ u'g' # 0x87 -> LATIN SMALL LETTER G
+ u'h' # 0x88 -> LATIN SMALL LETTER H
+ u'i' # 0x89 -> LATIN SMALL LETTER I
+ u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
+ u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
+ u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
+ u'\xb1' # 0x8F -> PLUS-MINUS SIGN
+ u'\xb0' # 0x90 -> DEGREE SIGN
+ u'j' # 0x91 -> LATIN SMALL LETTER J
+ u'k' # 0x92 -> LATIN SMALL LETTER K
+ u'l' # 0x93 -> LATIN SMALL LETTER L
+ u'm' # 0x94 -> LATIN SMALL LETTER M
+ u'n' # 0x95 -> LATIN SMALL LETTER N
+ u'o' # 0x96 -> LATIN SMALL LETTER O
+ u'p' # 0x97 -> LATIN SMALL LETTER P
+ u'q' # 0x98 -> LATIN SMALL LETTER Q
+ u'r' # 0x99 -> LATIN SMALL LETTER R
+ u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
+ u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
+ u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
+ u'\xb8' # 0x9D -> CEDILLA
+ u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
+ u'\xa4' # 0x9F -> CURRENCY SIGN
+ u'\xb5' # 0xA0 -> MICRO SIGN
+ u'~' # 0xA1 -> TILDE
+ u's' # 0xA2 -> LATIN SMALL LETTER S
+ u't' # 0xA3 -> LATIN SMALL LETTER T
+ u'u' # 0xA4 -> LATIN SMALL LETTER U
+ u'v' # 0xA5 -> LATIN SMALL LETTER V
+ u'w' # 0xA6 -> LATIN SMALL LETTER W
+ u'x' # 0xA7 -> LATIN SMALL LETTER X
+ u'y' # 0xA8 -> LATIN SMALL LETTER Y
+ u'z' # 0xA9 -> LATIN SMALL LETTER Z
+ u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
+ u'\xbf' # 0xAB -> INVERTED QUESTION MARK
+ u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+ u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+ u'\xae' # 0xAF -> REGISTERED SIGN
+ u'^' # 0xB0 -> CIRCUMFLEX ACCENT
+ u'\xa3' # 0xB1 -> POUND SIGN
+ u'\xa5' # 0xB2 -> YEN SIGN
+ u'\xb7' # 0xB3 -> MIDDLE DOT
+ u'\xa9' # 0xB4 -> COPYRIGHT SIGN
+ u'\xa7' # 0xB5 -> SECTION SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ u'[' # 0xBA -> LEFT SQUARE BRACKET
+ u']' # 0xBB -> RIGHT SQUARE BRACKET
+ u'\xaf' # 0xBC -> MACRON
+ u'\xa8' # 0xBD -> DIAERESIS
+ u'\xb4' # 0xBE -> ACUTE ACCENT
+ u'\xd7' # 0xBF -> MULTIPLICATION SIGN
+ u'{' # 0xC0 -> LEFT CURLY BRACKET
+ u'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ u'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ u'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ u'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ u'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ u'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ u'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ u'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ u'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ u'\xad' # 0xCA -> SOFT HYPHEN
+ u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
+ u'}' # 0xD0 -> RIGHT CURLY BRACKET
+ u'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ u'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ u'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ u'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ u'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ u'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ u'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ u'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ u'\xb9' # 0xDA -> SUPERSCRIPT ONE
+ u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ u'\\' # 0xE0 -> REVERSE SOLIDUS
+ u'\xf7' # 0xE1 -> DIVISION SIGN
+ u'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ u'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ u'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ u'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ u'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ u'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ u'\xb2' # 0xEA -> SUPERSCRIPT TWO
+ u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+ u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+ u'0' # 0xF0 -> DIGIT ZERO
+ u'1' # 0xF1 -> DIGIT ONE
+ u'2' # 0xF2 -> DIGIT TWO
+ u'3' # 0xF3 -> DIGIT THREE
+ u'4' # 0xF4 -> DIGIT FOUR
+ u'5' # 0xF5 -> DIGIT FIVE
+ u'6' # 0xF6 -> DIGIT SIX
+ u'7' # 0xF7 -> DIGIT SEVEN
+ u'8' # 0xF8 -> DIGIT EIGHT
+ u'9' # 0xF9 -> DIGIT NINE
+ u'\xb3' # 0xFA -> SUPERSCRIPT THREE
+ u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\x9f' # 0xFF -> CONTROL
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)  # encoding-side table, built from decoding_table by codecs.charmap_build
diff --git a/cashew/Lib/encodings/cp1006.py b/cashew/Lib/encodings/cp1006.py
new file mode 100644
index 0000000..e21e804
--- /dev/null
+++ b/cashew/Lib/encodings/cp1006.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1006 generated from 'MAPPINGS/VENDORS/MISC/CP1006.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # cp1006 codec; both methods delegate to the codecs charmap helpers
+
+ def encode(self,input,errors='strict'):  # returns whatever codecs.charmap_encode returns, using the module-level encoding_table
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):  # returns whatever codecs.charmap_decode returns, using the module-level decoding_table
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # incremental cp1006 encoder; error mode is read from self.errors
+ def encode(self, input, final=False):  # `final` is accepted but not used by this body
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # keep only element [0] of the helper's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # incremental cp1006 decoder; error mode is read from self.errors
+ def decode(self, input, final=False):  # `final` is accepted but not used by this body
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # keep only element [0] of the helper's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # combines this module's Codec with codecs.StreamWriter
+ pass  # no members of its own; everything is inherited from the two bases
+
+class StreamReader(Codec,codecs.StreamReader):  # combines this module's Codec with codecs.StreamReader
+ pass  # no members of its own; everything is inherited from the two bases
+
+### encodings module API
+
+def getregentry():  # returns the codecs.CodecInfo record describing the cp1006 codec
+ return codecs.CodecInfo(
+ name='cp1006',
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # the classes below are passed uninstantiated
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\x80' # 0x80 ->
+ u'\x81' # 0x81 ->
+ u'\x82' # 0x82 ->
+ u'\x83' # 0x83 ->
+ u'\x84' # 0x84 ->
+ u'\x85' # 0x85 ->
+ u'\x86' # 0x86 ->
+ u'\x87' # 0x87 ->
+ u'\x88' # 0x88 ->
+ u'\x89' # 0x89 ->
+ u'\x8a' # 0x8A ->
+ u'\x8b' # 0x8B ->
+ u'\x8c' # 0x8C ->
+ u'\x8d' # 0x8D ->
+ u'\x8e' # 0x8E ->
+ u'\x8f' # 0x8F ->
+ u'\x90' # 0x90 ->
+ u'\x91' # 0x91 ->
+ u'\x92' # 0x92 ->
+ u'\x93' # 0x93 ->
+ u'\x94' # 0x94 ->
+ u'\x95' # 0x95 ->
+ u'\x96' # 0x96 ->
+ u'\x97' # 0x97 ->
+ u'\x98' # 0x98 ->
+ u'\x99' # 0x99 ->
+ u'\x9a' # 0x9A ->
+ u'\x9b' # 0x9B ->
+ u'\x9c' # 0x9C ->
+ u'\x9d' # 0x9D ->
+ u'\x9e' # 0x9E ->
+ u'\x9f' # 0x9F ->
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\u06f0' # 0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO
+ u'\u06f1' # 0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE
+ u'\u06f2' # 0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO
+ u'\u06f3' # 0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE
+ u'\u06f4' # 0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR
+ u'\u06f5' # 0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE
+ u'\u06f6' # 0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX
+ u'\u06f7' # 0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN
+ u'\u06f8' # 0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT
+ u'\u06f9' # 0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE
+ u'\u060c' # 0xAB -> ARABIC COMMA
+ u'\u061b' # 0xAC -> ARABIC SEMICOLON
+ u'\xad' # 0xAD -> SOFT HYPHEN
+ u'\u061f' # 0xAE -> ARABIC QUESTION MARK
+ u'\ufe81' # 0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
+ u'\ufe8d' # 0xB0 -> ARABIC LETTER ALEF ISOLATED FORM
+ u'\ufe8e' # 0xB1 -> ARABIC LETTER ALEF FINAL FORM
+ u'\ufe8e' # 0xB2 -> ARABIC LETTER ALEF FINAL FORM
+ u'\ufe8f' # 0xB3 -> ARABIC LETTER BEH ISOLATED FORM
+ u'\ufe91' # 0xB4 -> ARABIC LETTER BEH INITIAL FORM
+ u'\ufb56' # 0xB5 -> ARABIC LETTER PEH ISOLATED FORM
+ u'\ufb58' # 0xB6 -> ARABIC LETTER PEH INITIAL FORM
+ u'\ufe93' # 0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM
+ u'\ufe95' # 0xB8 -> ARABIC LETTER TEH ISOLATED FORM
+ u'\ufe97' # 0xB9 -> ARABIC LETTER TEH INITIAL FORM
+ u'\ufb66' # 0xBA -> ARABIC LETTER TTEH ISOLATED FORM
+ u'\ufb68' # 0xBB -> ARABIC LETTER TTEH INITIAL FORM
+ u'\ufe99' # 0xBC -> ARABIC LETTER THEH ISOLATED FORM
+ u'\ufe9b' # 0xBD -> ARABIC LETTER THEH INITIAL FORM
+ u'\ufe9d' # 0xBE -> ARABIC LETTER JEEM ISOLATED FORM
+ u'\ufe9f' # 0xBF -> ARABIC LETTER JEEM INITIAL FORM
+ u'\ufb7a' # 0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM
+ u'\ufb7c' # 0xC1 -> ARABIC LETTER TCHEH INITIAL FORM
+ u'\ufea1' # 0xC2 -> ARABIC LETTER HAH ISOLATED FORM
+ u'\ufea3' # 0xC3 -> ARABIC LETTER HAH INITIAL FORM
+ u'\ufea5' # 0xC4 -> ARABIC LETTER KHAH ISOLATED FORM
+ u'\ufea7' # 0xC5 -> ARABIC LETTER KHAH INITIAL FORM
+ u'\ufea9' # 0xC6 -> ARABIC LETTER DAL ISOLATED FORM
+ u'\ufb84' # 0xC7 -> ARABIC LETTER DAHAL ISOLATED FORM
+ u'\ufeab' # 0xC8 -> ARABIC LETTER THAL ISOLATED FORM
+ u'\ufead' # 0xC9 -> ARABIC LETTER REH ISOLATED FORM
+ u'\ufb8c' # 0xCA -> ARABIC LETTER RREH ISOLATED FORM
+ u'\ufeaf' # 0xCB -> ARABIC LETTER ZAIN ISOLATED FORM
+ u'\ufb8a' # 0xCC -> ARABIC LETTER JEH ISOLATED FORM
+ u'\ufeb1' # 0xCD -> ARABIC LETTER SEEN ISOLATED FORM
+ u'\ufeb3' # 0xCE -> ARABIC LETTER SEEN INITIAL FORM
+ u'\ufeb5' # 0xCF -> ARABIC LETTER SHEEN ISOLATED FORM
+ u'\ufeb7' # 0xD0 -> ARABIC LETTER SHEEN INITIAL FORM
+ u'\ufeb9' # 0xD1 -> ARABIC LETTER SAD ISOLATED FORM
+ u'\ufebb' # 0xD2 -> ARABIC LETTER SAD INITIAL FORM
+ u'\ufebd' # 0xD3 -> ARABIC LETTER DAD ISOLATED FORM
+ u'\ufebf' # 0xD4 -> ARABIC LETTER DAD INITIAL FORM
+ u'\ufec1' # 0xD5 -> ARABIC LETTER TAH ISOLATED FORM
+ u'\ufec5' # 0xD6 -> ARABIC LETTER ZAH ISOLATED FORM
+ u'\ufec9' # 0xD7 -> ARABIC LETTER AIN ISOLATED FORM
+ u'\ufeca' # 0xD8 -> ARABIC LETTER AIN FINAL FORM
+ u'\ufecb' # 0xD9 -> ARABIC LETTER AIN INITIAL FORM
+ u'\ufecc' # 0xDA -> ARABIC LETTER AIN MEDIAL FORM
+ u'\ufecd' # 0xDB -> ARABIC LETTER GHAIN ISOLATED FORM
+ u'\ufece' # 0xDC -> ARABIC LETTER GHAIN FINAL FORM
+ u'\ufecf' # 0xDD -> ARABIC LETTER GHAIN INITIAL FORM
+ u'\ufed0' # 0xDE -> ARABIC LETTER GHAIN MEDIAL FORM
+ u'\ufed1' # 0xDF -> ARABIC LETTER FEH ISOLATED FORM
+ u'\ufed3' # 0xE0 -> ARABIC LETTER FEH INITIAL FORM
+ u'\ufed5' # 0xE1 -> ARABIC LETTER QAF ISOLATED FORM
+ u'\ufed7' # 0xE2 -> ARABIC LETTER QAF INITIAL FORM
+ u'\ufed9' # 0xE3 -> ARABIC LETTER KAF ISOLATED FORM
+ u'\ufedb' # 0xE4 -> ARABIC LETTER KAF INITIAL FORM
+ u'\ufb92' # 0xE5 -> ARABIC LETTER GAF ISOLATED FORM
+ u'\ufb94' # 0xE6 -> ARABIC LETTER GAF INITIAL FORM
+ u'\ufedd' # 0xE7 -> ARABIC LETTER LAM ISOLATED FORM
+ u'\ufedf' # 0xE8 -> ARABIC LETTER LAM INITIAL FORM
+ u'\ufee0' # 0xE9 -> ARABIC LETTER LAM MEDIAL FORM
+ u'\ufee1' # 0xEA -> ARABIC LETTER MEEM ISOLATED FORM
+ u'\ufee3' # 0xEB -> ARABIC LETTER MEEM INITIAL FORM
+ u'\ufb9e' # 0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM
+ u'\ufee5' # 0xED -> ARABIC LETTER NOON ISOLATED FORM
+ u'\ufee7' # 0xEE -> ARABIC LETTER NOON INITIAL FORM
+ u'\ufe85' # 0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
+ u'\ufeed' # 0xF0 -> ARABIC LETTER WAW ISOLATED FORM
+ u'\ufba6' # 0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM
+ u'\ufba8' # 0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM
+ u'\ufba9' # 0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM
+ u'\ufbaa' # 0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM
+ u'\ufe80' # 0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM
+ u'\ufe89' # 0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM
+ u'\ufe8a' # 0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM
+ u'\ufe8b' # 0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
+ u'\ufef1' # 0xF9 -> ARABIC LETTER YEH ISOLATED FORM
+ u'\ufef2' # 0xFA -> ARABIC LETTER YEH FINAL FORM
+ u'\ufef3' # 0xFB -> ARABIC LETTER YEH INITIAL FORM
+ u'\ufbb0' # 0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM
+ u'\ufbae' # 0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM
+ u'\ufe7c' # 0xFE -> ARABIC SHADDA ISOLATED FORM
+ u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)  # encoding-side table built from decoding_table; passed to charmap_encode by Codec and IncrementalEncoder
diff --git a/cashew/Lib/encodings/cp1026.py b/cashew/Lib/encodings/cp1026.py
new file mode 100644
index 0000000..45bbe62
--- /dev/null
+++ b/cashew/Lib/encodings/cp1026.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1026 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # cp1026 codec; both methods delegate to the codecs charmap helpers
+
+ def encode(self,input,errors='strict'):  # returns whatever codecs.charmap_encode returns, using the module-level encoding_table
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):  # returns whatever codecs.charmap_decode returns, using the module-level decoding_table
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # incremental cp1026 encoder; error mode is read from self.errors
+ def encode(self, input, final=False):  # `final` is accepted but not used by this body
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # keep only element [0] of the helper's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # incremental cp1026 decoder; error mode is read from self.errors
+ def decode(self, input, final=False):  # `final` is accepted but not used by this body
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # keep only element [0] of the helper's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # combines this module's Codec with codecs.StreamWriter
+ pass  # no members of its own; everything is inherited from the two bases
+
+class StreamReader(Codec,codecs.StreamReader):  # combines this module's Codec with codecs.StreamReader
+ pass  # no members of its own; everything is inherited from the two bases
+
+### encodings module API
+
+def getregentry():  # returns the codecs.CodecInfo record describing the cp1026 codec
+ return codecs.CodecInfo(
+ name='cp1026',
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # the classes below are passed uninstantiated
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x9c' # 0x04 -> CONTROL
+ u'\t' # 0x05 -> HORIZONTAL TABULATION
+ u'\x86' # 0x06 -> CONTROL
+ u'\x7f' # 0x07 -> DELETE
+ u'\x97' # 0x08 -> CONTROL
+ u'\x8d' # 0x09 -> CONTROL
+ u'\x8e' # 0x0A -> CONTROL
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x9d' # 0x14 -> CONTROL
+ u'\x85' # 0x15 -> CONTROL
+ u'\x08' # 0x16 -> BACKSPACE
+ u'\x87' # 0x17 -> CONTROL
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x92' # 0x1A -> CONTROL
+ u'\x8f' # 0x1B -> CONTROL
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u'\x80' # 0x20 -> CONTROL
+ u'\x81' # 0x21 -> CONTROL
+ u'\x82' # 0x22 -> CONTROL
+ u'\x83' # 0x23 -> CONTROL
+ u'\x84' # 0x24 -> CONTROL
+ u'\n' # 0x25 -> LINE FEED
+ u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ u'\x1b' # 0x27 -> ESCAPE
+ u'\x88' # 0x28 -> CONTROL
+ u'\x89' # 0x29 -> CONTROL
+ u'\x8a' # 0x2A -> CONTROL
+ u'\x8b' # 0x2B -> CONTROL
+ u'\x8c' # 0x2C -> CONTROL
+ u'\x05' # 0x2D -> ENQUIRY
+ u'\x06' # 0x2E -> ACKNOWLEDGE
+ u'\x07' # 0x2F -> BELL
+ u'\x90' # 0x30 -> CONTROL
+ u'\x91' # 0x31 -> CONTROL
+ u'\x16' # 0x32 -> SYNCHRONOUS IDLE
+ u'\x93' # 0x33 -> CONTROL
+ u'\x94' # 0x34 -> CONTROL
+ u'\x95' # 0x35 -> CONTROL
+ u'\x96' # 0x36 -> CONTROL
+ u'\x04' # 0x37 -> END OF TRANSMISSION
+ u'\x98' # 0x38 -> CONTROL
+ u'\x99' # 0x39 -> CONTROL
+ u'\x9a' # 0x3A -> CONTROL
+ u'\x9b' # 0x3B -> CONTROL
+ u'\x14' # 0x3C -> DEVICE CONTROL FOUR
+ u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ u'\x9e' # 0x3E -> CONTROL
+ u'\x1a' # 0x3F -> SUBSTITUTE
+ u' ' # 0x40 -> SPACE
+ u'\xa0' # 0x41 -> NO-BREAK SPACE
+ u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
+ u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'{' # 0x48 -> LEFT CURLY BRACKET
+ u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xc7' # 0x4A -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'.' # 0x4B -> FULL STOP
+ u'<' # 0x4C -> LESS-THAN SIGN
+ u'(' # 0x4D -> LEFT PARENTHESIS
+ u'+' # 0x4E -> PLUS SIGN
+ u'!' # 0x4F -> EXCLAMATION MARK
+ u'&' # 0x50 -> AMPERSAND
+ u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ u'\u011e' # 0x5A -> LATIN CAPITAL LETTER G WITH BREVE
+ u'\u0130' # 0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+ u'*' # 0x5C -> ASTERISK
+ u')' # 0x5D -> RIGHT PARENTHESIS
+ u';' # 0x5E -> SEMICOLON
+ u'^' # 0x5F -> CIRCUMFLEX ACCENT
+ u'-' # 0x60 -> HYPHEN-MINUS
+ u'/' # 0x61 -> SOLIDUS
+ u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+ u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'[' # 0x68 -> LEFT SQUARE BRACKET
+ u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\u015f' # 0x6A -> LATIN SMALL LETTER S WITH CEDILLA
+ u',' # 0x6B -> COMMA
+ u'%' # 0x6C -> PERCENT SIGN
+ u'_' # 0x6D -> LOW LINE
+ u'>' # 0x6E -> GREATER-THAN SIGN
+ u'?' # 0x6F -> QUESTION MARK
+ u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
+ u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+ u'\u0131' # 0x79 -> LATIN SMALL LETTER DOTLESS I
+ u':' # 0x7A -> COLON
+ u'\xd6' # 0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\u015e' # 0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA
+ u"'" # 0x7D -> APOSTROPHE
+ u'=' # 0x7E -> EQUALS SIGN
+ u'\xdc' # 0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+ u'a' # 0x81 -> LATIN SMALL LETTER A
+ u'b' # 0x82 -> LATIN SMALL LETTER B
+ u'c' # 0x83 -> LATIN SMALL LETTER C
+ u'd' # 0x84 -> LATIN SMALL LETTER D
+ u'e' # 0x85 -> LATIN SMALL LETTER E
+ u'f' # 0x86 -> LATIN SMALL LETTER F
+ u'g' # 0x87 -> LATIN SMALL LETTER G
+ u'h' # 0x88 -> LATIN SMALL LETTER H
+ u'i' # 0x89 -> LATIN SMALL LETTER I
+ u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'}' # 0x8C -> RIGHT CURLY BRACKET
+ u'`' # 0x8D -> GRAVE ACCENT
+ u'\xa6' # 0x8E -> BROKEN BAR
+ u'\xb1' # 0x8F -> PLUS-MINUS SIGN
+ u'\xb0' # 0x90 -> DEGREE SIGN
+ u'j' # 0x91 -> LATIN SMALL LETTER J
+ u'k' # 0x92 -> LATIN SMALL LETTER K
+ u'l' # 0x93 -> LATIN SMALL LETTER L
+ u'm' # 0x94 -> LATIN SMALL LETTER M
+ u'n' # 0x95 -> LATIN SMALL LETTER N
+ u'o' # 0x96 -> LATIN SMALL LETTER O
+ u'p' # 0x97 -> LATIN SMALL LETTER P
+ u'q' # 0x98 -> LATIN SMALL LETTER Q
+ u'r' # 0x99 -> LATIN SMALL LETTER R
+ u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
+ u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
+ u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
+ u'\xb8' # 0x9D -> CEDILLA
+ u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
+ u'\xa4' # 0x9F -> CURRENCY SIGN
+ u'\xb5' # 0xA0 -> MICRO SIGN
+ u'\xf6' # 0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u's' # 0xA2 -> LATIN SMALL LETTER S
+ u't' # 0xA3 -> LATIN SMALL LETTER T
+ u'u' # 0xA4 -> LATIN SMALL LETTER U
+ u'v' # 0xA5 -> LATIN SMALL LETTER V
+ u'w' # 0xA6 -> LATIN SMALL LETTER W
+ u'x' # 0xA7 -> LATIN SMALL LETTER X
+ u'y' # 0xA8 -> LATIN SMALL LETTER Y
+ u'z' # 0xA9 -> LATIN SMALL LETTER Z
+ u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
+ u'\xbf' # 0xAB -> INVERTED QUESTION MARK
+ u']' # 0xAC -> RIGHT SQUARE BRACKET
+ u'$' # 0xAD -> DOLLAR SIGN
+ u'@' # 0xAE -> COMMERCIAL AT
+ u'\xae' # 0xAF -> REGISTERED SIGN
+ u'\xa2' # 0xB0 -> CENT SIGN
+ u'\xa3' # 0xB1 -> POUND SIGN
+ u'\xa5' # 0xB2 -> YEN SIGN
+ u'\xb7' # 0xB3 -> MIDDLE DOT
+ u'\xa9' # 0xB4 -> COPYRIGHT SIGN
+ u'\xa7' # 0xB5 -> SECTION SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ u'\xac' # 0xBA -> NOT SIGN
+ u'|' # 0xBB -> VERTICAL LINE
+ u'\xaf' # 0xBC -> MACRON
+ u'\xa8' # 0xBD -> DIAERESIS
+ u'\xb4' # 0xBE -> ACUTE ACCENT
+ u'\xd7' # 0xBF -> MULTIPLICATION SIGN
+ u'\xe7' # 0xC0 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ u'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ u'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ u'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ u'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ u'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ u'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ u'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ u'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ u'\xad' # 0xCA -> SOFT HYPHEN
+ u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'~' # 0xCC -> TILDE
+ u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
+ u'\u011f' # 0xD0 -> LATIN SMALL LETTER G WITH BREVE
+ u'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ u'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ u'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ u'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ u'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ u'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ u'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ u'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ u'\xb9' # 0xDA -> SUPERSCRIPT ONE
+ u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\\' # 0xDC -> REVERSE SOLIDUS
+ u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ u'\xfc' # 0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xf7' # 0xE1 -> DIVISION SIGN
+ u'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ u'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ u'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ u'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ u'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ u'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ u'\xb2' # 0xEA -> SUPERSCRIPT TWO
+ u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'#' # 0xEC -> NUMBER SIGN
+ u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+ u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+ u'0' # 0xF0 -> DIGIT ZERO
+ u'1' # 0xF1 -> DIGIT ONE
+ u'2' # 0xF2 -> DIGIT TWO
+ u'3' # 0xF3 -> DIGIT THREE
+ u'4' # 0xF4 -> DIGIT FOUR
+ u'5' # 0xF5 -> DIGIT FIVE
+ u'6' # 0xF6 -> DIGIT SIX
+ u'7' # 0xF7 -> DIGIT SEVEN
+ u'8' # 0xF8 -> DIGIT EIGHT
+ u'9' # 0xF9 -> DIGIT NINE
+ u'\xb3' # 0xFA -> SUPERSCRIPT THREE
+ u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'"' # 0xFC -> QUOTATION MARK
+ u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\x9f' # 0xFF -> CONTROL
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)  # encoding-side table built from decoding_table; passed to charmap_encode by Codec and IncrementalEncoder
diff --git a/cashew/Lib/encodings/cp1140.py b/cashew/Lib/encodings/cp1140.py
new file mode 100644
index 0000000..7e507fd
--- /dev/null
+++ b/cashew/Lib/encodings/cp1140.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1140 generated from 'python-mappings/CP1140.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ # cp1140 codec: both methods delegate to the codecs charmap helpers with this module's tables; neither touches self.
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table) # input and errors are forwarded as-is; helper result returned unchanged
+ # decode mirrors encode: same arguments, but the lookup goes through decoding_table.
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table) # helper result returned unchanged
+
+class IncrementalEncoder(codecs.IncrementalEncoder): # chunk-wise cp1140 encoder; encode() writes nothing to self
+ def encode(self, input, final=False): # final is accepted but never read here
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0] # element 0 only; self.errors is not set in this class (presumably by the base class - confirm)
+
+class IncrementalDecoder(codecs.IncrementalDecoder): # chunk-wise cp1140 decoder; decode() writes nothing to self
+ def decode(self, input, final=False): # final is accepted but never read here
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0] # element 0 only; self.errors is not set in this class (presumably by the base class - confirm)
+
+class StreamWriter(Codec,codecs.StreamWriter): # combines this module's Codec with codecs.StreamWriter
+ pass # nothing overridden; all behaviour comes from the two bases
+
+class StreamReader(Codec,codecs.StreamReader): # combines this module's Codec with codecs.StreamReader
+ pass # nothing overridden; all behaviour comes from the two bases
+
+### encodings module API
+
+def getregentry(): # takes no arguments; returns the codecs.CodecInfo record describing this codec
+ return codecs.CodecInfo(
+ name='cp1140',
+ encode=Codec().encode, # bound method of a throwaway Codec instance
+ decode=Codec().decode, # a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder, # this and the next three entries pass the classes themselves, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
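+### Decoding Table: the literals below have no commas, so they join into one 256-character unicode string;
+### the character at index N is what byte N decodes to (each row's comment gives N and the character name).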
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x9c' # 0x04 -> CONTROL
+ u'\t' # 0x05 -> HORIZONTAL TABULATION
+ u'\x86' # 0x06 -> CONTROL
+ u'\x7f' # 0x07 -> DELETE
+ u'\x97' # 0x08 -> CONTROL
+ u'\x8d' # 0x09 -> CONTROL
+ u'\x8e' # 0x0A -> CONTROL
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x9d' # 0x14 -> CONTROL
+ u'\x85' # 0x15 -> CONTROL
+ u'\x08' # 0x16 -> BACKSPACE
+ u'\x87' # 0x17 -> CONTROL
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x92' # 0x1A -> CONTROL
+ u'\x8f' # 0x1B -> CONTROL
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u'\x80' # 0x20 -> CONTROL
+ u'\x81' # 0x21 -> CONTROL
+ u'\x82' # 0x22 -> CONTROL
+ u'\x83' # 0x23 -> CONTROL
+ u'\x84' # 0x24 -> CONTROL
+ u'\n' # 0x25 -> LINE FEED
+ u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ u'\x1b' # 0x27 -> ESCAPE
+ u'\x88' # 0x28 -> CONTROL
+ u'\x89' # 0x29 -> CONTROL
+ u'\x8a' # 0x2A -> CONTROL
+ u'\x8b' # 0x2B -> CONTROL
+ u'\x8c' # 0x2C -> CONTROL
+ u'\x05' # 0x2D -> ENQUIRY
+ u'\x06' # 0x2E -> ACKNOWLEDGE
+ u'\x07' # 0x2F -> BELL
+ u'\x90' # 0x30 -> CONTROL
+ u'\x91' # 0x31 -> CONTROL
+ u'\x16' # 0x32 -> SYNCHRONOUS IDLE
+ u'\x93' # 0x33 -> CONTROL
+ u'\x94' # 0x34 -> CONTROL
+ u'\x95' # 0x35 -> CONTROL
+ u'\x96' # 0x36 -> CONTROL
+ u'\x04' # 0x37 -> END OF TRANSMISSION
+ u'\x98' # 0x38 -> CONTROL
+ u'\x99' # 0x39 -> CONTROL
+ u'\x9a' # 0x3A -> CONTROL
+ u'\x9b' # 0x3B -> CONTROL
+ u'\x14' # 0x3C -> DEVICE CONTROL FOUR
+ u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ u'\x9e' # 0x3E -> CONTROL
+ u'\x1a' # 0x3F -> SUBSTITUTE
+ u' ' # 0x40 -> SPACE
+ u'\xa0' # 0x41 -> NO-BREAK SPACE
+ u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
+ u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xa2' # 0x4A -> CENT SIGN
+ u'.' # 0x4B -> FULL STOP
+ u'<' # 0x4C -> LESS-THAN SIGN
+ u'(' # 0x4D -> LEFT PARENTHESIS
+ u'+' # 0x4E -> PLUS SIGN
+ u'|' # 0x4F -> VERTICAL LINE
+ u'&' # 0x50 -> AMPERSAND
+ u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ u'!' # 0x5A -> EXCLAMATION MARK
+ u'$' # 0x5B -> DOLLAR SIGN
+ u'*' # 0x5C -> ASTERISK
+ u')' # 0x5D -> RIGHT PARENTHESIS
+ u';' # 0x5E -> SEMICOLON
+ u'\xac' # 0x5F -> NOT SIGN
+ u'-' # 0x60 -> HYPHEN-MINUS
+ u'/' # 0x61 -> SOLIDUS
+ u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+ u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xa6' # 0x6A -> BROKEN BAR
+ u',' # 0x6B -> COMMA
+ u'%' # 0x6C -> PERCENT SIGN
+ u'_' # 0x6D -> LOW LINE
+ u'>' # 0x6E -> GREATER-THAN SIGN
+ u'?' # 0x6F -> QUESTION MARK
+ u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
+ u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+ u'`' # 0x79 -> GRAVE ACCENT
+ u':' # 0x7A -> COLON
+ u'#' # 0x7B -> NUMBER SIGN
+ u'@' # 0x7C -> COMMERCIAL AT
+ u"'" # 0x7D -> APOSTROPHE
+ u'=' # 0x7E -> EQUALS SIGN
+ u'"' # 0x7F -> QUOTATION MARK
+ u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+ u'a' # 0x81 -> LATIN SMALL LETTER A
+ u'b' # 0x82 -> LATIN SMALL LETTER B
+ u'c' # 0x83 -> LATIN SMALL LETTER C
+ u'd' # 0x84 -> LATIN SMALL LETTER D
+ u'e' # 0x85 -> LATIN SMALL LETTER E
+ u'f' # 0x86 -> LATIN SMALL LETTER F
+ u'g' # 0x87 -> LATIN SMALL LETTER G
+ u'h' # 0x88 -> LATIN SMALL LETTER H
+ u'i' # 0x89 -> LATIN SMALL LETTER I
+ u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
+ u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
+ u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
+ u'\xb1' # 0x8F -> PLUS-MINUS SIGN
+ u'\xb0' # 0x90 -> DEGREE SIGN
+ u'j' # 0x91 -> LATIN SMALL LETTER J
+ u'k' # 0x92 -> LATIN SMALL LETTER K
+ u'l' # 0x93 -> LATIN SMALL LETTER L
+ u'm' # 0x94 -> LATIN SMALL LETTER M
+ u'n' # 0x95 -> LATIN SMALL LETTER N
+ u'o' # 0x96 -> LATIN SMALL LETTER O
+ u'p' # 0x97 -> LATIN SMALL LETTER P
+ u'q' # 0x98 -> LATIN SMALL LETTER Q
+ u'r' # 0x99 -> LATIN SMALL LETTER R
+ u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
+ u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
+ u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
+ u'\xb8' # 0x9D -> CEDILLA
+ u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
+ u'\u20ac' # 0x9F -> EURO SIGN
+ u'\xb5' # 0xA0 -> MICRO SIGN
+ u'~' # 0xA1 -> TILDE
+ u's' # 0xA2 -> LATIN SMALL LETTER S
+ u't' # 0xA3 -> LATIN SMALL LETTER T
+ u'u' # 0xA4 -> LATIN SMALL LETTER U
+ u'v' # 0xA5 -> LATIN SMALL LETTER V
+ u'w' # 0xA6 -> LATIN SMALL LETTER W
+ u'x' # 0xA7 -> LATIN SMALL LETTER X
+ u'y' # 0xA8 -> LATIN SMALL LETTER Y
+ u'z' # 0xA9 -> LATIN SMALL LETTER Z
+ u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
+ u'\xbf' # 0xAB -> INVERTED QUESTION MARK
+ u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+ u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+ u'\xae' # 0xAF -> REGISTERED SIGN
+ u'^' # 0xB0 -> CIRCUMFLEX ACCENT
+ u'\xa3' # 0xB1 -> POUND SIGN
+ u'\xa5' # 0xB2 -> YEN SIGN
+ u'\xb7' # 0xB3 -> MIDDLE DOT
+ u'\xa9' # 0xB4 -> COPYRIGHT SIGN
+ u'\xa7' # 0xB5 -> SECTION SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ u'[' # 0xBA -> LEFT SQUARE BRACKET
+ u']' # 0xBB -> RIGHT SQUARE BRACKET
+ u'\xaf' # 0xBC -> MACRON
+ u'\xa8' # 0xBD -> DIAERESIS
+ u'\xb4' # 0xBE -> ACUTE ACCENT
+ u'\xd7' # 0xBF -> MULTIPLICATION SIGN
+ u'{' # 0xC0 -> LEFT CURLY BRACKET
+ u'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ u'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ u'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ u'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ u'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ u'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ u'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ u'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ u'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ u'\xad' # 0xCA -> SOFT HYPHEN
+ u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
+ u'}' # 0xD0 -> RIGHT CURLY BRACKET
+ u'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ u'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ u'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ u'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ u'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ u'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ u'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ u'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ u'\xb9' # 0xDA -> SUPERSCRIPT ONE
+ u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ u'\\' # 0xE0 -> REVERSE SOLIDUS
+ u'\xf7' # 0xE1 -> DIVISION SIGN
+ u'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ u'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ u'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ u'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ u'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ u'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ u'\xb2' # 0xEA -> SUPERSCRIPT TWO
+ u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+ u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+ u'0' # 0xF0 -> DIGIT ZERO
+ u'1' # 0xF1 -> DIGIT ONE
+ u'2' # 0xF2 -> DIGIT TWO
+ u'3' # 0xF3 -> DIGIT THREE
+ u'4' # 0xF4 -> DIGIT FOUR
+ u'5' # 0xF5 -> DIGIT FIVE
+ u'6' # 0xF6 -> DIGIT SIX
+ u'7' # 0xF7 -> DIGIT SEVEN
+ u'8' # 0xF8 -> DIGIT EIGHT
+ u'9' # 0xF9 -> DIGIT NINE
+ u'\xb3' # 0xFA -> SUPERSCRIPT THREE
+ u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\x9f' # 0xFF -> CONTROL
+)
+
+### Encoding table (derived from decoding_table; this is the table the encode() methods above pass to codecs.charmap_encode)
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp1250.py b/cashew/Lib/encodings/cp1250.py
new file mode 100644
index 0000000..d620b89
--- /dev/null
+++ b/cashew/Lib/encodings/cp1250.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1250 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ # cp1250 codec: both methods delegate to the codecs charmap helpers with this module's tables; neither touches self.
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table) # input and errors are forwarded as-is; helper result returned unchanged
+ # decode mirrors encode: same arguments, but the lookup goes through decoding_table.
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table) # helper result returned unchanged
+
+class IncrementalEncoder(codecs.IncrementalEncoder): # chunk-wise cp1250 encoder; encode() writes nothing to self
+ def encode(self, input, final=False): # final is accepted but never read here
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0] # element 0 only; self.errors is not set in this class (presumably by the base class - confirm)
+
+class IncrementalDecoder(codecs.IncrementalDecoder): # chunk-wise cp1250 decoder; decode() writes nothing to self
+ def decode(self, input, final=False): # final is accepted but never read here
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0] # element 0 only; self.errors is not set in this class (presumably by the base class - confirm)
+
+class StreamWriter(Codec,codecs.StreamWriter): # combines this module's Codec with codecs.StreamWriter
+ pass # nothing overridden; all behaviour comes from the two bases
+
+class StreamReader(Codec,codecs.StreamReader): # combines this module's Codec with codecs.StreamReader
+ pass # nothing overridden; all behaviour comes from the two bases
+
+### encodings module API
+
+def getregentry(): # takes no arguments; returns the codecs.CodecInfo record describing this codec
+ return codecs.CodecInfo(
+ name='cp1250',
+ encode=Codec().encode, # bound method of a throwaway Codec instance
+ decode=Codec().decode, # a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder, # this and the next three entries pass the classes themselves, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
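+### Decoding Table: the literals below have no commas, so they join into one 256-character unicode string; the character
+### at index N is what byte N decodes to (see each row's comment). Rows commented UNDEFINED hold u'\ufffe'.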
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u20ac' # 0x80 -> EURO SIGN
+ u'\ufffe' # 0x81 -> UNDEFINED
+ u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ u'\ufffe' # 0x83 -> UNDEFINED
+ u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ u'\u2020' # 0x86 -> DAGGER
+ u'\u2021' # 0x87 -> DOUBLE DAGGER
+ u'\ufffe' # 0x88 -> UNDEFINED
+ u'\u2030' # 0x89 -> PER MILLE SIGN
+ u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON
+ u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ u'\u015a' # 0x8C -> LATIN CAPITAL LETTER S WITH ACUTE
+ u'\u0164' # 0x8D -> LATIN CAPITAL LETTER T WITH CARON
+ u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON
+ u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE
+ u'\ufffe' # 0x90 -> UNDEFINED
+ u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ u'\u2022' # 0x95 -> BULLET
+ u'\u2013' # 0x96 -> EN DASH
+ u'\u2014' # 0x97 -> EM DASH
+ u'\ufffe' # 0x98 -> UNDEFINED
+ u'\u2122' # 0x99 -> TRADE MARK SIGN
+ u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON
+ u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ u'\u015b' # 0x9C -> LATIN SMALL LETTER S WITH ACUTE
+ u'\u0165' # 0x9D -> LATIN SMALL LETTER T WITH CARON
+ u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON
+ u'\u017a' # 0x9F -> LATIN SMALL LETTER Z WITH ACUTE
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\u02c7' # 0xA1 -> CARON
+ u'\u02d8' # 0xA2 -> BREVE
+ u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
+ u'\xa4' # 0xA4 -> CURRENCY SIGN
+ u'\u0104' # 0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK
+ u'\xa6' # 0xA6 -> BROKEN BAR
+ u'\xa7' # 0xA7 -> SECTION SIGN
+ u'\xa8' # 0xA8 -> DIAERESIS
+ u'\xa9' # 0xA9 -> COPYRIGHT SIGN
+ u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
+ u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xac' # 0xAC -> NOT SIGN
+ u'\xad' # 0xAD -> SOFT HYPHEN
+ u'\xae' # 0xAE -> REGISTERED SIGN
+ u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ u'\xb0' # 0xB0 -> DEGREE SIGN
+ u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ u'\u02db' # 0xB2 -> OGONEK
+ u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE
+ u'\xb4' # 0xB4 -> ACUTE ACCENT
+ u'\xb5' # 0xB5 -> MICRO SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xb7' # 0xB7 -> MIDDLE DOT
+ u'\xb8' # 0xB8 -> CEDILLA
+ u'\u0105' # 0xB9 -> LATIN SMALL LETTER A WITH OGONEK
+ u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA
+ u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u013d' # 0xBC -> LATIN CAPITAL LETTER L WITH CARON
+ u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT
+ u'\u013e' # 0xBE -> LATIN SMALL LETTER L WITH CARON
+ u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
+ u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE
+ u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
+ u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE
+ u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE
+ u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+ u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
+ u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON
+ u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON
+ u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
+ u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
+ u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON
+ u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+ u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON
+ u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE
+ u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+ u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA
+ u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
+ u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE
+ u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE
+ u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE
+ u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE
+ u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON
+ u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK
+ u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON
+ u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON
+ u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE
+ u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE
+ u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON
+ u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf7' # 0xF7 -> DIVISION SIGN
+ u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON
+ u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE
+ u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
+ u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+ u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+ u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA
+ u'\u02d9' # 0xFF -> DOT ABOVE
+)
+
+### Encoding table (derived from decoding_table; this is the table the encode() methods above pass to codecs.charmap_encode)
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp1251.py b/cashew/Lib/encodings/cp1251.py
new file mode 100644
index 0000000..216771f
--- /dev/null
+++ b/cashew/Lib/encodings/cp1251.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1251 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ # cp1251 codec: both methods delegate to the codecs charmap helpers with this module's tables; neither touches self.
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table) # input and errors are forwarded as-is; helper result returned unchanged
+ # decode mirrors encode: same arguments, but the lookup goes through decoding_table.
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table) # helper result returned unchanged
+
+class IncrementalEncoder(codecs.IncrementalEncoder): # chunk-wise cp1251 encoder; encode() writes nothing to self
+ def encode(self, input, final=False): # final is accepted but never read here
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0] # element 0 only; self.errors is not set in this class (presumably by the base class - confirm)
+
+class IncrementalDecoder(codecs.IncrementalDecoder): # chunk-wise cp1251 decoder; decode() writes nothing to self
+ def decode(self, input, final=False): # final is accepted but never read here
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0] # element 0 only; self.errors is not set in this class (presumably by the base class - confirm)
+
+class StreamWriter(Codec,codecs.StreamWriter): # combines this module's Codec with codecs.StreamWriter
+ pass # nothing overridden; all behaviour comes from the two bases
+
+class StreamReader(Codec,codecs.StreamReader): # combines this module's Codec with codecs.StreamReader
+ pass # nothing overridden; all behaviour comes from the two bases
+
+### encodings module API
+
+def getregentry(): # takes no arguments; returns the codecs.CodecInfo record describing this codec
+ return codecs.CodecInfo(
+ name='cp1251',
+ encode=Codec().encode, # bound method of a throwaway Codec instance
+ decode=Codec().decode, # a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder, # this and the next three entries pass the classes themselves, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
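+### Decoding Table: the literals below have no commas, so they join into one 256-character unicode string; the character
+### at index N is what byte N decodes to (see each row's comment). The row commented UNDEFINED holds u'\ufffe'.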
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE
+ u'\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE
+ u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ u'\u0453' # 0x83 -> CYRILLIC SMALL LETTER GJE
+ u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ u'\u2020' # 0x86 -> DAGGER
+ u'\u2021' # 0x87 -> DOUBLE DAGGER
+ u'\u20ac' # 0x88 -> EURO SIGN
+ u'\u2030' # 0x89 -> PER MILLE SIGN
+ u'\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE
+ u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ u'\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE
+ u'\u040c' # 0x8D -> CYRILLIC CAPITAL LETTER KJE
+ u'\u040b' # 0x8E -> CYRILLIC CAPITAL LETTER TSHE
+ u'\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE
+ u'\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE
+ u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ u'\u2022' # 0x95 -> BULLET
+ u'\u2013' # 0x96 -> EN DASH
+ u'\u2014' # 0x97 -> EM DASH
+ u'\ufffe' # 0x98 -> UNDEFINED
+ u'\u2122' # 0x99 -> TRADE MARK SIGN
+ u'\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE
+ u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ u'\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE
+ u'\u045c' # 0x9D -> CYRILLIC SMALL LETTER KJE
+ u'\u045b' # 0x9E -> CYRILLIC SMALL LETTER TSHE
+ u'\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U
+ u'\u045e' # 0xA2 -> CYRILLIC SMALL LETTER SHORT U
+ u'\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE
+ u'\xa4' # 0xA4 -> CURRENCY SIGN
+ u'\u0490' # 0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+ u'\xa6' # 0xA6 -> BROKEN BAR
+ u'\xa7' # 0xA7 -> SECTION SIGN
+ u'\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO
+ u'\xa9' # 0xA9 -> COPYRIGHT SIGN
+ u'\u0404' # 0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xac' # 0xAC -> NOT SIGN
+ u'\xad' # 0xAD -> SOFT HYPHEN
+ u'\xae' # 0xAE -> REGISTERED SIGN
+ u'\u0407' # 0xAF -> CYRILLIC CAPITAL LETTER YI
+ u'\xb0' # 0xB0 -> DEGREE SIGN
+ u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ u'\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ u'\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ u'\u0491' # 0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN
+ u'\xb5' # 0xB5 -> MICRO SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xb7' # 0xB7 -> MIDDLE DOT
+ u'\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO
+ u'\u2116' # 0xB9 -> NUMERO SIGN
+ u'\u0454' # 0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE
+ u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE
+ u'\u0405' # 0xBD -> CYRILLIC CAPITAL LETTER DZE
+ u'\u0455' # 0xBE -> CYRILLIC SMALL LETTER DZE
+ u'\u0457' # 0xBF -> CYRILLIC SMALL LETTER YI
+ u'\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A
+ u'\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE
+ u'\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE
+ u'\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE
+ u'\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE
+ u'\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE
+ u'\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE
+ u'\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE
+ u'\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I
+ u'\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I
+ u'\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA
+ u'\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL
+ u'\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM
+ u'\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN
+ u'\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O
+ u'\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE
+ u'\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER
+ u'\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES
+ u'\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE
+ u'\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U
+ u'\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF
+ u'\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA
+ u'\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE
+ u'\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE
+ u'\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA
+ u'\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA
+ u'\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN
+ u'\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU
+ u'\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN
+ u'\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E
+ u'\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU
+ u'\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA
+ u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A
+ u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE
+ u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE
+ u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE
+ u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE
+ u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE
+ u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE
+ u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE
+ u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I
+ u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I
+ u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA
+ u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL
+ u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM
+ u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN
+ u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O
+ u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE
+ u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER
+ u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES
+ u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE
+ u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U
+ u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF
+ u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA
+ u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE
+ u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE
+ u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA
+ u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA
+ u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN
+ u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU
+ u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN
+ u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E
+ u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU
+ u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA
+)
+
+### Encoding table (derived from decoding_table; this is the table the encode() methods above pass to codecs.charmap_encode)
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp1252.py b/cashew/Lib/encodings/cp1252.py
new file mode 100644
index 0000000..e60a328
--- /dev/null
+++ b/cashew/Lib/encodings/cp1252.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1252 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    """cp1252 codec: both directions delegate to the codecs charmap helpers."""
+
+    def decode(self, input, errors='strict'):
+        """Return codecs.charmap_decode(input, errors, decoding_table)."""
+        table = decoding_table
+        return codecs.charmap_decode(input, errors, table)
+
+    def encode(self, input, errors='strict'):
+        """Return codecs.charmap_encode(input, errors, encoding_table)."""
+        table = encoding_table
+        return codecs.charmap_encode(input, errors, table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    """Incremental cp1252 encoder using this module's encoding_table."""
+
+    def encode(self, input, final=False):
+        """Encode one chunk with self.errors; `final` is not consulted."""
+        result = codecs.charmap_encode(input, self.errors, encoding_table)
+        # Only the first element of the charmap_encode result is returned.
+        return result[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    """Incremental cp1252 decoder using this module's decoding_table."""
+
+    def decode(self, input, final=False):
+        """Decode one chunk with self.errors; `final` is not consulted."""
+        result = codecs.charmap_decode(input, self.errors, decoding_table)
+        # Only the first element of the charmap_decode result is returned.
+        return result[0]
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    """cp1252 stream writer: Codec mixed into codecs.StreamWriter, nothing added."""
+
+class StreamReader(Codec, codecs.StreamReader):
+    """cp1252 stream reader: Codec mixed into codecs.StreamReader, nothing added."""
+
+### encodings module API
+
+def getregentry():
+    """Return the codecs.CodecInfo record describing the cp1252 codec."""
+    # Separate Codec instances supply the encode and decode callables.
+    encode_func = Codec().encode
+    decode_func = Codec().decode
+    entry = {
+        'name': 'cp1252',
+        'encode': encode_func,
+        'decode': decode_func,
+        'incrementalencoder': IncrementalEncoder,
+        'incrementaldecoder': IncrementalDecoder,
+        'streamreader': StreamReader,
+        'streamwriter': StreamWriter,
+    }
+    return codecs.CodecInfo(**entry)
+
+
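+### Decoding Table: one unicode string built from adjacent literals; character N is the mapping for cp1252 byte N, and u'\ufffe' fills the slots commented UNDEFINED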
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u20ac' # 0x80 -> EURO SIGN
+ u'\ufffe' # 0x81 -> UNDEFINED
+ u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ u'\u2020' # 0x86 -> DAGGER
+ u'\u2021' # 0x87 -> DOUBLE DAGGER
+ u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ u'\u2030' # 0x89 -> PER MILLE SIGN
+ u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON
+ u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
+ u'\ufffe' # 0x8D -> UNDEFINED
+ u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON
+ u'\ufffe' # 0x8F -> UNDEFINED
+ u'\ufffe' # 0x90 -> UNDEFINED
+ u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ u'\u2022' # 0x95 -> BULLET
+ u'\u2013' # 0x96 -> EN DASH
+ u'\u2014' # 0x97 -> EM DASH
+ u'\u02dc' # 0x98 -> SMALL TILDE
+ u'\u2122' # 0x99 -> TRADE MARK SIGN
+ u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON
+ u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
+ u'\ufffe' # 0x9D -> UNDEFINED
+ u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON
+ u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
+ u'\xa2' # 0xA2 -> CENT SIGN
+ u'\xa3' # 0xA3 -> POUND SIGN
+ u'\xa4' # 0xA4 -> CURRENCY SIGN
+ u'\xa5' # 0xA5 -> YEN SIGN
+ u'\xa6' # 0xA6 -> BROKEN BAR
+ u'\xa7' # 0xA7 -> SECTION SIGN
+ u'\xa8' # 0xA8 -> DIAERESIS
+ u'\xa9' # 0xA9 -> COPYRIGHT SIGN
+ u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
+ u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xac' # 0xAC -> NOT SIGN
+ u'\xad' # 0xAD -> SOFT HYPHEN
+ u'\xae' # 0xAE -> REGISTERED SIGN
+ u'\xaf' # 0xAF -> MACRON
+ u'\xb0' # 0xB0 -> DEGREE SIGN
+ u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ u'\xb4' # 0xB4 -> ACUTE ACCENT
+ u'\xb5' # 0xB5 -> MICRO SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xb7' # 0xB7 -> MIDDLE DOT
+ u'\xb8' # 0xB8 -> CEDILLA
+ u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
+ u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ u'\xbf' # 0xBF -> INVERTED QUESTION MARK
+ u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+ u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
+ u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+ u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH
+ u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+ u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+ u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+ u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN
+ u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
+ u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
+ u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
+ u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH
+ u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
+ u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf7' # 0xF7 -> DIVISION SIGN
+ u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
+ u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+ u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN
+ u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+)
+
+### Encoding table: built from decoding_table by codecs.charmap_build and passed to codecs.charmap_encode
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp1253.py b/cashew/Lib/encodings/cp1253.py
new file mode 100644
index 0000000..49f6ccc
--- /dev/null
+++ b/cashew/Lib/encodings/cp1253.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1253 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    """cp1253 codec: both directions delegate to the codecs charmap helpers."""
+
+    def decode(self, input, errors='strict'):
+        """Return codecs.charmap_decode(input, errors, decoding_table)."""
+        table = decoding_table
+        return codecs.charmap_decode(input, errors, table)
+
+    def encode(self, input, errors='strict'):
+        """Return codecs.charmap_encode(input, errors, encoding_table)."""
+        table = encoding_table
+        return codecs.charmap_encode(input, errors, table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    """Incremental cp1253 encoder using this module's encoding_table."""
+
+    def encode(self, input, final=False):
+        """Encode one chunk with self.errors; `final` is not consulted."""
+        result = codecs.charmap_encode(input, self.errors, encoding_table)
+        # Only the first element of the charmap_encode result is returned.
+        return result[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    """Incremental cp1253 decoder using this module's decoding_table."""
+
+    def decode(self, input, final=False):
+        """Decode one chunk with self.errors; `final` is not consulted."""
+        result = codecs.charmap_decode(input, self.errors, decoding_table)
+        # Only the first element of the charmap_decode result is returned.
+        return result[0]
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    """cp1253 stream writer: Codec mixed into codecs.StreamWriter, nothing added."""
+
+class StreamReader(Codec, codecs.StreamReader):
+    """cp1253 stream reader: Codec mixed into codecs.StreamReader, nothing added."""
+
+### encodings module API
+
+def getregentry():
+    """Return the codecs.CodecInfo record describing the cp1253 codec."""
+    # Separate Codec instances supply the encode and decode callables.
+    encode_func = Codec().encode
+    decode_func = Codec().decode
+    entry = {
+        'name': 'cp1253',
+        'encode': encode_func,
+        'decode': decode_func,
+        'incrementalencoder': IncrementalEncoder,
+        'incrementaldecoder': IncrementalDecoder,
+        'streamreader': StreamReader,
+        'streamwriter': StreamWriter,
+    }
+    return codecs.CodecInfo(**entry)
+
+
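+### Decoding Table: one unicode string built from adjacent literals; character N is the mapping for cp1253 byte N, and u'\ufffe' fills the slots commented UNDEFINED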
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u20ac' # 0x80 -> EURO SIGN
+ u'\ufffe' # 0x81 -> UNDEFINED
+ u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ u'\u2020' # 0x86 -> DAGGER
+ u'\u2021' # 0x87 -> DOUBLE DAGGER
+ u'\ufffe' # 0x88 -> UNDEFINED
+ u'\u2030' # 0x89 -> PER MILLE SIGN
+ u'\ufffe' # 0x8A -> UNDEFINED
+ u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ u'\ufffe' # 0x8C -> UNDEFINED
+ u'\ufffe' # 0x8D -> UNDEFINED
+ u'\ufffe' # 0x8E -> UNDEFINED
+ u'\ufffe' # 0x8F -> UNDEFINED
+ u'\ufffe' # 0x90 -> UNDEFINED
+ u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ u'\u2022' # 0x95 -> BULLET
+ u'\u2013' # 0x96 -> EN DASH
+ u'\u2014' # 0x97 -> EM DASH
+ u'\ufffe' # 0x98 -> UNDEFINED
+ u'\u2122' # 0x99 -> TRADE MARK SIGN
+ u'\ufffe' # 0x9A -> UNDEFINED
+ u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ u'\ufffe' # 0x9C -> UNDEFINED
+ u'\ufffe' # 0x9D -> UNDEFINED
+ u'\ufffe' # 0x9E -> UNDEFINED
+ u'\ufffe' # 0x9F -> UNDEFINED
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\u0385' # 0xA1 -> GREEK DIALYTIKA TONOS
+ u'\u0386' # 0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+ u'\xa3' # 0xA3 -> POUND SIGN
+ u'\xa4' # 0xA4 -> CURRENCY SIGN
+ u'\xa5' # 0xA5 -> YEN SIGN
+ u'\xa6' # 0xA6 -> BROKEN BAR
+ u'\xa7' # 0xA7 -> SECTION SIGN
+ u'\xa8' # 0xA8 -> DIAERESIS
+ u'\xa9' # 0xA9 -> COPYRIGHT SIGN
+ u'\ufffe' # 0xAA -> UNDEFINED
+ u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xac' # 0xAC -> NOT SIGN
+ u'\xad' # 0xAD -> SOFT HYPHEN
+ u'\xae' # 0xAE -> REGISTERED SIGN
+ u'\u2015' # 0xAF -> HORIZONTAL BAR
+ u'\xb0' # 0xB0 -> DEGREE SIGN
+ u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ u'\u0384' # 0xB4 -> GREEK TONOS
+ u'\xb5' # 0xB5 -> MICRO SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xb7' # 0xB7 -> MIDDLE DOT
+ u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+ u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS
+ u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS
+ u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+ u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+ u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+ u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA
+ u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA
+ u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA
+ u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA
+ u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON
+ u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA
+ u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA
+ u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA
+ u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA
+ u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA
+ u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA
+ u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU
+ u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU
+ u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI
+ u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON
+ u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI
+ u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO
+ u'\ufffe' # 0xD2 -> UNDEFINED
+ u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA
+ u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU
+ u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON
+ u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI
+ u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI
+ u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI
+ u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA
+ u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS
+ u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS
+ u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS
+ u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS
+ u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA
+ u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA
+ u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA
+ u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA
+ u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON
+ u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA
+ u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA
+ u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA
+ u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA
+ u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA
+ u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA
+ u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU
+ u'\u03bd' # 0xED -> GREEK SMALL LETTER NU
+ u'\u03be' # 0xEE -> GREEK SMALL LETTER XI
+ u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON
+ u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI
+ u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO
+ u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA
+ u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA
+ u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU
+ u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON
+ u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI
+ u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI
+ u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI
+ u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA
+ u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS
+ u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS
+ u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS
+ u'\ufffe' # 0xFF -> UNDEFINED
+)
+
+### Encoding table: built from decoding_table by codecs.charmap_build and passed to codecs.charmap_encode
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp1254.py b/cashew/Lib/encodings/cp1254.py
new file mode 100644
index 0000000..65530ab
--- /dev/null
+++ b/cashew/Lib/encodings/cp1254.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1254 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    """cp1254 codec: both directions delegate to the codecs charmap helpers."""
+
+    def decode(self, input, errors='strict'):
+        """Return codecs.charmap_decode(input, errors, decoding_table)."""
+        table = decoding_table
+        return codecs.charmap_decode(input, errors, table)
+
+    def encode(self, input, errors='strict'):
+        """Return codecs.charmap_encode(input, errors, encoding_table)."""
+        table = encoding_table
+        return codecs.charmap_encode(input, errors, table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    """Incremental cp1254 encoder using this module's encoding_table."""
+
+    def encode(self, input, final=False):
+        """Encode one chunk with self.errors; `final` is not consulted."""
+        result = codecs.charmap_encode(input, self.errors, encoding_table)
+        # Only the first element of the charmap_encode result is returned.
+        return result[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    """Incremental cp1254 decoder using this module's decoding_table."""
+
+    def decode(self, input, final=False):
+        """Decode one chunk with self.errors; `final` is not consulted."""
+        result = codecs.charmap_decode(input, self.errors, decoding_table)
+        # Only the first element of the charmap_decode result is returned.
+        return result[0]
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    """cp1254 stream writer: Codec mixed into codecs.StreamWriter, nothing added."""
+
+class StreamReader(Codec, codecs.StreamReader):
+    """cp1254 stream reader: Codec mixed into codecs.StreamReader, nothing added."""
+
+### encodings module API
+
+def getregentry():
+    """Return the codecs.CodecInfo record describing the cp1254 codec."""
+    # Separate Codec instances supply the encode and decode callables.
+    encode_func = Codec().encode
+    decode_func = Codec().decode
+    entry = {
+        'name': 'cp1254',
+        'encode': encode_func,
+        'decode': decode_func,
+        'incrementalencoder': IncrementalEncoder,
+        'incrementaldecoder': IncrementalDecoder,
+        'streamreader': StreamReader,
+        'streamwriter': StreamWriter,
+    }
+    return codecs.CodecInfo(**entry)
+
+
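+### Decoding Table: one unicode string built from adjacent literals; character N is the mapping for cp1254 byte N, and u'\ufffe' fills the slots commented UNDEFINED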
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u20ac' # 0x80 -> EURO SIGN
+ u'\ufffe' # 0x81 -> UNDEFINED
+ u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ u'\u2020' # 0x86 -> DAGGER
+ u'\u2021' # 0x87 -> DOUBLE DAGGER
+ u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ u'\u2030' # 0x89 -> PER MILLE SIGN
+ u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON
+ u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
+ u'\ufffe' # 0x8D -> UNDEFINED
+ u'\ufffe' # 0x8E -> UNDEFINED
+ u'\ufffe' # 0x8F -> UNDEFINED
+ u'\ufffe' # 0x90 -> UNDEFINED
+ u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ u'\u2022' # 0x95 -> BULLET
+ u'\u2013' # 0x96 -> EN DASH
+ u'\u2014' # 0x97 -> EM DASH
+ u'\u02dc' # 0x98 -> SMALL TILDE
+ u'\u2122' # 0x99 -> TRADE MARK SIGN
+ u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON
+ u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
+ u'\ufffe' # 0x9D -> UNDEFINED
+ u'\ufffe' # 0x9E -> UNDEFINED
+ u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
+ u'\xa2' # 0xA2 -> CENT SIGN
+ u'\xa3' # 0xA3 -> POUND SIGN
+ u'\xa4' # 0xA4 -> CURRENCY SIGN
+ u'\xa5' # 0xA5 -> YEN SIGN
+ u'\xa6' # 0xA6 -> BROKEN BAR
+ u'\xa7' # 0xA7 -> SECTION SIGN
+ u'\xa8' # 0xA8 -> DIAERESIS
+ u'\xa9' # 0xA9 -> COPYRIGHT SIGN
+ u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
+ u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xac' # 0xAC -> NOT SIGN
+ u'\xad' # 0xAD -> SOFT HYPHEN
+ u'\xae' # 0xAE -> REGISTERED SIGN
+ u'\xaf' # 0xAF -> MACRON
+ u'\xb0' # 0xB0 -> DEGREE SIGN
+ u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ u'\xb4' # 0xB4 -> ACUTE ACCENT
+ u'\xb5' # 0xB5 -> MICRO SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xb7' # 0xB7 -> MIDDLE DOT
+ u'\xb8' # 0xB8 -> CEDILLA
+ u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
+ u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ u'\xbf' # 0xBF -> INVERTED QUESTION MARK
+ u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+ u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
+ u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+ u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE
+ u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+ u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+ u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+ u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+ u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA
+ u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
+ u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
+ u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
+ u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE
+ u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
+ u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf7' # 0xF7 -> DIVISION SIGN
+ u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
+ u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I
+ u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA
+ u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+)
+
+### Encoding table: built from decoding_table by codecs.charmap_build and passed to codecs.charmap_encode
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp1255.py b/cashew/Lib/encodings/cp1255.py
new file mode 100644
index 0000000..fd1456f
--- /dev/null
+++ b/cashew/Lib/encodings/cp1255.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1255 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # cp1255 codec; both methods delegate to the codecs charmap helpers
+
+ def encode(self,input,errors='strict'):  # errors defaults to 'strict' and is forwarded as given
+ return codecs.charmap_encode(input,errors,encoding_table)  # passes the module-level encoding_table as the mapping
+
+ def decode(self,input,errors='strict'):  # errors defaults to 'strict' and is forwarded as given
+ return codecs.charmap_decode(input,errors,decoding_table)  # passes the module-level decoding_table as the mapping
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # incremental wrapper around the same charmap_encode call used by Codec.encode
+ def encode(self, input, final=False):  # final is accepted but not used in the body
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # error mode comes from self.errors; only element [0] of the result is returned
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # incremental wrapper around the same charmap_decode call used by Codec.decode
+ def decode(self, input, final=False):  # final is accepted but not used in the body
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # error mode comes from self.errors; only element [0] of the result is returned
+
+class StreamWriter(Codec,codecs.StreamWriter):  # combines this module's Codec (listed first) with codecs.StreamWriter
+ pass  # no overrides; everything is inherited from the two bases
+
+class StreamReader(Codec,codecs.StreamReader):  # combines this module's Codec (listed first) with codecs.StreamReader
+ pass  # no overrides; everything is inherited from the two bases
+
+### encodings module API
+
+def getregentry():  # returns the codecs.CodecInfo record that describes this codec; takes no arguments
+ return codecs.CodecInfo(
+ name='cp1255',  # codec name stored in the record
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # the classes below are passed uninstantiated
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
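+### Decoding Table (a single unicode string built from adjacent literals: index = cp1255 byte value, character = its decoding; u'\ufffe' marks UNDEFINED bytes)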
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u20ac' # 0x80 -> EURO SIGN
+ u'\ufffe' # 0x81 -> UNDEFINED
+ u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ u'\u2020' # 0x86 -> DAGGER
+ u'\u2021' # 0x87 -> DOUBLE DAGGER
+ u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ u'\u2030' # 0x89 -> PER MILLE SIGN
+ u'\ufffe' # 0x8A -> UNDEFINED
+ u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ u'\ufffe' # 0x8C -> UNDEFINED
+ u'\ufffe' # 0x8D -> UNDEFINED
+ u'\ufffe' # 0x8E -> UNDEFINED
+ u'\ufffe' # 0x8F -> UNDEFINED
+ u'\ufffe' # 0x90 -> UNDEFINED
+ u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ u'\u2022' # 0x95 -> BULLET
+ u'\u2013' # 0x96 -> EN DASH
+ u'\u2014' # 0x97 -> EM DASH
+ u'\u02dc' # 0x98 -> SMALL TILDE
+ u'\u2122' # 0x99 -> TRADE MARK SIGN
+ u'\ufffe' # 0x9A -> UNDEFINED
+ u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ u'\ufffe' # 0x9C -> UNDEFINED
+ u'\ufffe' # 0x9D -> UNDEFINED
+ u'\ufffe' # 0x9E -> UNDEFINED
+ u'\ufffe' # 0x9F -> UNDEFINED
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
+ u'\xa2' # 0xA2 -> CENT SIGN
+ u'\xa3' # 0xA3 -> POUND SIGN
+ u'\u20aa' # 0xA4 -> NEW SHEQEL SIGN
+ u'\xa5' # 0xA5 -> YEN SIGN
+ u'\xa6' # 0xA6 -> BROKEN BAR
+ u'\xa7' # 0xA7 -> SECTION SIGN
+ u'\xa8' # 0xA8 -> DIAERESIS
+ u'\xa9' # 0xA9 -> COPYRIGHT SIGN
+ u'\xd7' # 0xAA -> MULTIPLICATION SIGN
+ u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xac' # 0xAC -> NOT SIGN
+ u'\xad' # 0xAD -> SOFT HYPHEN
+ u'\xae' # 0xAE -> REGISTERED SIGN
+ u'\xaf' # 0xAF -> MACRON
+ u'\xb0' # 0xB0 -> DEGREE SIGN
+ u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ u'\xb4' # 0xB4 -> ACUTE ACCENT
+ u'\xb5' # 0xB5 -> MICRO SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xb7' # 0xB7 -> MIDDLE DOT
+ u'\xb8' # 0xB8 -> CEDILLA
+ u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ u'\xf7' # 0xBA -> DIVISION SIGN
+ u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ u'\xbf' # 0xBF -> INVERTED QUESTION MARK
+ u'\u05b0' # 0xC0 -> HEBREW POINT SHEVA
+ u'\u05b1' # 0xC1 -> HEBREW POINT HATAF SEGOL
+ u'\u05b2' # 0xC2 -> HEBREW POINT HATAF PATAH
+ u'\u05b3' # 0xC3 -> HEBREW POINT HATAF QAMATS
+ u'\u05b4' # 0xC4 -> HEBREW POINT HIRIQ
+ u'\u05b5' # 0xC5 -> HEBREW POINT TSERE
+ u'\u05b6' # 0xC6 -> HEBREW POINT SEGOL
+ u'\u05b7' # 0xC7 -> HEBREW POINT PATAH
+ u'\u05b8' # 0xC8 -> HEBREW POINT QAMATS
+ u'\u05b9' # 0xC9 -> HEBREW POINT HOLAM
+ u'\ufffe' # 0xCA -> UNDEFINED
+ u'\u05bb' # 0xCB -> HEBREW POINT QUBUTS
+ u'\u05bc' # 0xCC -> HEBREW POINT DAGESH OR MAPIQ
+ u'\u05bd' # 0xCD -> HEBREW POINT METEG
+ u'\u05be' # 0xCE -> HEBREW PUNCTUATION MAQAF
+ u'\u05bf' # 0xCF -> HEBREW POINT RAFE
+ u'\u05c0' # 0xD0 -> HEBREW PUNCTUATION PASEQ
+ u'\u05c1' # 0xD1 -> HEBREW POINT SHIN DOT
+ u'\u05c2' # 0xD2 -> HEBREW POINT SIN DOT
+ u'\u05c3' # 0xD3 -> HEBREW PUNCTUATION SOF PASUQ
+ u'\u05f0' # 0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV
+ u'\u05f1' # 0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD
+ u'\u05f2' # 0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD
+ u'\u05f3' # 0xD7 -> HEBREW PUNCTUATION GERESH
+ u'\u05f4' # 0xD8 -> HEBREW PUNCTUATION GERSHAYIM
+ u'\ufffe' # 0xD9 -> UNDEFINED
+ u'\ufffe' # 0xDA -> UNDEFINED
+ u'\ufffe' # 0xDB -> UNDEFINED
+ u'\ufffe' # 0xDC -> UNDEFINED
+ u'\ufffe' # 0xDD -> UNDEFINED
+ u'\ufffe' # 0xDE -> UNDEFINED
+ u'\ufffe' # 0xDF -> UNDEFINED
+ u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF
+ u'\u05d1' # 0xE1 -> HEBREW LETTER BET
+ u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL
+ u'\u05d3' # 0xE3 -> HEBREW LETTER DALET
+ u'\u05d4' # 0xE4 -> HEBREW LETTER HE
+ u'\u05d5' # 0xE5 -> HEBREW LETTER VAV
+ u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN
+ u'\u05d7' # 0xE7 -> HEBREW LETTER HET
+ u'\u05d8' # 0xE8 -> HEBREW LETTER TET
+ u'\u05d9' # 0xE9 -> HEBREW LETTER YOD
+ u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF
+ u'\u05db' # 0xEB -> HEBREW LETTER KAF
+ u'\u05dc' # 0xEC -> HEBREW LETTER LAMED
+ u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM
+ u'\u05de' # 0xEE -> HEBREW LETTER MEM
+ u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN
+ u'\u05e0' # 0xF0 -> HEBREW LETTER NUN
+ u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH
+ u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN
+ u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE
+ u'\u05e4' # 0xF4 -> HEBREW LETTER PE
+ u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI
+ u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI
+ u'\u05e7' # 0xF7 -> HEBREW LETTER QOF
+ u'\u05e8' # 0xF8 -> HEBREW LETTER RESH
+ u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN
+ u'\u05ea' # 0xFA -> HEBREW LETTER TAV
+ u'\ufffe' # 0xFB -> UNDEFINED
+ u'\ufffe' # 0xFC -> UNDEFINED
+ u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK
+ u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK
+ u'\ufffe' # 0xFF -> UNDEFINED
+)
+
+### Encoding table (derived from decoding_table by codecs.charmap_build; passed to codecs.charmap_encode by the encoder classes of this module)
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp1256.py b/cashew/Lib/encodings/cp1256.py
new file mode 100644
index 0000000..302b5fa
--- /dev/null
+++ b/cashew/Lib/encodings/cp1256.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1256 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # cp1256 codec; both methods delegate to the codecs charmap helpers
+
+ def encode(self,input,errors='strict'):  # errors defaults to 'strict' and is forwarded as given
+ return codecs.charmap_encode(input,errors,encoding_table)  # passes the module-level encoding_table as the mapping
+
+ def decode(self,input,errors='strict'):  # errors defaults to 'strict' and is forwarded as given
+ return codecs.charmap_decode(input,errors,decoding_table)  # passes the module-level decoding_table as the mapping
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # incremental wrapper around the same charmap_encode call used by Codec.encode
+ def encode(self, input, final=False):  # final is accepted but not used in the body
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # error mode comes from self.errors; only element [0] of the result is returned
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # incremental wrapper around the same charmap_decode call used by Codec.decode
+ def decode(self, input, final=False):  # final is accepted but not used in the body
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # error mode comes from self.errors; only element [0] of the result is returned
+
+class StreamWriter(Codec,codecs.StreamWriter):  # combines this module's Codec (listed first) with codecs.StreamWriter
+ pass  # no overrides; everything is inherited from the two bases
+
+class StreamReader(Codec,codecs.StreamReader):  # combines this module's Codec (listed first) with codecs.StreamReader
+ pass  # no overrides; everything is inherited from the two bases
+
+### encodings module API
+
+def getregentry():  # returns the codecs.CodecInfo record that describes this codec; takes no arguments
+ return codecs.CodecInfo(
+ name='cp1256',  # codec name stored in the record
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # the classes below are passed uninstantiated
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
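+### Decoding Table (a single unicode string built from adjacent literals: index = cp1256 byte value, character = its decoding)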
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u20ac' # 0x80 -> EURO SIGN
+ u'\u067e' # 0x81 -> ARABIC LETTER PEH
+ u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ u'\u2020' # 0x86 -> DAGGER
+ u'\u2021' # 0x87 -> DOUBLE DAGGER
+ u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ u'\u2030' # 0x89 -> PER MILLE SIGN
+ u'\u0679' # 0x8A -> ARABIC LETTER TTEH
+ u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
+ u'\u0686' # 0x8D -> ARABIC LETTER TCHEH
+ u'\u0698' # 0x8E -> ARABIC LETTER JEH
+ u'\u0688' # 0x8F -> ARABIC LETTER DDAL
+ u'\u06af' # 0x90 -> ARABIC LETTER GAF
+ u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ u'\u2022' # 0x95 -> BULLET
+ u'\u2013' # 0x96 -> EN DASH
+ u'\u2014' # 0x97 -> EM DASH
+ u'\u06a9' # 0x98 -> ARABIC LETTER KEHEH
+ u'\u2122' # 0x99 -> TRADE MARK SIGN
+ u'\u0691' # 0x9A -> ARABIC LETTER RREH
+ u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
+ u'\u200c' # 0x9D -> ZERO WIDTH NON-JOINER
+ u'\u200d' # 0x9E -> ZERO WIDTH JOINER
+ u'\u06ba' # 0x9F -> ARABIC LETTER NOON GHUNNA
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\u060c' # 0xA1 -> ARABIC COMMA
+ u'\xa2' # 0xA2 -> CENT SIGN
+ u'\xa3' # 0xA3 -> POUND SIGN
+ u'\xa4' # 0xA4 -> CURRENCY SIGN
+ u'\xa5' # 0xA5 -> YEN SIGN
+ u'\xa6' # 0xA6 -> BROKEN BAR
+ u'\xa7' # 0xA7 -> SECTION SIGN
+ u'\xa8' # 0xA8 -> DIAERESIS
+ u'\xa9' # 0xA9 -> COPYRIGHT SIGN
+ u'\u06be' # 0xAA -> ARABIC LETTER HEH DOACHASHMEE
+ u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xac' # 0xAC -> NOT SIGN
+ u'\xad' # 0xAD -> SOFT HYPHEN
+ u'\xae' # 0xAE -> REGISTERED SIGN
+ u'\xaf' # 0xAF -> MACRON
+ u'\xb0' # 0xB0 -> DEGREE SIGN
+ u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ u'\xb4' # 0xB4 -> ACUTE ACCENT
+ u'\xb5' # 0xB5 -> MICRO SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xb7' # 0xB7 -> MIDDLE DOT
+ u'\xb8' # 0xB8 -> CEDILLA
+ u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ u'\u061b' # 0xBA -> ARABIC SEMICOLON
+ u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ u'\u061f' # 0xBF -> ARABIC QUESTION MARK
+ u'\u06c1' # 0xC0 -> ARABIC LETTER HEH GOAL
+ u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA
+ u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
+ u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
+ u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
+ u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
+ u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
+ u'\u0627' # 0xC7 -> ARABIC LETTER ALEF
+ u'\u0628' # 0xC8 -> ARABIC LETTER BEH
+ u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA
+ u'\u062a' # 0xCA -> ARABIC LETTER TEH
+ u'\u062b' # 0xCB -> ARABIC LETTER THEH
+ u'\u062c' # 0xCC -> ARABIC LETTER JEEM
+ u'\u062d' # 0xCD -> ARABIC LETTER HAH
+ u'\u062e' # 0xCE -> ARABIC LETTER KHAH
+ u'\u062f' # 0xCF -> ARABIC LETTER DAL
+ u'\u0630' # 0xD0 -> ARABIC LETTER THAL
+ u'\u0631' # 0xD1 -> ARABIC LETTER REH
+ u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN
+ u'\u0633' # 0xD3 -> ARABIC LETTER SEEN
+ u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN
+ u'\u0635' # 0xD5 -> ARABIC LETTER SAD
+ u'\u0636' # 0xD6 -> ARABIC LETTER DAD
+ u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ u'\u0637' # 0xD8 -> ARABIC LETTER TAH
+ u'\u0638' # 0xD9 -> ARABIC LETTER ZAH
+ u'\u0639' # 0xDA -> ARABIC LETTER AIN
+ u'\u063a' # 0xDB -> ARABIC LETTER GHAIN
+ u'\u0640' # 0xDC -> ARABIC TATWEEL
+ u'\u0641' # 0xDD -> ARABIC LETTER FEH
+ u'\u0642' # 0xDE -> ARABIC LETTER QAF
+ u'\u0643' # 0xDF -> ARABIC LETTER KAF
+ u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\u0644' # 0xE1 -> ARABIC LETTER LAM
+ u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\u0645' # 0xE3 -> ARABIC LETTER MEEM
+ u'\u0646' # 0xE4 -> ARABIC LETTER NOON
+ u'\u0647' # 0xE5 -> ARABIC LETTER HEH
+ u'\u0648' # 0xE6 -> ARABIC LETTER WAW
+ u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\u0649' # 0xEC -> ARABIC LETTER ALEF MAKSURA
+ u'\u064a' # 0xED -> ARABIC LETTER YEH
+ u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\u064b' # 0xF0 -> ARABIC FATHATAN
+ u'\u064c' # 0xF1 -> ARABIC DAMMATAN
+ u'\u064d' # 0xF2 -> ARABIC KASRATAN
+ u'\u064e' # 0xF3 -> ARABIC FATHA
+ u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\u064f' # 0xF5 -> ARABIC DAMMA
+ u'\u0650' # 0xF6 -> ARABIC KASRA
+ u'\xf7' # 0xF7 -> DIVISION SIGN
+ u'\u0651' # 0xF8 -> ARABIC SHADDA
+ u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\u0652' # 0xFA -> ARABIC SUKUN
+ u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK
+ u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK
+ u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE
+)
+
+### Encoding table (derived from decoding_table by codecs.charmap_build; passed to codecs.charmap_encode by the encoder classes of this module)
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp1257.py b/cashew/Lib/encodings/cp1257.py
new file mode 100644
index 0000000..53a6b29
--- /dev/null
+++ b/cashew/Lib/encodings/cp1257.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1257 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # cp1257 codec; both methods delegate to the codecs charmap helpers
+
+ def encode(self,input,errors='strict'):  # errors defaults to 'strict' and is forwarded as given
+ return codecs.charmap_encode(input,errors,encoding_table)  # passes the module-level encoding_table as the mapping
+
+ def decode(self,input,errors='strict'):  # errors defaults to 'strict' and is forwarded as given
+ return codecs.charmap_decode(input,errors,decoding_table)  # passes the module-level decoding_table as the mapping
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # incremental wrapper around the same charmap_encode call used by Codec.encode
+ def encode(self, input, final=False):  # final is accepted but not used in the body
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # error mode comes from self.errors; only element [0] of the result is returned
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # incremental wrapper around the same charmap_decode call used by Codec.decode
+ def decode(self, input, final=False):  # final is accepted but not used in the body
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # error mode comes from self.errors; only element [0] of the result is returned
+
+class StreamWriter(Codec,codecs.StreamWriter):  # combines this module's Codec (listed first) with codecs.StreamWriter
+ pass  # no overrides; everything is inherited from the two bases
+
+class StreamReader(Codec,codecs.StreamReader):  # combines this module's Codec (listed first) with codecs.StreamReader
+ pass  # no overrides; everything is inherited from the two bases
+
+### encodings module API
+
+def getregentry():  # returns the codecs.CodecInfo record that describes this codec; takes no arguments
+ return codecs.CodecInfo(
+ name='cp1257',  # codec name stored in the record
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # the classes below are passed uninstantiated
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
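+### Decoding Table (a single unicode string built from adjacent literals: index = cp1257 byte value, character = its decoding; u'\ufffe' marks UNDEFINED bytes)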
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u20ac' # 0x80 -> EURO SIGN
+ u'\ufffe' # 0x81 -> UNDEFINED
+ u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ u'\ufffe' # 0x83 -> UNDEFINED
+ u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ u'\u2020' # 0x86 -> DAGGER
+ u'\u2021' # 0x87 -> DOUBLE DAGGER
+ u'\ufffe' # 0x88 -> UNDEFINED
+ u'\u2030' # 0x89 -> PER MILLE SIGN
+ u'\ufffe' # 0x8A -> UNDEFINED
+ u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ u'\ufffe' # 0x8C -> UNDEFINED
+ u'\xa8' # 0x8D -> DIAERESIS
+ u'\u02c7' # 0x8E -> CARON
+ u'\xb8' # 0x8F -> CEDILLA
+ u'\ufffe' # 0x90 -> UNDEFINED
+ u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ u'\u2022' # 0x95 -> BULLET
+ u'\u2013' # 0x96 -> EN DASH
+ u'\u2014' # 0x97 -> EM DASH
+ u'\ufffe' # 0x98 -> UNDEFINED
+ u'\u2122' # 0x99 -> TRADE MARK SIGN
+ u'\ufffe' # 0x9A -> UNDEFINED
+ u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ u'\ufffe' # 0x9C -> UNDEFINED
+ u'\xaf' # 0x9D -> MACRON
+ u'\u02db' # 0x9E -> OGONEK
+ u'\ufffe' # 0x9F -> UNDEFINED
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\ufffe' # 0xA1 -> UNDEFINED
+ u'\xa2' # 0xA2 -> CENT SIGN
+ u'\xa3' # 0xA3 -> POUND SIGN
+ u'\xa4' # 0xA4 -> CURRENCY SIGN
+ u'\ufffe' # 0xA5 -> UNDEFINED
+ u'\xa6' # 0xA6 -> BROKEN BAR
+ u'\xa7' # 0xA7 -> SECTION SIGN
+ u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE
+ u'\xa9' # 0xA9 -> COPYRIGHT SIGN
+ u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA
+ u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xac' # 0xAC -> NOT SIGN
+ u'\xad' # 0xAD -> SOFT HYPHEN
+ u'\xae' # 0xAE -> REGISTERED SIGN
+ u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE
+ u'\xb0' # 0xB0 -> DEGREE SIGN
+ u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ u'\xb4' # 0xB4 -> ACUTE ACCENT
+ u'\xb5' # 0xB5 -> MICRO SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xb7' # 0xB7 -> MIDDLE DOT
+ u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE
+ u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA
+ u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ u'\xe6' # 0xBF -> LATIN SMALL LETTER AE
+ u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK
+ u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK
+ u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON
+ u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE
+ u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK
+ u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON
+ u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+ u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE
+ u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+ u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA
+ u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA
+ u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON
+ u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA
+ u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON
+ u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
+ u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA
+ u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON
+ u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+ u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK
+ u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE
+ u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE
+ u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON
+ u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON
+ u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
+ u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK
+ u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK
+ u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON
+ u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE
+ u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK
+ u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON
+ u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON
+ u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE
+ u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE
+ u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA
+ u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA
+ u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON
+ u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA
+ u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON
+ u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE
+ u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA
+ u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON
+ u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
+ u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf7' # 0xF7 -> DIVISION SIGN
+ u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK
+ u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE
+ u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE
+ u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON
+ u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
+ u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON
+ u'\u02d9' # 0xFF -> DOT ABOVE
+)
+
+### Encoding table (derived from decoding_table by codecs.charmap_build; passed to codecs.charmap_encode by the encoder classes of this module)
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp1258.py b/cashew/Lib/encodings/cp1258.py
new file mode 100644
index 0000000..4b25d8e
--- /dev/null
+++ b/cashew/Lib/encodings/cp1258.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1258 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ # Stateless cp1258 codec: neither method touches self; each is one charmap call on a module-level table.
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)  # charmap_encode's result is returned as-is
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)  # charmap_decode's result is returned as-is
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # per-chunk encoder; self.errors is not set here (presumably by the codecs base class)
+ def encode(self, input, final=False):  # final is accepted but never read
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # keeps only element [0] of charmap_encode's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # per-chunk decoder; self.errors is not set here (presumably by the codecs base class)
+ def decode(self, input, final=False):  # final is accepted but never read
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # keeps only element [0] of charmap_decode's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # no members of its own; Codec is listed before codecs.StreamWriter in the bases
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):  # no members of its own; Codec is listed before codecs.StreamReader in the bases
+ pass
+
+### encodings module API
+
+def getregentry():  # returns a codecs.CodecInfo describing the cp1258 codec defined in this module
+ return codecs.CodecInfo(
+ name='cp1258',
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # this and the three below are passed as classes, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table: the character at index N is what byte N decodes to; u'\ufffe' fills the slots commented UNDEFINED
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u20ac' # 0x80 -> EURO SIGN
+ u'\ufffe' # 0x81 -> UNDEFINED
+ u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ u'\u2020' # 0x86 -> DAGGER
+ u'\u2021' # 0x87 -> DOUBLE DAGGER
+ u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ u'\u2030' # 0x89 -> PER MILLE SIGN
+ u'\ufffe' # 0x8A -> UNDEFINED
+ u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
+ u'\ufffe' # 0x8D -> UNDEFINED
+ u'\ufffe' # 0x8E -> UNDEFINED
+ u'\ufffe' # 0x8F -> UNDEFINED
+ u'\ufffe' # 0x90 -> UNDEFINED
+ u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ u'\u2022' # 0x95 -> BULLET
+ u'\u2013' # 0x96 -> EN DASH
+ u'\u2014' # 0x97 -> EM DASH
+ u'\u02dc' # 0x98 -> SMALL TILDE
+ u'\u2122' # 0x99 -> TRADE MARK SIGN
+ u'\ufffe' # 0x9A -> UNDEFINED
+ u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
+ u'\ufffe' # 0x9D -> UNDEFINED
+ u'\ufffe' # 0x9E -> UNDEFINED
+ u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
+ u'\xa2' # 0xA2 -> CENT SIGN
+ u'\xa3' # 0xA3 -> POUND SIGN
+ u'\xa4' # 0xA4 -> CURRENCY SIGN
+ u'\xa5' # 0xA5 -> YEN SIGN
+ u'\xa6' # 0xA6 -> BROKEN BAR
+ u'\xa7' # 0xA7 -> SECTION SIGN
+ u'\xa8' # 0xA8 -> DIAERESIS
+ u'\xa9' # 0xA9 -> COPYRIGHT SIGN
+ u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
+ u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xac' # 0xAC -> NOT SIGN
+ u'\xad' # 0xAD -> SOFT HYPHEN
+ u'\xae' # 0xAE -> REGISTERED SIGN
+ u'\xaf' # 0xAF -> MACRON
+ u'\xb0' # 0xB0 -> DEGREE SIGN
+ u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ u'\xb4' # 0xB4 -> ACUTE ACCENT
+ u'\xb5' # 0xB5 -> MICRO SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xb7' # 0xB7 -> MIDDLE DOT
+ u'\xb8' # 0xB8 -> CEDILLA
+ u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
+ u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ u'\xbf' # 0xBF -> INVERTED QUESTION MARK
+ u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
+ u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
+ u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\u0300' # 0xCC -> COMBINING GRAVE ACCENT
+ u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
+ u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\u0309' # 0xD2 -> COMBINING HOOK ABOVE
+ u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\u01a0' # 0xD5 -> LATIN CAPITAL LETTER O WITH HORN
+ u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+ u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\u01af' # 0xDD -> LATIN CAPITAL LETTER U WITH HORN
+ u'\u0303' # 0xDE -> COMBINING TILDE
+ u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
+ u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE
+ u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
+ u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\u0301' # 0xEC -> COMBINING ACUTE ACCENT
+ u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE
+ u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
+ u'\u0323' # 0xF2 -> COMBINING DOT BELOW
+ u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\u01a1' # 0xF5 -> LATIN SMALL LETTER O WITH HORN
+ u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf7' # 0xF7 -> DIVISION SIGN
+ u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
+ u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\u01b0' # 0xFD -> LATIN SMALL LETTER U WITH HORN
+ u'\u20ab' # 0xFE -> DONG SIGN
+ u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+)
+
+### Encoding table: computed from decoding_table when the module is loaded, not maintained by hand
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp424.py b/cashew/Lib/encodings/cp424.py
new file mode 100644
index 0000000..d3ade22
--- /dev/null
+++ b/cashew/Lib/encodings/cp424.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp424 generated from 'MAPPINGS/VENDORS/MISC/CP424.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ # Stateless cp424 codec: neither method touches self; each is one charmap call on a module-level table.
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)  # charmap_encode's result is returned as-is
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)  # charmap_decode's result is returned as-is
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # per-chunk encoder; self.errors is not set here (presumably by the codecs base class)
+ def encode(self, input, final=False):  # final is accepted but never read
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # keeps only element [0] of charmap_encode's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # per-chunk decoder; self.errors is not set here (presumably by the codecs base class)
+ def decode(self, input, final=False):  # final is accepted but never read
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # keeps only element [0] of charmap_decode's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # no members of its own; Codec is listed before codecs.StreamWriter in the bases
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):  # no members of its own; Codec is listed before codecs.StreamReader in the bases
+ pass
+
+### encodings module API
+
+def getregentry():  # returns a codecs.CodecInfo describing the cp424 codec defined in this module
+ return codecs.CodecInfo(
+ name='cp424',
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # this and the three below are passed as classes, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table: the character at index N is what byte N decodes to; u'\ufffe' fills the slots commented UNDEFINED
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x9c' # 0x04 -> SELECT
+ u'\t' # 0x05 -> HORIZONTAL TABULATION
+ u'\x86' # 0x06 -> REQUIRED NEW LINE
+ u'\x7f' # 0x07 -> DELETE
+ u'\x97' # 0x08 -> GRAPHIC ESCAPE
+ u'\x8d' # 0x09 -> SUPERSCRIPT
+ u'\x8e' # 0x0A -> REPEAT
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x9d' # 0x14 -> RESTORE/ENABLE PRESENTATION
+ u'\x85' # 0x15 -> NEW LINE
+ u'\x08' # 0x16 -> BACKSPACE
+ u'\x87' # 0x17 -> PROGRAM OPERATOR COMMUNICATION
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x92' # 0x1A -> UNIT BACK SPACE
+ u'\x8f' # 0x1B -> CUSTOMER USE ONE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u'\x80' # 0x20 -> DIGIT SELECT
+ u'\x81' # 0x21 -> START OF SIGNIFICANCE
+ u'\x82' # 0x22 -> FIELD SEPARATOR
+ u'\x83' # 0x23 -> WORD UNDERSCORE
+ u'\x84' # 0x24 -> BYPASS OR INHIBIT PRESENTATION
+ u'\n' # 0x25 -> LINE FEED
+ u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ u'\x1b' # 0x27 -> ESCAPE
+ u'\x88' # 0x28 -> SET ATTRIBUTE
+ u'\x89' # 0x29 -> START FIELD EXTENDED
+ u'\x8a' # 0x2A -> SET MODE OR SWITCH
+ u'\x8b' # 0x2B -> CONTROL SEQUENCE PREFIX
+ u'\x8c' # 0x2C -> MODIFY FIELD ATTRIBUTE
+ u'\x05' # 0x2D -> ENQUIRY
+ u'\x06' # 0x2E -> ACKNOWLEDGE
+ u'\x07' # 0x2F -> BELL
+ u'\x90' # 0x30 ->
+ u'\x91' # 0x31 ->
+ u'\x16' # 0x32 -> SYNCHRONOUS IDLE
+ u'\x93' # 0x33 -> INDEX RETURN
+ u'\x94' # 0x34 -> PRESENTATION POSITION
+ u'\x95' # 0x35 -> TRANSPARENT
+ u'\x96' # 0x36 -> NUMERIC BACKSPACE
+ u'\x04' # 0x37 -> END OF TRANSMISSION
+ u'\x98' # 0x38 -> SUBSCRIPT
+ u'\x99' # 0x39 -> INDENT TABULATION
+ u'\x9a' # 0x3A -> REVERSE FORM FEED
+ u'\x9b' # 0x3B -> CUSTOMER USE THREE
+ u'\x14' # 0x3C -> DEVICE CONTROL FOUR
+ u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ u'\x9e' # 0x3E ->
+ u'\x1a' # 0x3F -> SUBSTITUTE
+ u' ' # 0x40 -> SPACE
+ u'\u05d0' # 0x41 -> HEBREW LETTER ALEF
+ u'\u05d1' # 0x42 -> HEBREW LETTER BET
+ u'\u05d2' # 0x43 -> HEBREW LETTER GIMEL
+ u'\u05d3' # 0x44 -> HEBREW LETTER DALET
+ u'\u05d4' # 0x45 -> HEBREW LETTER HE
+ u'\u05d5' # 0x46 -> HEBREW LETTER VAV
+ u'\u05d6' # 0x47 -> HEBREW LETTER ZAYIN
+ u'\u05d7' # 0x48 -> HEBREW LETTER HET
+ u'\u05d8' # 0x49 -> HEBREW LETTER TET
+ u'\xa2' # 0x4A -> CENT SIGN
+ u'.' # 0x4B -> FULL STOP
+ u'<' # 0x4C -> LESS-THAN SIGN
+ u'(' # 0x4D -> LEFT PARENTHESIS
+ u'+' # 0x4E -> PLUS SIGN
+ u'|' # 0x4F -> VERTICAL LINE
+ u'&' # 0x50 -> AMPERSAND
+ u'\u05d9' # 0x51 -> HEBREW LETTER YOD
+ u'\u05da' # 0x52 -> HEBREW LETTER FINAL KAF
+ u'\u05db' # 0x53 -> HEBREW LETTER KAF
+ u'\u05dc' # 0x54 -> HEBREW LETTER LAMED
+ u'\u05dd' # 0x55 -> HEBREW LETTER FINAL MEM
+ u'\u05de' # 0x56 -> HEBREW LETTER MEM
+ u'\u05df' # 0x57 -> HEBREW LETTER FINAL NUN
+ u'\u05e0' # 0x58 -> HEBREW LETTER NUN
+ u'\u05e1' # 0x59 -> HEBREW LETTER SAMEKH
+ u'!' # 0x5A -> EXCLAMATION MARK
+ u'$' # 0x5B -> DOLLAR SIGN
+ u'*' # 0x5C -> ASTERISK
+ u')' # 0x5D -> RIGHT PARENTHESIS
+ u';' # 0x5E -> SEMICOLON
+ u'\xac' # 0x5F -> NOT SIGN
+ u'-' # 0x60 -> HYPHEN-MINUS
+ u'/' # 0x61 -> SOLIDUS
+ u'\u05e2' # 0x62 -> HEBREW LETTER AYIN
+ u'\u05e3' # 0x63 -> HEBREW LETTER FINAL PE
+ u'\u05e4' # 0x64 -> HEBREW LETTER PE
+ u'\u05e5' # 0x65 -> HEBREW LETTER FINAL TSADI
+ u'\u05e6' # 0x66 -> HEBREW LETTER TSADI
+ u'\u05e7' # 0x67 -> HEBREW LETTER QOF
+ u'\u05e8' # 0x68 -> HEBREW LETTER RESH
+ u'\u05e9' # 0x69 -> HEBREW LETTER SHIN
+ u'\xa6' # 0x6A -> BROKEN BAR
+ u',' # 0x6B -> COMMA
+ u'%' # 0x6C -> PERCENT SIGN
+ u'_' # 0x6D -> LOW LINE
+ u'>' # 0x6E -> GREATER-THAN SIGN
+ u'?' # 0x6F -> QUESTION MARK
+ u'\ufffe' # 0x70 -> UNDEFINED
+ u'\u05ea' # 0x71 -> HEBREW LETTER TAV
+ u'\ufffe' # 0x72 -> UNDEFINED
+ u'\ufffe' # 0x73 -> UNDEFINED
+ u'\xa0' # 0x74 -> NO-BREAK SPACE
+ u'\ufffe' # 0x75 -> UNDEFINED
+ u'\ufffe' # 0x76 -> UNDEFINED
+ u'\ufffe' # 0x77 -> UNDEFINED
+ u'\u2017' # 0x78 -> DOUBLE LOW LINE
+ u'`' # 0x79 -> GRAVE ACCENT
+ u':' # 0x7A -> COLON
+ u'#' # 0x7B -> NUMBER SIGN
+ u'@' # 0x7C -> COMMERCIAL AT
+ u"'" # 0x7D -> APOSTROPHE
+ u'=' # 0x7E -> EQUALS SIGN
+ u'"' # 0x7F -> QUOTATION MARK
+ u'\ufffe' # 0x80 -> UNDEFINED
+ u'a' # 0x81 -> LATIN SMALL LETTER A
+ u'b' # 0x82 -> LATIN SMALL LETTER B
+ u'c' # 0x83 -> LATIN SMALL LETTER C
+ u'd' # 0x84 -> LATIN SMALL LETTER D
+ u'e' # 0x85 -> LATIN SMALL LETTER E
+ u'f' # 0x86 -> LATIN SMALL LETTER F
+ u'g' # 0x87 -> LATIN SMALL LETTER G
+ u'h' # 0x88 -> LATIN SMALL LETTER H
+ u'i' # 0x89 -> LATIN SMALL LETTER I
+ u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\ufffe' # 0x8C -> UNDEFINED
+ u'\ufffe' # 0x8D -> UNDEFINED
+ u'\ufffe' # 0x8E -> UNDEFINED
+ u'\xb1' # 0x8F -> PLUS-MINUS SIGN
+ u'\xb0' # 0x90 -> DEGREE SIGN
+ u'j' # 0x91 -> LATIN SMALL LETTER J
+ u'k' # 0x92 -> LATIN SMALL LETTER K
+ u'l' # 0x93 -> LATIN SMALL LETTER L
+ u'm' # 0x94 -> LATIN SMALL LETTER M
+ u'n' # 0x95 -> LATIN SMALL LETTER N
+ u'o' # 0x96 -> LATIN SMALL LETTER O
+ u'p' # 0x97 -> LATIN SMALL LETTER P
+ u'q' # 0x98 -> LATIN SMALL LETTER Q
+ u'r' # 0x99 -> LATIN SMALL LETTER R
+ u'\ufffe' # 0x9A -> UNDEFINED
+ u'\ufffe' # 0x9B -> UNDEFINED
+ u'\ufffe' # 0x9C -> UNDEFINED
+ u'\xb8' # 0x9D -> CEDILLA
+ u'\ufffe' # 0x9E -> UNDEFINED
+ u'\xa4' # 0x9F -> CURRENCY SIGN
+ u'\xb5' # 0xA0 -> MICRO SIGN
+ u'~' # 0xA1 -> TILDE
+ u's' # 0xA2 -> LATIN SMALL LETTER S
+ u't' # 0xA3 -> LATIN SMALL LETTER T
+ u'u' # 0xA4 -> LATIN SMALL LETTER U
+ u'v' # 0xA5 -> LATIN SMALL LETTER V
+ u'w' # 0xA6 -> LATIN SMALL LETTER W
+ u'x' # 0xA7 -> LATIN SMALL LETTER X
+ u'y' # 0xA8 -> LATIN SMALL LETTER Y
+ u'z' # 0xA9 -> LATIN SMALL LETTER Z
+ u'\ufffe' # 0xAA -> UNDEFINED
+ u'\ufffe' # 0xAB -> UNDEFINED
+ u'\ufffe' # 0xAC -> UNDEFINED
+ u'\ufffe' # 0xAD -> UNDEFINED
+ u'\ufffe' # 0xAE -> UNDEFINED
+ u'\xae' # 0xAF -> REGISTERED SIGN
+ u'^' # 0xB0 -> CIRCUMFLEX ACCENT
+ u'\xa3' # 0xB1 -> POUND SIGN
+ u'\xa5' # 0xB2 -> YEN SIGN
+ u'\xb7' # 0xB3 -> MIDDLE DOT
+ u'\xa9' # 0xB4 -> COPYRIGHT SIGN
+ u'\xa7' # 0xB5 -> SECTION SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ u'[' # 0xBA -> LEFT SQUARE BRACKET
+ u']' # 0xBB -> RIGHT SQUARE BRACKET
+ u'\xaf' # 0xBC -> MACRON
+ u'\xa8' # 0xBD -> DIAERESIS
+ u'\xb4' # 0xBE -> ACUTE ACCENT
+ u'\xd7' # 0xBF -> MULTIPLICATION SIGN
+ u'{' # 0xC0 -> LEFT CURLY BRACKET
+ u'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ u'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ u'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ u'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ u'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ u'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ u'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ u'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ u'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ u'\xad' # 0xCA -> SOFT HYPHEN
+ u'\ufffe' # 0xCB -> UNDEFINED
+ u'\ufffe' # 0xCC -> UNDEFINED
+ u'\ufffe' # 0xCD -> UNDEFINED
+ u'\ufffe' # 0xCE -> UNDEFINED
+ u'\ufffe' # 0xCF -> UNDEFINED
+ u'}' # 0xD0 -> RIGHT CURLY BRACKET
+ u'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ u'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ u'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ u'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ u'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ u'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ u'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ u'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ u'\xb9' # 0xDA -> SUPERSCRIPT ONE
+ u'\ufffe' # 0xDB -> UNDEFINED
+ u'\ufffe' # 0xDC -> UNDEFINED
+ u'\ufffe' # 0xDD -> UNDEFINED
+ u'\ufffe' # 0xDE -> UNDEFINED
+ u'\ufffe' # 0xDF -> UNDEFINED
+ u'\\' # 0xE0 -> REVERSE SOLIDUS
+ u'\xf7' # 0xE1 -> DIVISION SIGN
+ u'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ u'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ u'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ u'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ u'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ u'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ u'\xb2' # 0xEA -> SUPERSCRIPT TWO
+ u'\ufffe' # 0xEB -> UNDEFINED
+ u'\ufffe' # 0xEC -> UNDEFINED
+ u'\ufffe' # 0xED -> UNDEFINED
+ u'\ufffe' # 0xEE -> UNDEFINED
+ u'\ufffe' # 0xEF -> UNDEFINED
+ u'0' # 0xF0 -> DIGIT ZERO
+ u'1' # 0xF1 -> DIGIT ONE
+ u'2' # 0xF2 -> DIGIT TWO
+ u'3' # 0xF3 -> DIGIT THREE
+ u'4' # 0xF4 -> DIGIT FOUR
+ u'5' # 0xF5 -> DIGIT FIVE
+ u'6' # 0xF6 -> DIGIT SIX
+ u'7' # 0xF7 -> DIGIT SEVEN
+ u'8' # 0xF8 -> DIGIT EIGHT
+ u'9' # 0xF9 -> DIGIT NINE
+ u'\xb3' # 0xFA -> SUPERSCRIPT THREE
+ u'\ufffe' # 0xFB -> UNDEFINED
+ u'\ufffe' # 0xFC -> UNDEFINED
+ u'\ufffe' # 0xFD -> UNDEFINED
+ u'\ufffe' # 0xFE -> UNDEFINED
+ u'\x9f' # 0xFF -> EIGHT ONES
+)
+
+### Encoding table: computed from decoding_table when the module is loaded, not maintained by hand
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp437.py b/cashew/Lib/encodings/cp437.py
new file mode 100644
index 0000000..52cd882
--- /dev/null
+++ b/cashew/Lib/encodings/cp437.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec cp437 generated from 'VENDORS/MICSFT/PC/CP437.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ # Stateless cp437 codec: neither method touches self; encode uses encoding_map, decode uses decoding_table.
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)  # NOTE(review): encoding_map is not assigned above this point in the file; confirm it is defined further down
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)  # charmap_decode's result is returned as-is
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # per-chunk encoder; self.errors is not set here (presumably by the codecs base class)
+ def encode(self, input, final=False):  # final is accepted but never read
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]  # keeps only element [0]; looks up encoding_map, the same name Codec.encode uses
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # per-chunk decoder; self.errors is not set here (presumably by the codecs base class)
+ def decode(self, input, final=False):  # final is accepted but never read
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # keeps only element [0] of charmap_decode's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # no members of its own; Codec is listed before codecs.StreamWriter in the bases
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):  # no members of its own; Codec is listed before codecs.StreamReader in the bases
+ pass
+
+### encodings module API
+
+def getregentry():  # returns a codecs.CodecInfo describing the cp437 codec defined in this module
+ return codecs.CodecInfo(
+ name='cp437',
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # this and the three below are passed as classes, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
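+### Decoding Map: byte value -> Unicode code point; starts as the identity over 0-255, then 0x80-0xff are overridden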
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00a2, # CENT SIGN
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00a5, # YEN SIGN
+ 0x009e: 0x20a7, # PESETA SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x2310, # REVERSED NOT SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table: the character at index N is what byte N decodes to
+
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+ u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xa2' # 0x009b -> CENT SIGN
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\xa5' # 0x009d -> YEN SIGN
+ u'\u20a7' # 0x009e -> PESETA SIGN
+ u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ u'\u2310' # 0x00a9 -> REVERSED NOT SIGN
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u258c' # 0x00dd -> LEFT HALF BLOCK
+ u'\u2590' # 0x00de -> RIGHT HALF BLOCK
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ u'\xb5' # 0x00e6 -> MICRO SIGN
+ u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ u'\u221e' # 0x00ec -> INFINITY
+ u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ u'\u2229' # 0x00ef -> INTERSECTION
+ u'\u2261' # 0x00f0 -> IDENTICAL TO
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\u2219' # 0x00f9 -> BULLET OPERATOR
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\u221a' # 0x00fb -> SQUARE ROOT
+ u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map: Unicode code point -> byte value (reverse of the decoding table above)
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x009b, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a5: 0x009d, # YEN SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x20a7: 0x009e, # PESETA SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2310: 0x00a9, # REVERSED NOT SIGN
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp500.py b/cashew/Lib/encodings/cp500.py
new file mode 100644
index 0000000..60766c0
--- /dev/null
+++ b/cashew/Lib/encodings/cp500.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp500 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    """Stateless cp500 charmap codec; both methods return (output, length consumed)."""
+    def encode(self,input,errors='strict'):
+        return codecs.charmap_encode(input,errors,encoding_table)
+
+    def decode(self,input,errors='strict'):
+        return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):  # final is ignored; nothing is stored on self
+        return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # output only
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):  # final is ignored; nothing is stored on self
+        return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # output only
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    """cp500 stream writer; encode() is taken from Codec, the first base class."""
+
+class StreamReader(Codec,codecs.StreamReader):
+    """cp500 stream reader; decode() is taken from Codec, the first base class."""
+
+### encodings module API
+
+def getregentry():
+    """Return the CodecInfo describing the 'cp500' codec defined in this module."""
+    encoder, decoder = Codec().encode, Codec().decode
+    return codecs.CodecInfo(
+        encoder, decoder,
+        name='cp500',
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder)
+
+
+### Decoding Table: the character at index N is the Unicode value of cp500 byte N
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x9c' # 0x04 -> CONTROL
+ u'\t' # 0x05 -> HORIZONTAL TABULATION
+ u'\x86' # 0x06 -> CONTROL
+ u'\x7f' # 0x07 -> DELETE
+ u'\x97' # 0x08 -> CONTROL
+ u'\x8d' # 0x09 -> CONTROL
+ u'\x8e' # 0x0A -> CONTROL
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x9d' # 0x14 -> CONTROL
+ u'\x85' # 0x15 -> CONTROL
+ u'\x08' # 0x16 -> BACKSPACE
+ u'\x87' # 0x17 -> CONTROL
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x92' # 0x1A -> CONTROL
+ u'\x8f' # 0x1B -> CONTROL
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u'\x80' # 0x20 -> CONTROL
+ u'\x81' # 0x21 -> CONTROL
+ u'\x82' # 0x22 -> CONTROL
+ u'\x83' # 0x23 -> CONTROL
+ u'\x84' # 0x24 -> CONTROL
+ u'\n' # 0x25 -> LINE FEED
+ u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ u'\x1b' # 0x27 -> ESCAPE
+ u'\x88' # 0x28 -> CONTROL
+ u'\x89' # 0x29 -> CONTROL
+ u'\x8a' # 0x2A -> CONTROL
+ u'\x8b' # 0x2B -> CONTROL
+ u'\x8c' # 0x2C -> CONTROL
+ u'\x05' # 0x2D -> ENQUIRY
+ u'\x06' # 0x2E -> ACKNOWLEDGE
+ u'\x07' # 0x2F -> BELL
+ u'\x90' # 0x30 -> CONTROL
+ u'\x91' # 0x31 -> CONTROL
+ u'\x16' # 0x32 -> SYNCHRONOUS IDLE
+ u'\x93' # 0x33 -> CONTROL
+ u'\x94' # 0x34 -> CONTROL
+ u'\x95' # 0x35 -> CONTROL
+ u'\x96' # 0x36 -> CONTROL
+ u'\x04' # 0x37 -> END OF TRANSMISSION
+ u'\x98' # 0x38 -> CONTROL
+ u'\x99' # 0x39 -> CONTROL
+ u'\x9a' # 0x3A -> CONTROL
+ u'\x9b' # 0x3B -> CONTROL
+ u'\x14' # 0x3C -> DEVICE CONTROL FOUR
+ u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ u'\x9e' # 0x3E -> CONTROL
+ u'\x1a' # 0x3F -> SUBSTITUTE
+ u' ' # 0x40 -> SPACE
+ u'\xa0' # 0x41 -> NO-BREAK SPACE
+ u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
+ u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
+ u'[' # 0x4A -> LEFT SQUARE BRACKET
+ u'.' # 0x4B -> FULL STOP
+ u'<' # 0x4C -> LESS-THAN SIGN
+ u'(' # 0x4D -> LEFT PARENTHESIS
+ u'+' # 0x4E -> PLUS SIGN
+ u'!' # 0x4F -> EXCLAMATION MARK
+ u'&' # 0x50 -> AMPERSAND
+ u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ u']' # 0x5A -> RIGHT SQUARE BRACKET
+ u'$' # 0x5B -> DOLLAR SIGN
+ u'*' # 0x5C -> ASTERISK
+ u')' # 0x5D -> RIGHT PARENTHESIS
+ u';' # 0x5E -> SEMICOLON
+ u'^' # 0x5F -> CIRCUMFLEX ACCENT
+ u'-' # 0x60 -> HYPHEN-MINUS
+ u'/' # 0x61 -> SOLIDUS
+ u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+ u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xa6' # 0x6A -> BROKEN BAR
+ u',' # 0x6B -> COMMA
+ u'%' # 0x6C -> PERCENT SIGN
+ u'_' # 0x6D -> LOW LINE
+ u'>' # 0x6E -> GREATER-THAN SIGN
+ u'?' # 0x6F -> QUESTION MARK
+ u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
+ u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+ u'`' # 0x79 -> GRAVE ACCENT
+ u':' # 0x7A -> COLON
+ u'#' # 0x7B -> NUMBER SIGN
+ u'@' # 0x7C -> COMMERCIAL AT
+ u"'" # 0x7D -> APOSTROPHE
+ u'=' # 0x7E -> EQUALS SIGN
+ u'"' # 0x7F -> QUOTATION MARK
+ u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+ u'a' # 0x81 -> LATIN SMALL LETTER A
+ u'b' # 0x82 -> LATIN SMALL LETTER B
+ u'c' # 0x83 -> LATIN SMALL LETTER C
+ u'd' # 0x84 -> LATIN SMALL LETTER D
+ u'e' # 0x85 -> LATIN SMALL LETTER E
+ u'f' # 0x86 -> LATIN SMALL LETTER F
+ u'g' # 0x87 -> LATIN SMALL LETTER G
+ u'h' # 0x88 -> LATIN SMALL LETTER H
+ u'i' # 0x89 -> LATIN SMALL LETTER I
+ u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
+ u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
+ u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
+ u'\xb1' # 0x8F -> PLUS-MINUS SIGN
+ u'\xb0' # 0x90 -> DEGREE SIGN
+ u'j' # 0x91 -> LATIN SMALL LETTER J
+ u'k' # 0x92 -> LATIN SMALL LETTER K
+ u'l' # 0x93 -> LATIN SMALL LETTER L
+ u'm' # 0x94 -> LATIN SMALL LETTER M
+ u'n' # 0x95 -> LATIN SMALL LETTER N
+ u'o' # 0x96 -> LATIN SMALL LETTER O
+ u'p' # 0x97 -> LATIN SMALL LETTER P
+ u'q' # 0x98 -> LATIN SMALL LETTER Q
+ u'r' # 0x99 -> LATIN SMALL LETTER R
+ u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
+ u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
+ u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
+ u'\xb8' # 0x9D -> CEDILLA
+ u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
+ u'\xa4' # 0x9F -> CURRENCY SIGN
+ u'\xb5' # 0xA0 -> MICRO SIGN
+ u'~' # 0xA1 -> TILDE
+ u's' # 0xA2 -> LATIN SMALL LETTER S
+ u't' # 0xA3 -> LATIN SMALL LETTER T
+ u'u' # 0xA4 -> LATIN SMALL LETTER U
+ u'v' # 0xA5 -> LATIN SMALL LETTER V
+ u'w' # 0xA6 -> LATIN SMALL LETTER W
+ u'x' # 0xA7 -> LATIN SMALL LETTER X
+ u'y' # 0xA8 -> LATIN SMALL LETTER Y
+ u'z' # 0xA9 -> LATIN SMALL LETTER Z
+ u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
+ u'\xbf' # 0xAB -> INVERTED QUESTION MARK
+ u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+ u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+ u'\xae' # 0xAF -> REGISTERED SIGN
+ u'\xa2' # 0xB0 -> CENT SIGN
+ u'\xa3' # 0xB1 -> POUND SIGN
+ u'\xa5' # 0xB2 -> YEN SIGN
+ u'\xb7' # 0xB3 -> MIDDLE DOT
+ u'\xa9' # 0xB4 -> COPYRIGHT SIGN
+ u'\xa7' # 0xB5 -> SECTION SIGN
+ u'\xb6' # 0xB6 -> PILCROW SIGN
+ u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ u'\xac' # 0xBA -> NOT SIGN
+ u'|' # 0xBB -> VERTICAL LINE
+ u'\xaf' # 0xBC -> MACRON
+ u'\xa8' # 0xBD -> DIAERESIS
+ u'\xb4' # 0xBE -> ACUTE ACCENT
+ u'\xd7' # 0xBF -> MULTIPLICATION SIGN
+ u'{' # 0xC0 -> LEFT CURLY BRACKET
+ u'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ u'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ u'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ u'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ u'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ u'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ u'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ u'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ u'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ u'\xad' # 0xCA -> SOFT HYPHEN
+ u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
+ u'}' # 0xD0 -> RIGHT CURLY BRACKET
+ u'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ u'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ u'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ u'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ u'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ u'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ u'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ u'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ u'\xb9' # 0xDA -> SUPERSCRIPT ONE
+ u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ u'\\' # 0xE0 -> REVERSE SOLIDUS
+ u'\xf7' # 0xE1 -> DIVISION SIGN
+ u'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ u'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ u'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ u'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ u'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ u'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ u'\xb2' # 0xEA -> SUPERSCRIPT TWO
+ u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+ u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+ u'0' # 0xF0 -> DIGIT ZERO
+ u'1' # 0xF1 -> DIGIT ONE
+ u'2' # 0xF2 -> DIGIT TWO
+ u'3' # 0xF3 -> DIGIT THREE
+ u'4' # 0xF4 -> DIGIT FOUR
+ u'5' # 0xF5 -> DIGIT FIVE
+ u'6' # 0xF6 -> DIGIT SIX
+ u'7' # 0xF7 -> DIGIT SEVEN
+ u'8' # 0xF8 -> DIGIT EIGHT
+ u'9' # 0xF9 -> DIGIT NINE
+ u'\xb3' # 0xFA -> SUPERSCRIPT THREE
+ u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\x9f' # 0xFF -> CONTROL
+)
+
+### Encoding table: derived from decoding_table by codecs.charmap_build when the module loads
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp720.py b/cashew/Lib/encodings/cp720.py
new file mode 100644
index 0000000..5c96d98
--- /dev/null
+++ b/cashew/Lib/encodings/cp720.py
@@ -0,0 +1,309 @@
+"""Python Character Mapping Codec cp720 generated on Windows:
+Vista 6.0.6002 SP2 Multiprocessor Free with the command:
+ python Tools/unicode/genwincodec.py 720
+"""#"
+
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp720',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (  # adjacent literals form one 256-character unicode string; index = cp720 byte value
+ u'\x00' # 0x00 -> CONTROL CHARACTER
+ u'\x01' # 0x01 -> CONTROL CHARACTER
+ u'\x02' # 0x02 -> CONTROL CHARACTER
+ u'\x03' # 0x03 -> CONTROL CHARACTER
+ u'\x04' # 0x04 -> CONTROL CHARACTER
+ u'\x05' # 0x05 -> CONTROL CHARACTER
+ u'\x06' # 0x06 -> CONTROL CHARACTER
+ u'\x07' # 0x07 -> CONTROL CHARACTER
+ u'\x08' # 0x08 -> CONTROL CHARACTER
+ u'\t' # 0x09 -> CONTROL CHARACTER
+ u'\n' # 0x0A -> CONTROL CHARACTER
+ u'\x0b' # 0x0B -> CONTROL CHARACTER
+ u'\x0c' # 0x0C -> CONTROL CHARACTER
+ u'\r' # 0x0D -> CONTROL CHARACTER
+ u'\x0e' # 0x0E -> CONTROL CHARACTER
+ u'\x0f' # 0x0F -> CONTROL CHARACTER
+ u'\x10' # 0x10 -> CONTROL CHARACTER
+ u'\x11' # 0x11 -> CONTROL CHARACTER
+ u'\x12' # 0x12 -> CONTROL CHARACTER
+ u'\x13' # 0x13 -> CONTROL CHARACTER
+ u'\x14' # 0x14 -> CONTROL CHARACTER
+ u'\x15' # 0x15 -> CONTROL CHARACTER
+ u'\x16' # 0x16 -> CONTROL CHARACTER
+ u'\x17' # 0x17 -> CONTROL CHARACTER
+ u'\x18' # 0x18 -> CONTROL CHARACTER
+ u'\x19' # 0x19 -> CONTROL CHARACTER
+ u'\x1a' # 0x1A -> CONTROL CHARACTER
+ u'\x1b' # 0x1B -> CONTROL CHARACTER
+ u'\x1c' # 0x1C -> CONTROL CHARACTER
+ u'\x1d' # 0x1D -> CONTROL CHARACTER
+ u'\x1e' # 0x1E -> CONTROL CHARACTER
+ u'\x1f' # 0x1F -> CONTROL CHARACTER
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> CONTROL CHARACTER
+ u'\x80' # 0x80 -> U+0080 (no name from generator; presumably unassigned in cp720 -- TODO confirm)
+ u'\x81' # 0x81 -> U+0081 (no name from generator; presumably unassigned in cp720 -- TODO confirm)
+ u'\xe9' # 0x82 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xe2' # 0x83 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\x84' # 0x84 -> U+0084 (no name from generator; presumably unassigned in cp720 -- TODO confirm)
+ u'\xe0' # 0x85 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\x86' # 0x86 -> U+0086 (no name from generator; presumably unassigned in cp720 -- TODO confirm)
+ u'\xe7' # 0x87 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xea' # 0x88 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x89 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x8A -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xef' # 0x8B -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xee' # 0x8C -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\x8d' # 0x8D -> U+008D (no name from generator; presumably unassigned in cp720 -- TODO confirm)
+ u'\x8e' # 0x8E -> U+008E (no name from generator; presumably unassigned in cp720 -- TODO confirm)
+ u'\x8f' # 0x8F -> U+008F (no name from generator; presumably unassigned in cp720 -- TODO confirm)
+ u'\x90' # 0x90 -> U+0090 (no name from generator; presumably unassigned in cp720 -- TODO confirm)
+ u'\u0651' # 0x91 -> ARABIC SHADDA
+ u'\u0652' # 0x92 -> ARABIC SUKUN
+ u'\xf4' # 0x93 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xa4' # 0x94 -> CURRENCY SIGN
+ u'\u0640' # 0x95 -> ARABIC TATWEEL
+ u'\xfb' # 0x96 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xf9' # 0x97 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\u0621' # 0x98 -> ARABIC LETTER HAMZA
+ u'\u0622' # 0x99 -> ARABIC LETTER ALEF WITH MADDA ABOVE
+ u'\u0623' # 0x9A -> ARABIC LETTER ALEF WITH HAMZA ABOVE
+ u'\u0624' # 0x9B -> ARABIC LETTER WAW WITH HAMZA ABOVE
+ u'\xa3' # 0x9C -> POUND SIGN
+ u'\u0625' # 0x9D -> ARABIC LETTER ALEF WITH HAMZA BELOW
+ u'\u0626' # 0x9E -> ARABIC LETTER YEH WITH HAMZA ABOVE
+ u'\u0627' # 0x9F -> ARABIC LETTER ALEF
+ u'\u0628' # 0xA0 -> ARABIC LETTER BEH
+ u'\u0629' # 0xA1 -> ARABIC LETTER TEH MARBUTA
+ u'\u062a' # 0xA2 -> ARABIC LETTER TEH
+ u'\u062b' # 0xA3 -> ARABIC LETTER THEH
+ u'\u062c' # 0xA4 -> ARABIC LETTER JEEM
+ u'\u062d' # 0xA5 -> ARABIC LETTER HAH
+ u'\u062e' # 0xA6 -> ARABIC LETTER KHAH
+ u'\u062f' # 0xA7 -> ARABIC LETTER DAL
+ u'\u0630' # 0xA8 -> ARABIC LETTER THAL
+ u'\u0631' # 0xA9 -> ARABIC LETTER REH
+ u'\u0632' # 0xAA -> ARABIC LETTER ZAIN
+ u'\u0633' # 0xAB -> ARABIC LETTER SEEN
+ u'\u0634' # 0xAC -> ARABIC LETTER SHEEN
+ u'\u0635' # 0xAD -> ARABIC LETTER SAD
+ u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0xB0 -> LIGHT SHADE
+ u'\u2592' # 0xB1 -> MEDIUM SHADE
+ u'\u2593' # 0xB2 -> DARK SHADE
+ u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u2561' # 0xB5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ u'\u2562' # 0xB6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ u'\u2556' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ u'\u2555' # 0xB8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u255c' # 0xBD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ u'\u255b' # 0xBE -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u255e' # 0xC6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ u'\u255f' # 0xC7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u2567' # 0xCF -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ u'\u2568' # 0xD0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ u'\u2564' # 0xD1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ u'\u2565' # 0xD2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ u'\u2559' # 0xD3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ u'\u2558' # 0xD4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ u'\u2552' # 0xD5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ u'\u2553' # 0xD6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ u'\u256b' # 0xD7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ u'\u256a' # 0xD8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0xDB -> FULL BLOCK
+ u'\u2584' # 0xDC -> LOWER HALF BLOCK
+ u'\u258c' # 0xDD -> LEFT HALF BLOCK
+ u'\u2590' # 0xDE -> RIGHT HALF BLOCK
+ u'\u2580' # 0xDF -> UPPER HALF BLOCK
+ u'\u0636' # 0xE0 -> ARABIC LETTER DAD
+ u'\u0637' # 0xE1 -> ARABIC LETTER TAH
+ u'\u0638' # 0xE2 -> ARABIC LETTER ZAH
+ u'\u0639' # 0xE3 -> ARABIC LETTER AIN
+ u'\u063a' # 0xE4 -> ARABIC LETTER GHAIN
+ u'\u0641' # 0xE5 -> ARABIC LETTER FEH
+ u'\xb5' # 0xE6 -> MICRO SIGN
+ u'\u0642' # 0xE7 -> ARABIC LETTER QAF
+ u'\u0643' # 0xE8 -> ARABIC LETTER KAF
+ u'\u0644' # 0xE9 -> ARABIC LETTER LAM
+ u'\u0645' # 0xEA -> ARABIC LETTER MEEM
+ u'\u0646' # 0xEB -> ARABIC LETTER NOON
+ u'\u0647' # 0xEC -> ARABIC LETTER HEH
+ u'\u0648' # 0xED -> ARABIC LETTER WAW
+ u'\u0649' # 0xEE -> ARABIC LETTER ALEF MAKSURA
+ u'\u064a' # 0xEF -> ARABIC LETTER YEH
+ u'\u2261' # 0xF0 -> IDENTICAL TO
+ u'\u064b' # 0xF1 -> ARABIC FATHATAN
+ u'\u064c' # 0xF2 -> ARABIC DAMMATAN
+ u'\u064d' # 0xF3 -> ARABIC KASRATAN
+ u'\u064e' # 0xF4 -> ARABIC FATHA
+ u'\u064f' # 0xF5 -> ARABIC DAMMA
+ u'\u0650' # 0xF6 -> ARABIC KASRA
+ u'\u2248' # 0xF7 -> ALMOST EQUAL TO
+ u'\xb0' # 0xF8 -> DEGREE SIGN
+ u'\u2219' # 0xF9 -> BULLET OPERATOR
+ u'\xb7' # 0xFA -> MIDDLE DOT
+ u'\u221a' # 0xFB -> SQUARE ROOT
+ u'\u207f' # 0xFC -> SUPERSCRIPT LATIN SMALL LETTER N
+ u'\xb2' # 0xFD -> SUPERSCRIPT TWO
+ u'\u25a0' # 0xFE -> BLACK SQUARE
+ u'\xa0' # 0xFF -> NO-BREAK SPACE
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp737.py b/cashew/Lib/encodings/cp737.py
new file mode 100644
index 0000000..d654448
--- /dev/null
+++ b/cashew/Lib/encodings/cp737.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec cp737 generated from 'VENDORS/MICSFT/PC/CP737.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp737',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))  # start with every byte value 0x00-0xff mapped to itself
+decoding_map.update({  # then remap 0x80-0xff to the code points listed below
+ 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
+ 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
+ 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA
+ 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON
+ 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA
+ 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA
+ 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA
+ 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA
+ 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA
+ 0x008b: 0x039c, # GREEK CAPITAL LETTER MU
+ 0x008c: 0x039d, # GREEK CAPITAL LETTER NU
+ 0x008d: 0x039e, # GREEK CAPITAL LETTER XI
+ 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON
+ 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI
+ 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO
+ 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU
+ 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON
+ 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI
+ 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI
+ 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x0099: 0x03b2, # GREEK SMALL LETTER BETA
+ 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA
+ 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA
+ 0x009e: 0x03b7, # GREEK SMALL LETTER ETA
+ 0x009f: 0x03b8, # GREEK SMALL LETTER THETA
+ 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA
+ 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA
+ 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA
+ 0x00a3: 0x03bc, # GREEK SMALL LETTER MU
+ 0x00a4: 0x03bd, # GREEK SMALL LETTER NU
+ 0x00a5: 0x03be, # GREEK SMALL LETTER XI
+ 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON
+ 0x00a7: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO
+ 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
+ 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON
+ 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI
+ 0x00af: 0x03c8, # GREEK SMALL LETTER PSI
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA
+ 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
+ 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
+ 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
+ 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
+ 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
+ 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
+ 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
+ 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
+ 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
+ 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
+ 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
+ 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
+ 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
+ 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (  # adjacent literals form one 256-character unicode string; index = cp737 byte value
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\u0391' # 0x0080 -> GREEK CAPITAL LETTER ALPHA
+ u'\u0392' # 0x0081 -> GREEK CAPITAL LETTER BETA
+ u'\u0393' # 0x0082 -> GREEK CAPITAL LETTER GAMMA
+ u'\u0394' # 0x0083 -> GREEK CAPITAL LETTER DELTA
+ u'\u0395' # 0x0084 -> GREEK CAPITAL LETTER EPSILON
+ u'\u0396' # 0x0085 -> GREEK CAPITAL LETTER ZETA
+ u'\u0397' # 0x0086 -> GREEK CAPITAL LETTER ETA
+ u'\u0398' # 0x0087 -> GREEK CAPITAL LETTER THETA
+ u'\u0399' # 0x0088 -> GREEK CAPITAL LETTER IOTA
+ u'\u039a' # 0x0089 -> GREEK CAPITAL LETTER KAPPA
+ u'\u039b' # 0x008a -> GREEK CAPITAL LETTER LAMDA
+ u'\u039c' # 0x008b -> GREEK CAPITAL LETTER MU
+ u'\u039d' # 0x008c -> GREEK CAPITAL LETTER NU
+ u'\u039e' # 0x008d -> GREEK CAPITAL LETTER XI
+ u'\u039f' # 0x008e -> GREEK CAPITAL LETTER OMICRON
+ u'\u03a0' # 0x008f -> GREEK CAPITAL LETTER PI
+ u'\u03a1' # 0x0090 -> GREEK CAPITAL LETTER RHO
+ u'\u03a3' # 0x0091 -> GREEK CAPITAL LETTER SIGMA
+ u'\u03a4' # 0x0092 -> GREEK CAPITAL LETTER TAU
+ u'\u03a5' # 0x0093 -> GREEK CAPITAL LETTER UPSILON
+ u'\u03a6' # 0x0094 -> GREEK CAPITAL LETTER PHI
+ u'\u03a7' # 0x0095 -> GREEK CAPITAL LETTER CHI
+ u'\u03a8' # 0x0096 -> GREEK CAPITAL LETTER PSI
+ u'\u03a9' # 0x0097 -> GREEK CAPITAL LETTER OMEGA
+ u'\u03b1' # 0x0098 -> GREEK SMALL LETTER ALPHA
+ u'\u03b2' # 0x0099 -> GREEK SMALL LETTER BETA
+ u'\u03b3' # 0x009a -> GREEK SMALL LETTER GAMMA
+ u'\u03b4' # 0x009b -> GREEK SMALL LETTER DELTA
+ u'\u03b5' # 0x009c -> GREEK SMALL LETTER EPSILON
+ u'\u03b6' # 0x009d -> GREEK SMALL LETTER ZETA
+ u'\u03b7' # 0x009e -> GREEK SMALL LETTER ETA
+ u'\u03b8' # 0x009f -> GREEK SMALL LETTER THETA
+ u'\u03b9' # 0x00a0 -> GREEK SMALL LETTER IOTA
+ u'\u03ba' # 0x00a1 -> GREEK SMALL LETTER KAPPA
+ u'\u03bb' # 0x00a2 -> GREEK SMALL LETTER LAMDA
+ u'\u03bc' # 0x00a3 -> GREEK SMALL LETTER MU
+ u'\u03bd' # 0x00a4 -> GREEK SMALL LETTER NU
+ u'\u03be' # 0x00a5 -> GREEK SMALL LETTER XI
+ u'\u03bf' # 0x00a6 -> GREEK SMALL LETTER OMICRON
+ u'\u03c0' # 0x00a7 -> GREEK SMALL LETTER PI
+ u'\u03c1' # 0x00a8 -> GREEK SMALL LETTER RHO
+ u'\u03c3' # 0x00a9 -> GREEK SMALL LETTER SIGMA
+ u'\u03c2' # 0x00aa -> GREEK SMALL LETTER FINAL SIGMA
+ u'\u03c4' # 0x00ab -> GREEK SMALL LETTER TAU
+ u'\u03c5' # 0x00ac -> GREEK SMALL LETTER UPSILON
+ u'\u03c6' # 0x00ad -> GREEK SMALL LETTER PHI
+ u'\u03c7' # 0x00ae -> GREEK SMALL LETTER CHI
+ u'\u03c8' # 0x00af -> GREEK SMALL LETTER PSI
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u258c' # 0x00dd -> LEFT HALF BLOCK
+ u'\u2590' # 0x00de -> RIGHT HALF BLOCK
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\u03c9' # 0x00e0 -> GREEK SMALL LETTER OMEGA
+ u'\u03ac' # 0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS
+ u'\u03ad' # 0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS
+ u'\u03ae' # 0x00e3 -> GREEK SMALL LETTER ETA WITH TONOS
+ u'\u03ca' # 0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ u'\u03af' # 0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS
+ u'\u03cc' # 0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS
+ u'\u03cd' # 0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS
+ u'\u03cb' # 0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ u'\u03ce' # 0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS
+ u'\u0386' # 0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+ u'\u0388' # 0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+ u'\u0389' # 0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS
+ u'\u038a' # 0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS
+ u'\u038c' # 0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+ u'\u038e' # 0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+ u'\u038f' # 0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ u'\u03aa' # 0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ u'\u03ab' # 0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\u2219' # 0x00f9 -> BULLET OPERATOR
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\u221a' # 0x00fb -> SQUARE ROOT
+ u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {  # Unicode code point (key) -> single-byte code (value); the first 128 entries map each code point to itself
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x0386: 0x00ea, # GREEK CAPITAL LETTER ALPHA WITH TONOS
+ 0x0388: 0x00eb, # GREEK CAPITAL LETTER EPSILON WITH TONOS
+ 0x0389: 0x00ec, # GREEK CAPITAL LETTER ETA WITH TONOS
+ 0x038a: 0x00ed, # GREEK CAPITAL LETTER IOTA WITH TONOS
+ 0x038c: 0x00ee, # GREEK CAPITAL LETTER OMICRON WITH TONOS
+ 0x038e: 0x00ef, # GREEK CAPITAL LETTER UPSILON WITH TONOS
+ 0x038f: 0x00f0, # GREEK CAPITAL LETTER OMEGA WITH TONOS
+ 0x0391: 0x0080, # GREEK CAPITAL LETTER ALPHA
+ 0x0392: 0x0081, # GREEK CAPITAL LETTER BETA
+ 0x0393: 0x0082, # GREEK CAPITAL LETTER GAMMA
+ 0x0394: 0x0083, # GREEK CAPITAL LETTER DELTA
+ 0x0395: 0x0084, # GREEK CAPITAL LETTER EPSILON
+ 0x0396: 0x0085, # GREEK CAPITAL LETTER ZETA
+ 0x0397: 0x0086, # GREEK CAPITAL LETTER ETA
+ 0x0398: 0x0087, # GREEK CAPITAL LETTER THETA
+ 0x0399: 0x0088, # GREEK CAPITAL LETTER IOTA
+ 0x039a: 0x0089, # GREEK CAPITAL LETTER KAPPA
+ 0x039b: 0x008a, # GREEK CAPITAL LETTER LAMDA
+ 0x039c: 0x008b, # GREEK CAPITAL LETTER MU
+ 0x039d: 0x008c, # GREEK CAPITAL LETTER NU
+ 0x039e: 0x008d, # GREEK CAPITAL LETTER XI
+ 0x039f: 0x008e, # GREEK CAPITAL LETTER OMICRON
+ 0x03a0: 0x008f, # GREEK CAPITAL LETTER PI
+ 0x03a1: 0x0090, # GREEK CAPITAL LETTER RHO
+ 0x03a3: 0x0091, # GREEK CAPITAL LETTER SIGMA
+ 0x03a4: 0x0092, # GREEK CAPITAL LETTER TAU
+ 0x03a5: 0x0093, # GREEK CAPITAL LETTER UPSILON
+ 0x03a6: 0x0094, # GREEK CAPITAL LETTER PHI
+ 0x03a7: 0x0095, # GREEK CAPITAL LETTER CHI
+ 0x03a8: 0x0096, # GREEK CAPITAL LETTER PSI
+ 0x03a9: 0x0097, # GREEK CAPITAL LETTER OMEGA
+ 0x03aa: 0x00f4, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ 0x03ab: 0x00f5, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ 0x03ac: 0x00e1, # GREEK SMALL LETTER ALPHA WITH TONOS
+ 0x03ad: 0x00e2, # GREEK SMALL LETTER EPSILON WITH TONOS
+ 0x03ae: 0x00e3, # GREEK SMALL LETTER ETA WITH TONOS
+ 0x03af: 0x00e5, # GREEK SMALL LETTER IOTA WITH TONOS
+ 0x03b1: 0x0098, # GREEK SMALL LETTER ALPHA
+ 0x03b2: 0x0099, # GREEK SMALL LETTER BETA
+ 0x03b3: 0x009a, # GREEK SMALL LETTER GAMMA
+ 0x03b4: 0x009b, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x009c, # GREEK SMALL LETTER EPSILON
+ 0x03b6: 0x009d, # GREEK SMALL LETTER ZETA
+ 0x03b7: 0x009e, # GREEK SMALL LETTER ETA
+ 0x03b8: 0x009f, # GREEK SMALL LETTER THETA
+ 0x03b9: 0x00a0, # GREEK SMALL LETTER IOTA
+ 0x03ba: 0x00a1, # GREEK SMALL LETTER KAPPA
+ 0x03bb: 0x00a2, # GREEK SMALL LETTER LAMDA
+ 0x03bc: 0x00a3, # GREEK SMALL LETTER MU
+ 0x03bd: 0x00a4, # GREEK SMALL LETTER NU
+ 0x03be: 0x00a5, # GREEK SMALL LETTER XI
+ 0x03bf: 0x00a6, # GREEK SMALL LETTER OMICRON
+ 0x03c0: 0x00a7, # GREEK SMALL LETTER PI
+ 0x03c1: 0x00a8, # GREEK SMALL LETTER RHO
+ 0x03c2: 0x00aa, # GREEK SMALL LETTER FINAL SIGMA
+ 0x03c3: 0x00a9, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00ab, # GREEK SMALL LETTER TAU
+ 0x03c5: 0x00ac, # GREEK SMALL LETTER UPSILON
+ 0x03c6: 0x00ad, # GREEK SMALL LETTER PHI
+ 0x03c7: 0x00ae, # GREEK SMALL LETTER CHI
+ 0x03c8: 0x00af, # GREEK SMALL LETTER PSI
+ 0x03c9: 0x00e0, # GREEK SMALL LETTER OMEGA
+ 0x03ca: 0x00e4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ 0x03cb: 0x00e8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ 0x03cc: 0x00e6, # GREEK SMALL LETTER OMICRON WITH TONOS
+ 0x03cd: 0x00e7, # GREEK SMALL LETTER UPSILON WITH TONOS
+ 0x03ce: 0x00e9, # GREEK SMALL LETTER OMEGA WITH TONOS
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp775.py b/cashew/Lib/encodings/cp775.py
new file mode 100644
index 0000000..6a456a5
--- /dev/null
+++ b/cashew/Lib/encodings/cp775.py
@@ -0,0 +1,697 @@
+""" Python Character Mapping Codec cp775 generated from 'VENDORS/MICSFT/PC/CP775.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # keeps no state of its own: both methods hand straight off to the codecs charmap helpers
+ # errors is forwarded unchanged to the helper; each method returns the helper's result as-is.
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)  # encoding_map: module-level dict defined later in this file
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)  # decoding_table: module-level string defined later in this file
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # encodes each chunk independently; this body keeps nothing between calls
+ def encode(self, input, final=False):  # final is accepted for the base-class signature but not used here
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]  # [0] keeps only the first element of the helper's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # decodes each chunk independently; this body keeps nothing between calls
+ def decode(self, input, final=False):  # final is accepted for the base-class signature but not used here
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # [0] keeps only the first element of the helper's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # Codec is listed first, so its encode/decode take precedence; nothing else is added
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):  # Codec is listed first, so its encode/decode take precedence; nothing else is added
+ pass
+
+### encodings module API
+
+def getregentry():  # builds and returns the codecs.CodecInfo record that describes this codec
+ return codecs.CodecInfo(
+ name='cp775',
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # the four entries below pass the classes themselves, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))  # start with every value 0-255 mapped to itself
+decoding_map.update({  # then override 0x80-0xff: byte value (key) -> Unicode code point (value)
+ 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
+ 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE
+ 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON
+ 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
+ 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
+ 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON
+ 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
+ 0x0096: 0x00a2, # CENT SIGN
+ 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
+ 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x00d7, # MULTIPLICATION SIGN
+ 0x009f: 0x00a4, # CURRENCY SIGN
+ 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
+ 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
+ 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
+ 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK
+ 0x00a7: 0x00a6, # BROKEN BAR
+ 0x00a8: 0x00a9, # COPYRIGHT SIGN
+ 0x00a9: 0x00ae, # REGISTERED SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
+ 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
+ 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
+ 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
+ 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
+ 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
+ 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
+ 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON
+ 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
+ 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
+ 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
+ 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON
+ 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
+ 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON
+ 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
+ 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
+ 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
+ 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
+ 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
+ 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
+ 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
+ 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
+ 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
+ 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
+ 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
+ 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK
+ 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00f4: 0x00b6, # PILCROW SIGN
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x00b9, # SUPERSCRIPT ONE
+ 0x00fc: 0x00b3, # SUPERSCRIPT THREE
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (  # adjacent literals concatenate into one unicode string; per the trailing comments, position N holds the character for byte N
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\u0106' # 0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE
+ u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\u0101' # 0x0083 -> LATIN SMALL LETTER A WITH MACRON
+ u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\u0123' # 0x0085 -> LATIN SMALL LETTER G WITH CEDILLA
+ u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\u0107' # 0x0087 -> LATIN SMALL LETTER C WITH ACUTE
+ u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE
+ u'\u0113' # 0x0089 -> LATIN SMALL LETTER E WITH MACRON
+ u'\u0156' # 0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA
+ u'\u0157' # 0x008b -> LATIN SMALL LETTER R WITH CEDILLA
+ u'\u012b' # 0x008c -> LATIN SMALL LETTER I WITH MACRON
+ u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE
+ u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ u'\u014d' # 0x0093 -> LATIN SMALL LETTER O WITH MACRON
+ u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\u0122' # 0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA
+ u'\xa2' # 0x0096 -> CENT SIGN
+ u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE
+ u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE
+ u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ u'\xd7' # 0x009e -> MULTIPLICATION SIGN
+ u'\xa4' # 0x009f -> CURRENCY SIGN
+ u'\u0100' # 0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON
+ u'\u012a' # 0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\u017b' # 0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ u'\u017c' # 0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE
+ u'\u017a' # 0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE
+ u'\u201d' # 0x00a6 -> RIGHT DOUBLE QUOTATION MARK
+ u'\xa6' # 0x00a7 -> BROKEN BAR
+ u'\xa9' # 0x00a8 -> COPYRIGHT SIGN
+ u'\xae' # 0x00a9 -> REGISTERED SIGN
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ u'\u0141' # 0x00ad -> LATIN CAPITAL LETTER L WITH STROKE
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u0104' # 0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK
+ u'\u010c' # 0x00b6 -> LATIN CAPITAL LETTER C WITH CARON
+ u'\u0118' # 0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK
+ u'\u0116' # 0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u012e' # 0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK
+ u'\u0160' # 0x00be -> LATIN CAPITAL LETTER S WITH CARON
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u0172' # 0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK
+ u'\u016a' # 0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u017d' # 0x00cf -> LATIN CAPITAL LETTER Z WITH CARON
+ u'\u0105' # 0x00d0 -> LATIN SMALL LETTER A WITH OGONEK
+ u'\u010d' # 0x00d1 -> LATIN SMALL LETTER C WITH CARON
+ u'\u0119' # 0x00d2 -> LATIN SMALL LETTER E WITH OGONEK
+ u'\u0117' # 0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE
+ u'\u012f' # 0x00d4 -> LATIN SMALL LETTER I WITH OGONEK
+ u'\u0161' # 0x00d5 -> LATIN SMALL LETTER S WITH CARON
+ u'\u0173' # 0x00d6 -> LATIN SMALL LETTER U WITH OGONEK
+ u'\u016b' # 0x00d7 -> LATIN SMALL LETTER U WITH MACRON
+ u'\u017e' # 0x00d8 -> LATIN SMALL LETTER Z WITH CARON
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u258c' # 0x00dd -> LEFT HALF BLOCK
+ u'\u2590' # 0x00de -> RIGHT HALF BLOCK
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ u'\u014c' # 0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON
+ u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE
+ u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+ u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+ u'\xb5' # 0x00e6 -> MICRO SIGN
+ u'\u0144' # 0x00e7 -> LATIN SMALL LETTER N WITH ACUTE
+ u'\u0136' # 0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA
+ u'\u0137' # 0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA
+ u'\u013b' # 0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA
+ u'\u013c' # 0x00eb -> LATIN SMALL LETTER L WITH CEDILLA
+ u'\u0146' # 0x00ec -> LATIN SMALL LETTER N WITH CEDILLA
+ u'\u0112' # 0x00ed -> LATIN CAPITAL LETTER E WITH MACRON
+ u'\u0145' # 0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA
+ u'\u2019' # 0x00ef -> RIGHT SINGLE QUOTATION MARK
+ u'\xad' # 0x00f0 -> SOFT HYPHEN
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u201c' # 0x00f2 -> LEFT DOUBLE QUOTATION MARK
+ u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
+ u'\xb6' # 0x00f4 -> PILCROW SIGN
+ u'\xa7' # 0x00f5 -> SECTION SIGN
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\u201e' # 0x00f7 -> DOUBLE LOW-9 QUOTATION MARK
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\u2219' # 0x00f9 -> BULLET OPERATOR
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\xb9' # 0x00fb -> SUPERSCRIPT ONE
+ u'\xb3' # 0x00fc -> SUPERSCRIPT THREE
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a2: 0x0096, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x009f, # CURRENCY SIGN
+ 0x00a6: 0x00a7, # BROKEN BAR
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a9: 0x00a8, # COPYRIGHT SIGN
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00ae: 0x00a9, # REGISTERED SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b3: 0x00fc, # SUPERSCRIPT THREE
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b6: 0x00f4, # PILCROW SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00b9: 0x00fb, # SUPERSCRIPT ONE
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d7: 0x009e, # MULTIPLICATION SIGN
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN)
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0100: 0x00a0, # LATIN CAPITAL LETTER A WITH MACRON
+ 0x0101: 0x0083, # LATIN SMALL LETTER A WITH MACRON
+ 0x0104: 0x00b5, # LATIN CAPITAL LETTER A WITH OGONEK
+ 0x0105: 0x00d0, # LATIN SMALL LETTER A WITH OGONEK
+ 0x0106: 0x0080, # LATIN CAPITAL LETTER C WITH ACUTE
+ 0x0107: 0x0087, # LATIN SMALL LETTER C WITH ACUTE
+ 0x010c: 0x00b6, # LATIN CAPITAL LETTER C WITH CARON
+ 0x010d: 0x00d1, # LATIN SMALL LETTER C WITH CARON
+ 0x0112: 0x00ed, # LATIN CAPITAL LETTER E WITH MACRON
+ 0x0113: 0x0089, # LATIN SMALL LETTER E WITH MACRON
+ 0x0116: 0x00b8, # LATIN CAPITAL LETTER E WITH DOT ABOVE
+ 0x0117: 0x00d3, # LATIN SMALL LETTER E WITH DOT ABOVE
+ 0x0118: 0x00b7, # LATIN CAPITAL LETTER E WITH OGONEK
+ 0x0119: 0x00d2, # LATIN SMALL LETTER E WITH OGONEK
+ 0x0122: 0x0095, # LATIN CAPITAL LETTER G WITH CEDILLA
+ 0x0123: 0x0085, # LATIN SMALL LETTER G WITH CEDILLA
+ 0x012a: 0x00a1, # LATIN CAPITAL LETTER I WITH MACRON
+ 0x012b: 0x008c, # LATIN SMALL LETTER I WITH MACRON
+ 0x012e: 0x00bd, # LATIN CAPITAL LETTER I WITH OGONEK
+ 0x012f: 0x00d4, # LATIN SMALL LETTER I WITH OGONEK
+ 0x0136: 0x00e8, # LATIN CAPITAL LETTER K WITH CEDILLA
+ 0x0137: 0x00e9, # LATIN SMALL LETTER K WITH CEDILLA
+ 0x013b: 0x00ea, # LATIN CAPITAL LETTER L WITH CEDILLA
+ 0x013c: 0x00eb, # LATIN SMALL LETTER L WITH CEDILLA
+ 0x0141: 0x00ad, # LATIN CAPITAL LETTER L WITH STROKE
+ 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE
+ 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE
+ 0x0144: 0x00e7, # LATIN SMALL LETTER N WITH ACUTE
+ 0x0145: 0x00ee, # LATIN CAPITAL LETTER N WITH CEDILLA
+ 0x0146: 0x00ec, # LATIN SMALL LETTER N WITH CEDILLA
+ 0x014c: 0x00e2, # LATIN CAPITAL LETTER O WITH MACRON
+ 0x014d: 0x0093, # LATIN SMALL LETTER O WITH MACRON
+ 0x0156: 0x008a, # LATIN CAPITAL LETTER R WITH CEDILLA
+ 0x0157: 0x008b, # LATIN SMALL LETTER R WITH CEDILLA
+ 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE
+ 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE
+ 0x0160: 0x00be, # LATIN CAPITAL LETTER S WITH CARON
+ 0x0161: 0x00d5, # LATIN SMALL LETTER S WITH CARON
+ 0x016a: 0x00c7, # LATIN CAPITAL LETTER U WITH MACRON
+ 0x016b: 0x00d7, # LATIN SMALL LETTER U WITH MACRON
+ 0x0172: 0x00c6, # LATIN CAPITAL LETTER U WITH OGONEK
+ 0x0173: 0x00d6, # LATIN SMALL LETTER U WITH OGONEK
+ 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE
+ 0x017a: 0x00a5, # LATIN SMALL LETTER Z WITH ACUTE
+ 0x017b: 0x00a3, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ 0x017c: 0x00a4, # LATIN SMALL LETTER Z WITH DOT ABOVE
+ 0x017d: 0x00cf, # LATIN CAPITAL LETTER Z WITH CARON
+ 0x017e: 0x00d8, # LATIN SMALL LETTER Z WITH CARON
+ 0x2019: 0x00ef, # RIGHT SINGLE QUOTATION MARK
+ 0x201c: 0x00f2, # LEFT DOUBLE QUOTATION MARK
+ 0x201d: 0x00a6, # RIGHT DOUBLE QUOTATION MARK
+ 0x201e: 0x00f7, # DOUBLE LOW-9 QUOTATION MARK
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp850.py b/cashew/Lib/encodings/cp850.py
new file mode 100644
index 0000000..0c8478c
--- /dev/null
+++ b/cashew/Lib/encodings/cp850.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP850.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ # cp850 codec: both methods delegate to the codecs charmap helpers and return their result unchanged.
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map) # encoding_map: Unicode code point -> cp850 byte value
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table) # decoding_table: unicode string indexed by cp850 byte value
+
+class IncrementalEncoder(codecs.IncrementalEncoder): # incremental cp850 encoder
+ def encode(self, input, final=False): # `final` is accepted but not used by this body
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0] # keep only element [0] of the helper's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder): # incremental cp850 decoder
+ def decode(self, input, final=False): # `final` is accepted but not used by this body
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0] # keep only element [0] of the helper's result
+
+class StreamWriter(Codec,codecs.StreamWriter): # combines this module's Codec with codecs.StreamWriter
+ pass # no overrides; everything is inherited from the two bases
+
+class StreamReader(Codec,codecs.StreamReader): # combines this module's Codec with codecs.StreamReader
+ pass # no overrides; everything is inherited from the two bases
+
+### encodings module API
+
+def getregentry(): # build and return the codecs.CodecInfo record describing this cp850 codec
+ return codecs.CodecInfo(
+ name='cp850',
+ encode=Codec().encode, # bound method of a fresh Codec instance
+ decode=Codec().decode, # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder, # the classes themselves are passed, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256)) # identity for 0x00-0xff; update() below remaps bytes 0x80-0xff to Unicode code points (the codec classes above use decoding_table, not this dict)
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x00d7, # MULTIPLICATION SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x00ae, # REGISTERED SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00b8: 0x00a9, # COPYRIGHT SIGN
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x00a2, # CENT SIGN
+ 0x00be: 0x00a5, # YEN SIGN
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
+ 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x00a4, # CURRENCY SIGN
+ 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH
+ 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH
+ 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I
+ 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x00a6, # BROKEN BAR
+ 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN
+ 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN
+ 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00ee: 0x00af, # MACRON
+ 0x00ef: 0x00b4, # ACUTE ACCENT
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2017, # DOUBLE LOW LINE
+ 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00f4: 0x00b6, # PILCROW SIGN
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x00b8, # CEDILLA
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x00a8, # DIAERESIS
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x00b9, # SUPERSCRIPT ONE
+ 0x00fc: 0x00b3, # SUPERSCRIPT THREE
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = ( # the adjacent literals below concatenate into one unicode string; position in the string = cp850 byte value
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+ u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ u'\xd7' # 0x009e -> MULTIPLICATION SIGN
+ u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ u'\xae' # 0x00a9 -> REGISTERED SIGN
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xa9' # 0x00b8 -> COPYRIGHT SIGN
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\xa2' # 0x00bd -> CENT SIGN
+ u'\xa5' # 0x00be -> YEN SIGN
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE
+ u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\xa4' # 0x00cf -> CURRENCY SIGN
+ u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH
+ u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH
+ u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I
+ u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\xa6' # 0x00dd -> BROKEN BAR
+ u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
+ u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+ u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+ u'\xb5' # 0x00e6 -> MICRO SIGN
+ u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN
+ u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN
+ u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
+ u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
+ u'\xaf' # 0x00ee -> MACRON
+ u'\xb4' # 0x00ef -> ACUTE ACCENT
+ u'\xad' # 0x00f0 -> SOFT HYPHEN
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u2017' # 0x00f2 -> DOUBLE LOW LINE
+ u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
+ u'\xb6' # 0x00f4 -> PILCROW SIGN
+ u'\xa7' # 0x00f5 -> SECTION SIGN
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\xb8' # 0x00f7 -> CEDILLA
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\xa8' # 0x00f9 -> DIAERESIS
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\xb9' # 0x00fb -> SUPERSCRIPT ONE
+ u'\xb3' # 0x00fc -> SUPERSCRIPT THREE
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x00bd, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x00cf, # CURRENCY SIGN
+ 0x00a5: 0x00be, # YEN SIGN
+ 0x00a6: 0x00dd, # BROKEN BAR
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a8: 0x00f9, # DIAERESIS
+ 0x00a9: 0x00b8, # COPYRIGHT SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00ae: 0x00a9, # REGISTERED SIGN
+ 0x00af: 0x00ee, # MACRON
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b3: 0x00fc, # SUPERSCRIPT THREE
+ 0x00b4: 0x00ef, # ACUTE ACCENT
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b6: 0x00f4, # PILCROW SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00b8: 0x00f7, # CEDILLA
+ 0x00b9: 0x00fb, # SUPERSCRIPT ONE
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d7: 0x009e, # MULTIPLICATION SIGN
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN
+ 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x2017: 0x00f2, # DOUBLE LOW LINE
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp852.py b/cashew/Lib/encodings/cp852.py
new file mode 100644
index 0000000..069d547
--- /dev/null
+++ b/cashew/Lib/encodings/cp852.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP852.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp852',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
+ 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+ 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
+ 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
+ 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON
+ 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
+ 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
+ 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON
+ 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
+ 0x009e: 0x00d7, # MULTIPLICATION SIGN
+ 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
+ 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
+ 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
+ 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON
+ 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
+ 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
+ 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
+ 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
+ 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE
+ 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x00a4, # CURRENCY SIGN
+ 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
+ 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
+ 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
+ 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON
+ 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
+ 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA
+ 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
+ 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
+ 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON
+ 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
+ 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON
+ 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
+ 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
+ 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+ 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
+ 0x00ef: 0x00b4, # ACUTE ACCENT
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT
+ 0x00f2: 0x02db, # OGONEK
+ 0x00f3: 0x02c7, # CARON
+ 0x00f4: 0x02d8, # BREVE
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x00b8, # CEDILLA
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x00a8, # DIAERESIS
+ 0x00fa: 0x02d9, # DOT ABOVE
+ 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
+ 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
+ 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\u016f' # 0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE
+ u'\u0107' # 0x0086 -> LATIN SMALL LETTER C WITH ACUTE
+ u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE
+ u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\u0150' # 0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+ u'\u0151' # 0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE
+ u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\u0106' # 0x008f -> LATIN CAPITAL LETTER C WITH ACUTE
+ u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\u0139' # 0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE
+ u'\u013a' # 0x0092 -> LATIN SMALL LETTER L WITH ACUTE
+ u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\u013d' # 0x0095 -> LATIN CAPITAL LETTER L WITH CARON
+ u'\u013e' # 0x0096 -> LATIN SMALL LETTER L WITH CARON
+ u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE
+ u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE
+ u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\u0164' # 0x009b -> LATIN CAPITAL LETTER T WITH CARON
+ u'\u0165' # 0x009c -> LATIN SMALL LETTER T WITH CARON
+ u'\u0141' # 0x009d -> LATIN CAPITAL LETTER L WITH STROKE
+ u'\xd7' # 0x009e -> MULTIPLICATION SIGN
+ u'\u010d' # 0x009f -> LATIN SMALL LETTER C WITH CARON
+ u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ u'\u0104' # 0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK
+ u'\u0105' # 0x00a5 -> LATIN SMALL LETTER A WITH OGONEK
+ u'\u017d' # 0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON
+ u'\u017e' # 0x00a7 -> LATIN SMALL LETTER Z WITH CARON
+ u'\u0118' # 0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK
+ u'\u0119' # 0x00a9 -> LATIN SMALL LETTER E WITH OGONEK
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\u017a' # 0x00ab -> LATIN SMALL LETTER Z WITH ACUTE
+ u'\u010c' # 0x00ac -> LATIN CAPITAL LETTER C WITH CARON
+ u'\u015f' # 0x00ad -> LATIN SMALL LETTER S WITH CEDILLA
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\u011a' # 0x00b7 -> LATIN CAPITAL LETTER E WITH CARON
+ u'\u015e' # 0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u017b' # 0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ u'\u017c' # 0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u0102' # 0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE
+ u'\u0103' # 0x00c7 -> LATIN SMALL LETTER A WITH BREVE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\xa4' # 0x00cf -> CURRENCY SIGN
+ u'\u0111' # 0x00d0 -> LATIN SMALL LETTER D WITH STROKE
+ u'\u0110' # 0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE
+ u'\u010e' # 0x00d2 -> LATIN CAPITAL LETTER D WITH CARON
+ u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\u010f' # 0x00d4 -> LATIN SMALL LETTER D WITH CARON
+ u'\u0147' # 0x00d5 -> LATIN CAPITAL LETTER N WITH CARON
+ u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\u011b' # 0x00d8 -> LATIN SMALL LETTER E WITH CARON
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u0162' # 0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA
+ u'\u016e' # 0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE
+ u'\u0144' # 0x00e4 -> LATIN SMALL LETTER N WITH ACUTE
+ u'\u0148' # 0x00e5 -> LATIN SMALL LETTER N WITH CARON
+ u'\u0160' # 0x00e6 -> LATIN CAPITAL LETTER S WITH CARON
+ u'\u0161' # 0x00e7 -> LATIN SMALL LETTER S WITH CARON
+ u'\u0154' # 0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE
+ u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\u0155' # 0x00ea -> LATIN SMALL LETTER R WITH ACUTE
+ u'\u0170' # 0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+ u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
+ u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
+ u'\u0163' # 0x00ee -> LATIN SMALL LETTER T WITH CEDILLA
+ u'\xb4' # 0x00ef -> ACUTE ACCENT
+ u'\xad' # 0x00f0 -> SOFT HYPHEN
+ u'\u02dd' # 0x00f1 -> DOUBLE ACUTE ACCENT
+ u'\u02db' # 0x00f2 -> OGONEK
+ u'\u02c7' # 0x00f3 -> CARON
+ u'\u02d8' # 0x00f4 -> BREVE
+ u'\xa7' # 0x00f5 -> SECTION SIGN
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\xb8' # 0x00f7 -> CEDILLA
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\xa8' # 0x00f9 -> DIAERESIS
+ u'\u02d9' # 0x00fa -> DOT ABOVE
+ u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+ u'\u0158' # 0x00fc -> LATIN CAPITAL LETTER R WITH CARON
+ u'\u0159' # 0x00fd -> LATIN SMALL LETTER R WITH CARON
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a4: 0x00cf, # CURRENCY SIGN
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a8: 0x00f9, # DIAERESIS
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b4: 0x00ef, # ACUTE ACCENT
+ 0x00b8: 0x00f7, # CEDILLA
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d7: 0x009e, # MULTIPLICATION SIGN
+ 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x0102: 0x00c6, # LATIN CAPITAL LETTER A WITH BREVE
+ 0x0103: 0x00c7, # LATIN SMALL LETTER A WITH BREVE
+ 0x0104: 0x00a4, # LATIN CAPITAL LETTER A WITH OGONEK
+ 0x0105: 0x00a5, # LATIN SMALL LETTER A WITH OGONEK
+ 0x0106: 0x008f, # LATIN CAPITAL LETTER C WITH ACUTE
+ 0x0107: 0x0086, # LATIN SMALL LETTER C WITH ACUTE
+ 0x010c: 0x00ac, # LATIN CAPITAL LETTER C WITH CARON
+ 0x010d: 0x009f, # LATIN SMALL LETTER C WITH CARON
+ 0x010e: 0x00d2, # LATIN CAPITAL LETTER D WITH CARON
+ 0x010f: 0x00d4, # LATIN SMALL LETTER D WITH CARON
+ 0x0110: 0x00d1, # LATIN CAPITAL LETTER D WITH STROKE
+ 0x0111: 0x00d0, # LATIN SMALL LETTER D WITH STROKE
+ 0x0118: 0x00a8, # LATIN CAPITAL LETTER E WITH OGONEK
+ 0x0119: 0x00a9, # LATIN SMALL LETTER E WITH OGONEK
+ 0x011a: 0x00b7, # LATIN CAPITAL LETTER E WITH CARON
+ 0x011b: 0x00d8, # LATIN SMALL LETTER E WITH CARON
+ 0x0139: 0x0091, # LATIN CAPITAL LETTER L WITH ACUTE
+ 0x013a: 0x0092, # LATIN SMALL LETTER L WITH ACUTE
+ 0x013d: 0x0095, # LATIN CAPITAL LETTER L WITH CARON
+ 0x013e: 0x0096, # LATIN SMALL LETTER L WITH CARON
+ 0x0141: 0x009d, # LATIN CAPITAL LETTER L WITH STROKE
+ 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE
+ 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE
+ 0x0144: 0x00e4, # LATIN SMALL LETTER N WITH ACUTE
+ 0x0147: 0x00d5, # LATIN CAPITAL LETTER N WITH CARON
+ 0x0148: 0x00e5, # LATIN SMALL LETTER N WITH CARON
+ 0x0150: 0x008a, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+ 0x0151: 0x008b, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ 0x0154: 0x00e8, # LATIN CAPITAL LETTER R WITH ACUTE
+ 0x0155: 0x00ea, # LATIN SMALL LETTER R WITH ACUTE
+ 0x0158: 0x00fc, # LATIN CAPITAL LETTER R WITH CARON
+ 0x0159: 0x00fd, # LATIN SMALL LETTER R WITH CARON
+ 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE
+ 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE
+ 0x015e: 0x00b8, # LATIN CAPITAL LETTER S WITH CEDILLA
+ 0x015f: 0x00ad, # LATIN SMALL LETTER S WITH CEDILLA
+ 0x0160: 0x00e6, # LATIN CAPITAL LETTER S WITH CARON
+ 0x0161: 0x00e7, # LATIN SMALL LETTER S WITH CARON
+ 0x0162: 0x00dd, # LATIN CAPITAL LETTER T WITH CEDILLA
+ 0x0163: 0x00ee, # LATIN SMALL LETTER T WITH CEDILLA
+ 0x0164: 0x009b, # LATIN CAPITAL LETTER T WITH CARON
+ 0x0165: 0x009c, # LATIN SMALL LETTER T WITH CARON
+ 0x016e: 0x00de, # LATIN CAPITAL LETTER U WITH RING ABOVE
+ 0x016f: 0x0085, # LATIN SMALL LETTER U WITH RING ABOVE
+ 0x0170: 0x00eb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+ 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
+ 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE
+ 0x017a: 0x00ab, # LATIN SMALL LETTER Z WITH ACUTE
+ 0x017b: 0x00bd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ 0x017c: 0x00be, # LATIN SMALL LETTER Z WITH DOT ABOVE
+ 0x017d: 0x00a6, # LATIN CAPITAL LETTER Z WITH CARON
+ 0x017e: 0x00a7, # LATIN SMALL LETTER Z WITH CARON
+ 0x02c7: 0x00f3, # CARON
+ 0x02d8: 0x00f4, # BREVE
+ 0x02d9: 0x00fa, # DOT ABOVE
+ 0x02db: 0x00f2, # OGONEK
+ 0x02dd: 0x00f1, # DOUBLE ACUTE ACCENT
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp855.py b/cashew/Lib/encodings/cp855.py
new file mode 100644
index 0000000..241ef9d
--- /dev/null
+++ b/cashew/Lib/encodings/cp855.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP855.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp855',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map: cp855 byte value -> Unicode code point
+# Not read by the Codec classes in this module, which decode through decoding_table.
+decoding_map = codecs.make_identity_dict(range(256))  # start from the identity mapping for all 256 byte values
+decoding_map.update({  # then override the upper half, 0x80-0xff
+ 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
+ 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
+ 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
+ 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE
+ 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO
+ 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO
+ 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE
+ 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE
+ 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI
+ 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI
+ 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE
+ 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE
+ 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE
+ 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE
+ 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE
+ 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE
+ 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE
+ 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE
+ 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE
+ 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE
+ 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U
+ 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
+ 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE
+ 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE
+ 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU
+ 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU
+ 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
+ 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
+ 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A
+ 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A
+ 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE
+ 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE
+ 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE
+ 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE
+ 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE
+ 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE
+ 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE
+ 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE
+ 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF
+ 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF
+ 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE
+ 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA
+ 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA
+ 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I
+ 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I
+ 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA
+ 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x00a4, # CURRENCY SIGN
+ 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL
+ 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL
+ 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM
+ 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM
+ 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN
+ 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN
+ 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O
+ 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O
+ 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE
+ 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA
+ 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER
+ 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER
+ 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES
+ 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES
+ 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE
+ 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE
+ 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U
+ 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U
+ 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE
+ 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
+ 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE
+ 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE
+ 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
+ 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
+ 0x00ef: 0x2116, # NUMERO SIGN
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU
+ 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU
+ 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE
+ 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE
+ 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA
+ 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA
+ 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E
+ 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E
+ 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
+ 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
+ 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE
+ 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE
+ 0x00fd: 0x00a7, # SECTION SIGN
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table: one 256-character unicode string (the adjacent literals below are concatenated)
+# Indexed by cp855 byte value; passed to codecs.charmap_decode by the decode methods in this module.
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE
+ u'\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE
+ u'\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE
+ u'\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE
+ u'\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO
+ u'\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO
+ u'\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+ u'\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ u'\u0455' # 0x0088 -> CYRILLIC SMALL LETTER DZE
+ u'\u0405' # 0x0089 -> CYRILLIC CAPITAL LETTER DZE
+ u'\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ u'\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ u'\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI
+ u'\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI
+ u'\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE
+ u'\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE
+ u'\u0459' # 0x0090 -> CYRILLIC SMALL LETTER LJE
+ u'\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE
+ u'\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE
+ u'\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE
+ u'\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE
+ u'\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE
+ u'\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE
+ u'\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE
+ u'\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U
+ u'\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U
+ u'\u045f' # 0x009a -> CYRILLIC SMALL LETTER DZHE
+ u'\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE
+ u'\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU
+ u'\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU
+ u'\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN
+ u'\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN
+ u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A
+ u'\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A
+ u'\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE
+ u'\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE
+ u'\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE
+ u'\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE
+ u'\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE
+ u'\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE
+ u'\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE
+ u'\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE
+ u'\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF
+ u'\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF
+ u'\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE
+ u'\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA
+ u'\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA
+ u'\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I
+ u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I
+ u'\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA
+ u'\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\xa4' # 0x00cf -> CURRENCY SIGN
+ u'\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL
+ u'\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL
+ u'\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM
+ u'\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM
+ u'\u043d' # 0x00d4 -> CYRILLIC SMALL LETTER EN
+ u'\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN
+ u'\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O
+ u'\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O
+ u'\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE
+ u'\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA
+ u'\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER
+ u'\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER
+ u'\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES
+ u'\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES
+ u'\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE
+ u'\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE
+ u'\u0443' # 0x00e7 -> CYRILLIC SMALL LETTER U
+ u'\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U
+ u'\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE
+ u'\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE
+ u'\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE
+ u'\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE
+ u'\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN
+ u'\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN
+ u'\u2116' # 0x00ef -> NUMERO SIGN
+ u'\xad' # 0x00f0 -> SOFT HYPHEN
+ u'\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU
+ u'\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU
+ u'\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE
+ u'\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE
+ u'\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA
+ u'\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA
+ u'\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E
+ u'\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E
+ u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA
+ u'\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA
+ u'\u0447' # 0x00fb -> CYRILLIC SMALL LETTER CHE
+ u'\u0427' # 0x00fc -> CYRILLIC CAPITAL LETTER CHE
+ u'\xa7' # 0x00fd -> SECTION SIGN
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map: Unicode code point -> cp855 byte value
+# Passed to codecs.charmap_encode by the encode methods in this module.
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a4: 0x00cf, # CURRENCY SIGN
+ 0x00a7: 0x00fd, # SECTION SIGN
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO
+ 0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE
+ 0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE
+ 0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE
+ 0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI
+ 0x0408: 0x008f, # CYRILLIC CAPITAL LETTER JE
+ 0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE
+ 0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE
+ 0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE
+ 0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE
+ 0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U
+ 0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE
+ 0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A
+ 0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE
+ 0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE
+ 0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE
+ 0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE
+ 0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE
+ 0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE
+ 0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE
+ 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I
+ 0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I
+ 0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA
+ 0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL
+ 0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM
+ 0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN
+ 0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O
+ 0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE
+ 0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER
+ 0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES
+ 0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE
+ 0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U
+ 0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF
+ 0x0425: 0x00b6, # CYRILLIC CAPITAL LETTER HA
+ 0x0426: 0x00a5, # CYRILLIC CAPITAL LETTER TSE
+ 0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE
+ 0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA
+ 0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA
+ 0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN
+ 0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU
+ 0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN
+ 0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E
+ 0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU
+ 0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA
+ 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A
+ 0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE
+ 0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE
+ 0x0433: 0x00ac, # CYRILLIC SMALL LETTER GHE
+ 0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE
+ 0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE
+ 0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE
+ 0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE
+ 0x0438: 0x00b7, # CYRILLIC SMALL LETTER I
+ 0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I
+ 0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA
+ 0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL
+ 0x043c: 0x00d2, # CYRILLIC SMALL LETTER EM
+ 0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN
+ 0x043e: 0x00d6, # CYRILLIC SMALL LETTER O
+ 0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE
+ 0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER
+ 0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES
+ 0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE
+ 0x0443: 0x00e7, # CYRILLIC SMALL LETTER U
+ 0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF
+ 0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA
+ 0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE
+ 0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE
+ 0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA
+ 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA
+ 0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN
+ 0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU
+ 0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN
+ 0x044d: 0x00f7, # CYRILLIC SMALL LETTER E
+ 0x044e: 0x009c, # CYRILLIC SMALL LETTER YU
+ 0x044f: 0x00de, # CYRILLIC SMALL LETTER YA
+ 0x0451: 0x0084, # CYRILLIC SMALL LETTER IO
+ 0x0452: 0x0080, # CYRILLIC SMALL LETTER DJE
+ 0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE
+ 0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE
+ 0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x0457: 0x008c, # CYRILLIC SMALL LETTER YI
+ 0x0458: 0x008e, # CYRILLIC SMALL LETTER JE
+ 0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE
+ 0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE
+ 0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE
+ 0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE
+ 0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U
+ 0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE
+ 0x2116: 0x00ef, # NUMERO SIGN
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp856.py b/cashew/Lib/encodings/cp856.py
new file mode 100644
index 0000000..203c2c4
--- /dev/null
+++ b/cashew/Lib/encodings/cp856.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp856 generated from 'MAPPINGS/VENDORS/MISC/CP856.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # cp856 codec; both methods delegate to the codecs charmap helpers
+
+ def encode(self,input,errors='strict'):  # encodes via encoding_table; 'errors' is passed straight through
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):  # decodes via decoding_table; 'errors' is passed straight through
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # incremental cp856 encoder backed by encoding_table
+ def encode(self, input, final=False):  # 'final' is accepted but never read here
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # [0]: keep only the first element of the helper's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # incremental cp856 decoder backed by decoding_table
+ def decode(self, input, final=False):  # 'final' is accepted but never read here
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # [0]: keep only the first element of the helper's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # combines Codec with codecs.StreamWriter; defines nothing of its own
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):  # combines Codec with codecs.StreamReader; defines nothing of its own
+ pass
+
+### encodings module API
+
+def getregentry():  # returns the codecs.CodecInfo record describing the cp856 codec
+ return codecs.CodecInfo(
+ name='cp856',
+ encode=Codec().encode,  # bound method taken from a fresh Codec instance
+ decode=Codec().decode,  # bound method taken from a second fresh Codec instance
+ incrementalencoder=IncrementalEncoder,  # this and the fields below are passed as classes, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (  # adjacent u'' literals concatenate into one string; position = cp856 byte value, u'\ufffe' marks bytes listed as UNDEFINED
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u05d0' # 0x80 -> HEBREW LETTER ALEF
+ u'\u05d1' # 0x81 -> HEBREW LETTER BET
+ u'\u05d2' # 0x82 -> HEBREW LETTER GIMEL
+ u'\u05d3' # 0x83 -> HEBREW LETTER DALET
+ u'\u05d4' # 0x84 -> HEBREW LETTER HE
+ u'\u05d5' # 0x85 -> HEBREW LETTER VAV
+ u'\u05d6' # 0x86 -> HEBREW LETTER ZAYIN
+ u'\u05d7' # 0x87 -> HEBREW LETTER HET
+ u'\u05d8' # 0x88 -> HEBREW LETTER TET
+ u'\u05d9' # 0x89 -> HEBREW LETTER YOD
+ u'\u05da' # 0x8A -> HEBREW LETTER FINAL KAF
+ u'\u05db' # 0x8B -> HEBREW LETTER KAF
+ u'\u05dc' # 0x8C -> HEBREW LETTER LAMED
+ u'\u05dd' # 0x8D -> HEBREW LETTER FINAL MEM
+ u'\u05de' # 0x8E -> HEBREW LETTER MEM
+ u'\u05df' # 0x8F -> HEBREW LETTER FINAL NUN
+ u'\u05e0' # 0x90 -> HEBREW LETTER NUN
+ u'\u05e1' # 0x91 -> HEBREW LETTER SAMEKH
+ u'\u05e2' # 0x92 -> HEBREW LETTER AYIN
+ u'\u05e3' # 0x93 -> HEBREW LETTER FINAL PE
+ u'\u05e4' # 0x94 -> HEBREW LETTER PE
+ u'\u05e5' # 0x95 -> HEBREW LETTER FINAL TSADI
+ u'\u05e6' # 0x96 -> HEBREW LETTER TSADI
+ u'\u05e7' # 0x97 -> HEBREW LETTER QOF
+ u'\u05e8' # 0x98 -> HEBREW LETTER RESH
+ u'\u05e9' # 0x99 -> HEBREW LETTER SHIN
+ u'\u05ea' # 0x9A -> HEBREW LETTER TAV
+ u'\ufffe' # 0x9B -> UNDEFINED
+ u'\xa3' # 0x9C -> POUND SIGN
+ u'\ufffe' # 0x9D -> UNDEFINED
+ u'\xd7' # 0x9E -> MULTIPLICATION SIGN
+ u'\ufffe' # 0x9F -> UNDEFINED
+ u'\ufffe' # 0xA0 -> UNDEFINED
+ u'\ufffe' # 0xA1 -> UNDEFINED
+ u'\ufffe' # 0xA2 -> UNDEFINED
+ u'\ufffe' # 0xA3 -> UNDEFINED
+ u'\ufffe' # 0xA4 -> UNDEFINED
+ u'\ufffe' # 0xA5 -> UNDEFINED
+ u'\ufffe' # 0xA6 -> UNDEFINED
+ u'\ufffe' # 0xA7 -> UNDEFINED
+ u'\ufffe' # 0xA8 -> UNDEFINED
+ u'\xae' # 0xA9 -> REGISTERED SIGN
+ u'\xac' # 0xAA -> NOT SIGN
+ u'\xbd' # 0xAB -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER
+ u'\ufffe' # 0xAD -> UNDEFINED
+ u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0xB0 -> LIGHT SHADE
+ u'\u2592' # 0xB1 -> MEDIUM SHADE
+ u'\u2593' # 0xB2 -> DARK SHADE
+ u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\ufffe' # 0xB5 -> UNDEFINED
+ u'\ufffe' # 0xB6 -> UNDEFINED
+ u'\ufffe' # 0xB7 -> UNDEFINED
+ u'\xa9' # 0xB8 -> COPYRIGHT SIGN
+ u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\xa2' # 0xBD -> CENT SIGN
+ u'\xa5' # 0xBE -> YEN SIGN
+ u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\ufffe' # 0xC6 -> UNDEFINED
+ u'\ufffe' # 0xC7 -> UNDEFINED
+ u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\xa4' # 0xCF -> CURRENCY SIGN
+ u'\ufffe' # 0xD0 -> UNDEFINED
+ u'\ufffe' # 0xD1 -> UNDEFINED
+ u'\ufffe' # 0xD2 -> UNDEFINED
+ u'\ufffe' # 0xD3 -> UNDEFINED
+ u'\ufffe' # 0xD4 -> UNDEFINED
+ u'\ufffe' # 0xD5 -> UNDEFINED
+ u'\ufffe' # 0xD6 -> UNDEFINED
+ u'\ufffe' # 0xD7 -> UNDEFINED
+ u'\ufffe' # 0xD8 -> UNDEFINED
+ u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0xDB -> FULL BLOCK
+ u'\u2584' # 0xDC -> LOWER HALF BLOCK
+ u'\xa6' # 0xDD -> BROKEN BAR
+ u'\ufffe' # 0xDE -> UNDEFINED
+ u'\u2580' # 0xDF -> UPPER HALF BLOCK
+ u'\ufffe' # 0xE0 -> UNDEFINED
+ u'\ufffe' # 0xE1 -> UNDEFINED
+ u'\ufffe' # 0xE2 -> UNDEFINED
+ u'\ufffe' # 0xE3 -> UNDEFINED
+ u'\ufffe' # 0xE4 -> UNDEFINED
+ u'\ufffe' # 0xE5 -> UNDEFINED
+ u'\xb5' # 0xE6 -> MICRO SIGN
+ u'\ufffe' # 0xE7 -> UNDEFINED
+ u'\ufffe' # 0xE8 -> UNDEFINED
+ u'\ufffe' # 0xE9 -> UNDEFINED
+ u'\ufffe' # 0xEA -> UNDEFINED
+ u'\ufffe' # 0xEB -> UNDEFINED
+ u'\ufffe' # 0xEC -> UNDEFINED
+ u'\ufffe' # 0xED -> UNDEFINED
+ u'\xaf' # 0xEE -> MACRON
+ u'\xb4' # 0xEF -> ACUTE ACCENT
+ u'\xad' # 0xF0 -> SOFT HYPHEN
+ u'\xb1' # 0xF1 -> PLUS-MINUS SIGN
+ u'\u2017' # 0xF2 -> DOUBLE LOW LINE
+ u'\xbe' # 0xF3 -> VULGAR FRACTION THREE QUARTERS
+ u'\xb6' # 0xF4 -> PILCROW SIGN
+ u'\xa7' # 0xF5 -> SECTION SIGN
+ u'\xf7' # 0xF6 -> DIVISION SIGN
+ u'\xb8' # 0xF7 -> CEDILLA
+ u'\xb0' # 0xF8 -> DEGREE SIGN
+ u'\xa8' # 0xF9 -> DIAERESIS
+ u'\xb7' # 0xFA -> MIDDLE DOT
+ u'\xb9' # 0xFB -> SUPERSCRIPT ONE
+ u'\xb3' # 0xFC -> SUPERSCRIPT THREE
+ u'\xb2' # 0xFD -> SUPERSCRIPT TWO
+ u'\u25a0' # 0xFE -> BLACK SQUARE
+ u'\xa0' # 0xFF -> NO-BREAK SPACE
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)  # encoder-side lookup derived from decoding_table by codecs.charmap_build
diff --git a/cashew/Lib/encodings/cp857.py b/cashew/Lib/encodings/cp857.py
new file mode 100644
index 0000000..c24191b
--- /dev/null
+++ b/cashew/Lib/encodings/cp857.py
@@ -0,0 +1,694 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP857.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # cp857 codec; both methods delegate to the codecs charmap helpers
+
+ def encode(self,input,errors='strict'):  # encodes via the encoding_map dict; 'errors' is passed straight through
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):  # decodes via decoding_table (not decoding_map); 'errors' is passed straight through
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # incremental cp857 encoder backed by encoding_map
+ def encode(self, input, final=False):  # 'final' is accepted but never read here
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]  # [0]: keep only the first element of the helper's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # incremental cp857 decoder backed by decoding_table
+ def decode(self, input, final=False):  # 'final' is accepted but never read here
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # [0]: keep only the first element of the helper's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # combines Codec with codecs.StreamWriter; defines nothing of its own
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):  # combines Codec with codecs.StreamReader; defines nothing of its own
+ pass
+
+### encodings module API
+
+def getregentry():  # returns the codecs.CodecInfo record describing the cp857 codec
+ return codecs.CodecInfo(
+ name='cp857',
+ encode=Codec().encode,  # bound method taken from a fresh Codec instance
+ decode=Codec().decode,  # bound method taken from a second fresh Codec instance
+ incrementalencoder=IncrementalEncoder,  # this and the fields below are passed as classes, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))  # start from the identity mapping for 0..255
+decoding_map.update({  # overrides for 0x80-0xFF; None marks an UNDEFINED byte; 0x00ec has no entry, so it keeps its identity mapping
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
+ 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
+ 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x00ae, # REGISTERED SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00b8: 0x00a9, # COPYRIGHT SIGN
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x00a2, # CENT SIGN
+ 0x00be: 0x00a5, # YEN SIGN
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
+ 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x00a4, # CURRENCY SIGN
+ 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00d5: None, # UNDEFINED
+ 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x00a6, # BROKEN BAR
+ 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: None, # UNDEFINED
+ 0x00e8: 0x00d7, # MULTIPLICATION SIGN
+ 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x00ee: 0x00af, # MACRON
+ 0x00ef: 0x00b4, # ACUTE ACCENT
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: None, # UNDEFINED
+ 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00f4: 0x00b6, # PILCROW SIGN
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x00b8, # CEDILLA
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x00a8, # DIAERESIS
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x00b9, # SUPERSCRIPT ONE
+ 0x00fc: 0x00b3, # SUPERSCRIPT THREE
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (  # adjacent u'' literals concatenate into one string; position = cp857 byte value, u'\ufffe' marks bytes listed as UNDEFINED
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\u0131' # 0x008d -> LATIN SMALL LETTER DOTLESS I
+ u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\u0130' # 0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+ u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ u'\u015e' # 0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA
+ u'\u015f' # 0x009f -> LATIN SMALL LETTER S WITH CEDILLA
+ u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\u011e' # 0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE
+ u'\u011f' # 0x00a7 -> LATIN SMALL LETTER G WITH BREVE
+ u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ u'\xae' # 0x00a9 -> REGISTERED SIGN
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xa9' # 0x00b8 -> COPYRIGHT SIGN
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\xa2' # 0x00bd -> CENT SIGN
+ u'\xa5' # 0x00be -> YEN SIGN
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE
+ u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\xa4' # 0x00cf -> CURRENCY SIGN
+ u'\xba' # 0x00d0 -> MASCULINE ORDINAL INDICATOR
+ u'\xaa' # 0x00d1 -> FEMININE ORDINAL INDICATOR
+ u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\ufffe' # 0x00d5 -> UNDEFINED
+ u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\xa6' # 0x00dd -> BROKEN BAR
+ u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
+ u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+ u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+ u'\xb5' # 0x00e6 -> MICRO SIGN
+ u'\ufffe' # 0x00e7 -> UNDEFINED
+ u'\xd7' # 0x00e8 -> MULTIPLICATION SIGN
+ u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xff' # 0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS
+ u'\xaf' # 0x00ee -> MACRON
+ u'\xb4' # 0x00ef -> ACUTE ACCENT
+ u'\xad' # 0x00f0 -> SOFT HYPHEN
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\ufffe' # 0x00f2 -> UNDEFINED
+ u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
+ u'\xb6' # 0x00f4 -> PILCROW SIGN
+ u'\xa7' # 0x00f5 -> SECTION SIGN
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\xb8' # 0x00f7 -> CEDILLA
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\xa8' # 0x00f9 -> DIAERESIS
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\xb9' # 0x00fb -> SUPERSCRIPT ONE
+ u'\xb3' # 0x00fc -> SUPERSCRIPT THREE
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x00bd, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x00cf, # CURRENCY SIGN
+ 0x00a5: 0x00be, # YEN SIGN
+ 0x00a6: 0x00dd, # BROKEN BAR
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a8: 0x00f9, # DIAERESIS
+ 0x00a9: 0x00b8, # COPYRIGHT SIGN
+ 0x00aa: 0x00d1, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00ae: 0x00a9, # REGISTERED SIGN
+ 0x00af: 0x00ee, # MACRON
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b3: 0x00fc, # SUPERSCRIPT THREE
+ 0x00b4: 0x00ef, # ACUTE ACCENT
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b6: 0x00f4, # PILCROW SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00b8: 0x00f7, # CEDILLA
+ 0x00b9: 0x00fb, # SUPERSCRIPT ONE
+ 0x00ba: 0x00d0, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d7: 0x00e8, # MULTIPLICATION SIGN
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00ff: 0x00ed, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x011e: 0x00a6, # LATIN CAPITAL LETTER G WITH BREVE
+ 0x011f: 0x00a7, # LATIN SMALL LETTER G WITH BREVE
+ 0x0130: 0x0098, # LATIN CAPITAL LETTER I WITH DOT ABOVE
+ 0x0131: 0x008d, # LATIN SMALL LETTER DOTLESS I
+ 0x015e: 0x009e, # LATIN CAPITAL LETTER S WITH CEDILLA
+ 0x015f: 0x009f, # LATIN SMALL LETTER S WITH CEDILLA
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp858.py b/cashew/Lib/encodings/cp858.py
new file mode 100644
index 0000000..7ba7621
--- /dev/null
+++ b/cashew/Lib/encodings/cp858.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec for CP858, modified from cp850.
+
+"""
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ """CP858 codec: encode/decode hand their arguments to the codecs charmap helpers."""
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map) # errors is forwarded unchanged; lookup goes through encoding_map
+ # decode() mirrors encode(), but is driven by decoding_table instead of encoding_map.
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder): # this subclass stores nothing between calls
+ def encode(self, input, final=False): # final is accepted but never read in this method
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0] # only element 0 of the result is returned; self.errors is presumably set by the base class - confirm
+
+class IncrementalDecoder(codecs.IncrementalDecoder): # this subclass stores nothing between calls
+ def decode(self, input, final=False): # final is accepted but never read in this method
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0] # only element 0 of the result is returned; self.errors is presumably set by the base class - confirm
+
+class StreamWriter(Codec,codecs.StreamWriter): # combines Codec (listed first) with codecs.StreamWriter
+ pass # nothing is added or overridden here
+
+class StreamReader(Codec,codecs.StreamReader): # combines Codec (listed first) with codecs.StreamReader
+ pass # nothing is added or overridden here
+
+### encodings module API
+
+def getregentry(): # takes no arguments; builds and returns a codecs.CodecInfo named 'cp858'
+ return codecs.CodecInfo(
+ name='cp858',
+ encode=Codec().encode, # bound method of a freshly created Codec instance
+ decode=Codec().decode, # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder, # the four entries below pass classes, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map: byte value -> Unicode code point.
+# Built as an identity dict over range(256); the update() below then replaces every entry from 0x80 to 0xff.
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x00d7, # MULTIPLICATION SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x00ae, # REGISTERED SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00b8: 0x00a9, # COPYRIGHT SIGN
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x00a2, # CENT SIGN
+ 0x00be: 0x00a5, # YEN SIGN
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
+ 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x00a4, # CURRENCY SIGN
+ 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH
+ 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH
+ 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00d5: 0x20ac, # EURO SIGN
+ 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x00a6, # BROKEN BAR
+ 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN
+ 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN
+ 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00ee: 0x00af, # MACRON
+ 0x00ef: 0x00b4, # ACUTE ACCENT
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2017, # DOUBLE LOW LINE
+ 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00f4: 0x00b6, # PILCROW SIGN
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x00b8, # CEDILLA
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x00a8, # DIAERESIS
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x00b9, # SUPERSCRIPT ONE
+ 0x00fc: 0x00b3, # SUPERSCRIPT THREE
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table: the 256 adjacent literals below concatenate into one unicode string.
+# Each literal's comment gives its position (the byte value) and the character name; Codec.decode passes this table to codecs.charmap_decode.
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+ u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ u'\xd7' # 0x009e -> MULTIPLICATION SIGN
+ u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ u'\xae' # 0x00a9 -> REGISTERED SIGN
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xa9' # 0x00b8 -> COPYRIGHT SIGN
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\xa2' # 0x00bd -> CENT SIGN
+ u'\xa5' # 0x00be -> YEN SIGN
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE
+ u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\xa4' # 0x00cf -> CURRENCY SIGN
+ u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH
+ u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH
+ u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\u20ac' # 0x00d5 -> EURO SIGN
+ u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\xa6' # 0x00dd -> BROKEN BAR
+ u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
+ u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+ u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+ u'\xb5' # 0x00e6 -> MICRO SIGN
+ u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN
+ u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN
+ u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
+ u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
+ u'\xaf' # 0x00ee -> MACRON
+ u'\xb4' # 0x00ef -> ACUTE ACCENT
+ u'\xad' # 0x00f0 -> SOFT HYPHEN
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u2017' # 0x00f2 -> DOUBLE LOW LINE
+ u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
+ u'\xb6' # 0x00f4 -> PILCROW SIGN
+ u'\xa7' # 0x00f5 -> SECTION SIGN
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\xb8' # 0x00f7 -> CEDILLA
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\xa8' # 0x00f9 -> DIAERESIS
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\xb9' # 0x00fb -> SUPERSCRIPT ONE
+ u'\xb3' # 0x00fc -> SUPERSCRIPT THREE
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x00bd, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x00cf, # CURRENCY SIGN
+ 0x00a5: 0x00be, # YEN SIGN
+ 0x00a6: 0x00dd, # BROKEN BAR
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a8: 0x00f9, # DIAERESIS
+ 0x00a9: 0x00b8, # COPYRIGHT SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00ae: 0x00a9, # REGISTERED SIGN
+ 0x00af: 0x00ee, # MACRON
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b3: 0x00fc, # SUPERSCRIPT THREE
+ 0x00b4: 0x00ef, # ACUTE ACCENT
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b6: 0x00f4, # PILCROW SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00b8: 0x00f7, # CEDILLA
+ 0x00b9: 0x00fb, # SUPERSCRIPT ONE
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d7: 0x009e, # MULTIPLICATION SIGN
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN
+ 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x20ac: 0x00d5, # EURO SIGN
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x2017: 0x00f2, # DOUBLE LOW LINE
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp860.py b/cashew/Lib/encodings/cp860.py
new file mode 100644
index 0000000..4acb0cf
--- /dev/null
+++ b/cashew/Lib/encodings/cp860.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP860.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # cp860 codec; both methods delegate to the codecs charmap helpers
+ # encode() passes the module-level encoding_map; decode() passes decoding_table.
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)  # helper's result is returned unmodified
+ # `errors` is forwarded to the helper as-is; the default is 'strict'.
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)  # helper's result is returned unmodified
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # incremental encoder using encoding_map
+ def encode(self, input, final=False):  # `final` is accepted but not used in this body
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]  # [0]: keep only the first element of the helper's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # incremental decoder using decoding_table
+ def decode(self, input, final=False):  # `final` is accepted but not used in this body
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # [0]: keep only the first element of the helper's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # Codec is listed first, so its encode/decode take precedence in the MRO
+ pass  # no additional behaviour defined here
+
+class StreamReader(Codec,codecs.StreamReader):  # Codec is listed first, so its encode/decode take precedence in the MRO
+ pass  # no additional behaviour defined here
+
+### encodings module API
+
+def getregentry():  # builds and returns the codecs.CodecInfo describing this cp860 codec
+ return codecs.CodecInfo(
+ name='cp860',
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # the remaining entries pass the classes themselves, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )  # NOTE(review): presumably called by the encodings package lookup -- caller not visible here
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))  # start from the identity mapping for byte values 0..255
+decoding_map.update({  # then override every key 0x80..0xff with the Unicode code point named in its comment
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00a2, # CENT SIGN
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x009e: 0x20a7, # PESETA SIGN
+ 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})  # NOTE(review): the codec classes visible in this file use decoding_table, not decoding_map -- confirm whether anything else reads this dict
+
+### Decoding Table
+
+decoding_table = (  # adjacent literals concatenate into one unicode string; per the line comments, position N holds the character for byte value N
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE
+ u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE
+ u'\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE
+ u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE
+ u'\xd5' # 0x0099 -> LATIN CAPITAL LETTER O WITH TILDE
+ u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xa2' # 0x009b -> CENT SIGN
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\u20a7' # 0x009e -> PESETA SIGN
+ u'\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ u'\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u258c' # 0x00dd -> LEFT HALF BLOCK
+ u'\u2590' # 0x00de -> RIGHT HALF BLOCK
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ u'\xb5' # 0x00e6 -> MICRO SIGN
+ u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ u'\u221e' # 0x00ec -> INFINITY
+ u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ u'\u2229' # 0x00ef -> INTERSECTION
+ u'\u2261' # 0x00f0 -> IDENTICAL TO
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\u2219' # 0x00f9 -> BULLET OPERATOR
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\u221a' # 0x00fb -> SQUARE ROOT
+ u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)  # 256 one-character literals in total, covering 0x00 through 0xff
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x009b, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d2: 0x00a9, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x20a7: 0x009e, # PESETA SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp861.py b/cashew/Lib/encodings/cp861.py
new file mode 100644
index 0000000..0939b5b
--- /dev/null
+++ b/cashew/Lib/encodings/cp861.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP861.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # cp861 codec; both methods hand the work to the codecs.charmap_* helpers
+
+ def encode(self,input,errors='strict'):  # input: text to encode; errors: error-handling scheme name, passed through
+ return codecs.charmap_encode(input,errors,encoding_map)  # helper's result is returned unchanged
+
+ def decode(self,input,errors='strict'):  # input: cp861 bytes to decode; errors: passed through to the helper
+ return codecs.charmap_decode(input,errors,decoding_table)  # looks up in decoding_table, not decoding_map
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # incremental cp861 encoder built on codecs.charmap_encode
+ def encode(self, input, final=False):  # 'final' is accepted but never read in this body
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]  # uses self.errors; keeps only item [0] of the helper's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # incremental cp861 decoder built on codecs.charmap_decode
+ def decode(self, input, final=False):  # 'final' is accepted but never read in this body
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # uses self.errors; keeps only item [0] of the helper's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # defines nothing itself; combines Codec (listed first) with codecs.StreamWriter
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):  # defines nothing itself; combines Codec (listed first) with codecs.StreamReader
+ pass
+
+### encodings module API
+
+def getregentry():  # takes no arguments; returns a codecs.CodecInfo bundling this module's cp861 entry points
+ return codecs.CodecInfo(
+ name='cp861',
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # from here down the classes themselves are passed, not instances
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
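+### Decoding Map (dict form: byte value -> Unicode code point; the codec classes above decode with decoding_table, not this dict)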
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH
+ 0x008c: 0x00f0, # LATIN SMALL LETTER ETH
+ 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00fe, # LATIN SMALL LETTER THORN
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x20a7, # PESETA SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x2310, # REVERSED NOT SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
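+### Decoding Table (one unicode character per byte value 0x00-0xff, in order; passed to charmap_decode by the classes above)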
+
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH
+ u'\xf0' # 0x008c -> LATIN SMALL LETTER ETH
+ u'\xde' # 0x008d -> LATIN CAPITAL LETTER THORN
+ u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xfe' # 0x0095 -> LATIN SMALL LETTER THORN
+ u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE
+ u'\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE
+ u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ u'\u20a7' # 0x009e -> PESETA SIGN
+ u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE
+ u'\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE
+ u'\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE
+ u'\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE
+ u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ u'\u2310' # 0x00a9 -> REVERSED NOT SIGN
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u258c' # 0x00dd -> LEFT HALF BLOCK
+ u'\u2590' # 0x00de -> RIGHT HALF BLOCK
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ u'\xb5' # 0x00e6 -> MICRO SIGN
+ u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ u'\u221e' # 0x00ec -> INFINITY
+ u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ u'\u2229' # 0x00ef -> INTERSECTION
+ u'\u2261' # 0x00f0 -> IDENTICAL TO
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\u2219' # 0x00f9 -> BULLET OPERATOR
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\u221a' # 0x00fb -> SQUARE ROOT
+ u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map (dict: Unicode code point -> cp861 byte value; passed to charmap_encode by the classes above)
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH
+ 0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00de: 0x008d, # LATIN CAPITAL LETTER THORN
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00f0: 0x008c, # LATIN SMALL LETTER ETH
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00fe: 0x0095, # LATIN SMALL LETTER THORN
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x20a7: 0x009e, # PESETA SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2310: 0x00a9, # REVERSED NOT SIGN
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp862.py b/cashew/Lib/encodings/cp862.py
new file mode 100644
index 0000000..ea0405c
--- /dev/null
+++ b/cashew/Lib/encodings/cp862.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP862.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ # cp862 codec: both methods hand the work to the generic charmap helpers in codecs.
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)  # helper's result is returned unchanged
+ # decode() is driven by decoding_table (the string), not by the decoding_map dict.
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)  # helper's result is returned unchanged
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # encodes each chunk through encoding_map
+ def encode(self, input, final=False):  # `final` is accepted but never read in this body
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]  # keep only item 0 of the helper's result; self.errors is not assigned here (presumably set by the base class)
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # decodes each chunk through decoding_table
+ def decode(self, input, final=False):  # `final` is accepted but never read in this body
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # keep only item 0 of the helper's result; self.errors is not assigned here (presumably set by the base class)
+
+class StreamWriter(Codec,codecs.StreamWriter):  # defines nothing itself; Codec is listed before codecs.StreamWriter in the bases
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):  # defines nothing itself; Codec is listed before codecs.StreamReader in the bases
+ pass
+
+### encodings module API
+
+def getregentry():  # takes no arguments; returns a codecs.CodecInfo describing the cp862 codec
+ return codecs.CodecInfo(
+ name='cp862',
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # the class itself is passed, not an instance
+ incrementaldecoder=IncrementalDecoder,  # class, not instance
+ streamreader=StreamReader,  # class, not instance
+ streamwriter=StreamWriter,  # class, not instance
+ )
+
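+### Decoding Map: byte value -> Unicode code point. Identity over range(256), then 0x80-0xff overridden below. The codec classes in this file use decoding_table, not this dict.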
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x05d0, # HEBREW LETTER ALEF
+ 0x0081: 0x05d1, # HEBREW LETTER BET
+ 0x0082: 0x05d2, # HEBREW LETTER GIMEL
+ 0x0083: 0x05d3, # HEBREW LETTER DALET
+ 0x0084: 0x05d4, # HEBREW LETTER HE
+ 0x0085: 0x05d5, # HEBREW LETTER VAV
+ 0x0086: 0x05d6, # HEBREW LETTER ZAYIN
+ 0x0087: 0x05d7, # HEBREW LETTER HET
+ 0x0088: 0x05d8, # HEBREW LETTER TET
+ 0x0089: 0x05d9, # HEBREW LETTER YOD
+ 0x008a: 0x05da, # HEBREW LETTER FINAL KAF
+ 0x008b: 0x05db, # HEBREW LETTER KAF
+ 0x008c: 0x05dc, # HEBREW LETTER LAMED
+ 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM
+ 0x008e: 0x05de, # HEBREW LETTER MEM
+ 0x008f: 0x05df, # HEBREW LETTER FINAL NUN
+ 0x0090: 0x05e0, # HEBREW LETTER NUN
+ 0x0091: 0x05e1, # HEBREW LETTER SAMEKH
+ 0x0092: 0x05e2, # HEBREW LETTER AYIN
+ 0x0093: 0x05e3, # HEBREW LETTER FINAL PE
+ 0x0094: 0x05e4, # HEBREW LETTER PE
+ 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI
+ 0x0096: 0x05e6, # HEBREW LETTER TSADI
+ 0x0097: 0x05e7, # HEBREW LETTER QOF
+ 0x0098: 0x05e8, # HEBREW LETTER RESH
+ 0x0099: 0x05e9, # HEBREW LETTER SHIN
+ 0x009a: 0x05ea, # HEBREW LETTER TAV
+ 0x009b: 0x00a2, # CENT SIGN
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00a5, # YEN SIGN
+ 0x009e: 0x20a7, # PESETA SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x2310, # REVERSED NOT SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
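+### Decoding Table: one unicode string built from 256 adjacent one-character literals, ordered by byte value 0x00-0xff (passed to codecs.charmap_decode by the classes above)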
+
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF
+ u'\u05d1' # 0x0081 -> HEBREW LETTER BET
+ u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL
+ u'\u05d3' # 0x0083 -> HEBREW LETTER DALET
+ u'\u05d4' # 0x0084 -> HEBREW LETTER HE
+ u'\u05d5' # 0x0085 -> HEBREW LETTER VAV
+ u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN
+ u'\u05d7' # 0x0087 -> HEBREW LETTER HET
+ u'\u05d8' # 0x0088 -> HEBREW LETTER TET
+ u'\u05d9' # 0x0089 -> HEBREW LETTER YOD
+ u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF
+ u'\u05db' # 0x008b -> HEBREW LETTER KAF
+ u'\u05dc' # 0x008c -> HEBREW LETTER LAMED
+ u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM
+ u'\u05de' # 0x008e -> HEBREW LETTER MEM
+ u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN
+ u'\u05e0' # 0x0090 -> HEBREW LETTER NUN
+ u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH
+ u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN
+ u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE
+ u'\u05e4' # 0x0094 -> HEBREW LETTER PE
+ u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI
+ u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI
+ u'\u05e7' # 0x0097 -> HEBREW LETTER QOF
+ u'\u05e8' # 0x0098 -> HEBREW LETTER RESH
+ u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN
+ u'\u05ea' # 0x009a -> HEBREW LETTER TAV
+ u'\xa2' # 0x009b -> CENT SIGN
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\xa5' # 0x009d -> YEN SIGN
+ u'\u20a7' # 0x009e -> PESETA SIGN
+ u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ u'\u2310' # 0x00a9 -> REVERSED NOT SIGN
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u258c' # 0x00dd -> LEFT HALF BLOCK
+ u'\u2590' # 0x00de -> RIGHT HALF BLOCK
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ u'\xb5' # 0x00e6 -> MICRO SIGN
+ u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ u'\u221e' # 0x00ec -> INFINITY
+ u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ u'\u2229' # 0x00ef -> INTERSECTION
+ u'\u2261' # 0x00f0 -> IDENTICAL TO
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\u2219' # 0x00f9 -> BULLET OPERATOR
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\u221a' # 0x00fb -> SQUARE ROOT
+ u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map: Unicode code point -> cp862 byte value (passed to codecs.charmap_encode by the classes above)
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x009b, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a5: 0x009d, # YEN SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN)
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x05d0: 0x0080, # HEBREW LETTER ALEF
+ 0x05d1: 0x0081, # HEBREW LETTER BET
+ 0x05d2: 0x0082, # HEBREW LETTER GIMEL
+ 0x05d3: 0x0083, # HEBREW LETTER DALET
+ 0x05d4: 0x0084, # HEBREW LETTER HE
+ 0x05d5: 0x0085, # HEBREW LETTER VAV
+ 0x05d6: 0x0086, # HEBREW LETTER ZAYIN
+ 0x05d7: 0x0087, # HEBREW LETTER HET
+ 0x05d8: 0x0088, # HEBREW LETTER TET
+ 0x05d9: 0x0089, # HEBREW LETTER YOD
+ 0x05da: 0x008a, # HEBREW LETTER FINAL KAF
+ 0x05db: 0x008b, # HEBREW LETTER KAF
+ 0x05dc: 0x008c, # HEBREW LETTER LAMED
+ 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM
+ 0x05de: 0x008e, # HEBREW LETTER MEM
+ 0x05df: 0x008f, # HEBREW LETTER FINAL NUN
+ 0x05e0: 0x0090, # HEBREW LETTER NUN
+ 0x05e1: 0x0091, # HEBREW LETTER SAMEKH
+ 0x05e2: 0x0092, # HEBREW LETTER AYIN
+ 0x05e3: 0x0093, # HEBREW LETTER FINAL PE
+ 0x05e4: 0x0094, # HEBREW LETTER PE
+ 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI
+ 0x05e6: 0x0096, # HEBREW LETTER TSADI
+ 0x05e7: 0x0097, # HEBREW LETTER QOF
+ 0x05e8: 0x0098, # HEBREW LETTER RESH
+ 0x05e9: 0x0099, # HEBREW LETTER SHIN
+ 0x05ea: 0x009a, # HEBREW LETTER TAV
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x20a7: 0x009e, # PESETA SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2310: 0x00a9, # REVERSED NOT SIGN
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp863.py b/cashew/Lib/encodings/cp863.py
new file mode 100644
index 0000000..62dfabf
--- /dev/null
+++ b/cashew/Lib/encodings/cp863.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP863.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp863',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00b6, # PILCROW SIGN
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x2017, # DOUBLE LOW LINE
+ 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x008f: 0x00a7, # SECTION SIGN
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00a4, # CURRENCY SIGN
+ 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00a2, # CENT SIGN
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00a6, # BROKEN BAR
+ 0x00a1: 0x00b4, # ACUTE ACCENT
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00a8, # DIAERESIS
+ 0x00a5: 0x00b8, # CEDILLA
+ 0x00a6: 0x00b3, # SUPERSCRIPT THREE
+ 0x00a7: 0x00af, # MACRON
+ 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00a9: 0x2310, # REVERSED NOT SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xb6' # 0x0086 -> PILCROW SIGN
+ u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\u2017' # 0x008d -> DOUBLE LOW LINE
+ u'\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE
+ u'\xa7' # 0x008f -> SECTION SIGN
+ u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE
+ u'\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ u'\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xa4' # 0x0098 -> CURRENCY SIGN
+ u'\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xa2' # 0x009b -> CENT SIGN
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE
+ u'\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ u'\xa6' # 0x00a0 -> BROKEN BAR
+ u'\xb4' # 0x00a1 -> ACUTE ACCENT
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xa8' # 0x00a4 -> DIAERESIS
+ u'\xb8' # 0x00a5 -> CEDILLA
+ u'\xb3' # 0x00a6 -> SUPERSCRIPT THREE
+ u'\xaf' # 0x00a7 -> MACRON
+ u'\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ u'\u2310' # 0x00a9 -> REVERSED NOT SIGN
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ u'\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u258c' # 0x00dd -> LEFT HALF BLOCK
+ u'\u2590' # 0x00de -> RIGHT HALF BLOCK
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ u'\xb5' # 0x00e6 -> MICRO SIGN
+ u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ u'\u221e' # 0x00ec -> INFINITY
+ u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ u'\u2229' # 0x00ef -> INTERSECTION
+ u'\u2261' # 0x00f0 -> IDENTICAL TO
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\u2219' # 0x00f9 -> BULLET OPERATOR
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\u221a' # 0x00fb -> SQUARE ROOT
+ u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a2: 0x009b, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x0098, # CURRENCY SIGN
+ 0x00a6: 0x00a0, # BROKEN BAR
+ 0x00a7: 0x008f, # SECTION SIGN
+ 0x00a8: 0x00a4, # DIAERESIS
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00af: 0x00a7, # MACRON
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b3: 0x00a6, # SUPERSCRIPT THREE
+ 0x00b4: 0x00a1, # ACUTE ACCENT
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b6: 0x0086, # PILCROW SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00b8: 0x00a5, # CEDILLA
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS
+ 0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x2017: 0x008d, # DOUBLE LOW LINE
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2310: 0x00a9, # REVERSED NOT SIGN
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp864.py b/cashew/Lib/encodings/cp864.py
new file mode 100644
index 0000000..02a0e73
--- /dev/null
+++ b/cashew/Lib/encodings/cp864.py
@@ -0,0 +1,690 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP864.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):  # cp864 codec: both methods delegate to the charmap helpers in codecs
+
+ def encode(self,input,errors='strict'):  # errors defaults to 'strict' and is passed through unchanged
+ return codecs.charmap_encode(input,errors,encoding_map)  # maps input through the module-level encoding_map
+
+ def decode(self,input,errors='strict'):  # errors defaults to 'strict' and is passed through unchanged
+ return codecs.charmap_decode(input,errors,decoding_table)  # maps input through the module-level decoding_table
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # incremental cp864 encoder
+ def encode(self, input, final=False):  # final is accepted but not used in this method
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]  # [0] keeps only the first item of charmap_encode's result
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # incremental cp864 decoder
+ def decode(self, input, final=False):  # final is accepted but not used in this method
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # [0] keeps only the first item of charmap_decode's result
+
+class StreamWriter(Codec,codecs.StreamWriter):  # adds nothing of its own; Codec is listed first among the bases
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):  # adds nothing of its own; Codec is listed first among the bases
+ pass
+
+### encodings module API
+
+def getregentry():  # returns the codecs.CodecInfo record describing this cp864 codec
+ return codecs.CodecInfo(
+ name='cp864',
+ encode=Codec().encode,  # bound method of a freshly created Codec instance
+ decode=Codec().decode,  # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder,  # the class itself is passed, not an instance
+ incrementaldecoder=IncrementalDecoder,  # the class itself is passed, not an instance
+ streamreader=StreamReader,  # the class itself is passed, not an instance
+ streamwriter=StreamWriter,  # the class itself is passed, not an instance
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0025: 0x066a, # ARABIC PERCENT SIGN
+ 0x0080: 0x00b0, # DEGREE SIGN
+ 0x0081: 0x00b7, # MIDDLE DOT
+ 0x0082: 0x2219, # BULLET OPERATOR
+ 0x0083: 0x221a, # SQUARE ROOT
+ 0x0084: 0x2592, # MEDIUM SHADE
+ 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL
+ 0x0086: 0x2502, # FORMS LIGHT VERTICAL
+ 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL
+ 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT
+ 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL
+ 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT
+ 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL
+ 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT
+ 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT
+ 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT
+ 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT
+ 0x0090: 0x03b2, # GREEK SMALL BETA
+ 0x0091: 0x221e, # INFINITY
+ 0x0092: 0x03c6, # GREEK SMALL PHI
+ 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN
+ 0x0094: 0x00bd, # FRACTION 1/2
+ 0x0095: 0x00bc, # FRACTION 1/4
+ 0x0096: 0x2248, # ALMOST EQUAL TO
+ 0x0097: 0x00ab, # LEFT POINTING GUILLEMET
+ 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET
+ 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
+ 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
+ 0x009b: None, # UNDEFINED
+ 0x009c: None, # UNDEFINED
+ 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
+ 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+ 0x009f: None, # UNDEFINED
+ 0x00a1: 0x00ad, # SOFT HYPHEN
+ 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
+ 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
+ 0x00a6: None, # UNDEFINED
+ 0x00a7: None, # UNDEFINED
+ 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM
+ 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM
+ 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM
+ 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM
+ 0x00ac: 0x060c, # ARABIC COMMA
+ 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM
+ 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM
+ 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM
+ 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO
+ 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE
+ 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO
+ 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE
+ 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR
+ 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE
+ 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX
+ 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN
+ 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT
+ 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE
+ 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM
+ 0x00bb: 0x061b, # ARABIC SEMICOLON
+ 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM
+ 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM
+ 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM
+ 0x00bf: 0x061f, # ARABIC QUESTION MARK
+ 0x00c0: 0x00a2, # CENT SIGN
+ 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM
+ 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
+ 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
+ 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
+ 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM
+ 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
+ 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM
+ 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM
+ 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
+ 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM
+ 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM
+ 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM
+ 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM
+ 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM
+ 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM
+ 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM
+ 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM
+ 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM
+ 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM
+ 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM
+ 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM
+ 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM
+ 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM
+ 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM
+ 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM
+ 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM
+ 0x00db: 0x00a6, # BROKEN VERTICAL BAR
+ 0x00dc: 0x00ac, # NOT SIGN
+ 0x00dd: 0x00f7, # DIVISION SIGN
+ 0x00de: 0x00d7, # MULTIPLICATION SIGN
+ 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM
+ 0x00e0: 0x0640, # ARABIC TATWEEL
+ 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM
+ 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM
+ 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM
+ 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM
+ 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM
+ 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM
+ 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM
+ 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM
+ 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM
+ 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM
+ 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM
+ 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM
+ 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM
+ 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM
+ 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM
+ 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
+ 0x00f1: 0x0651, # ARABIC SHADDAH
+ 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM
+ 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM
+ 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM
+ 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM
+ 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM
+ 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM
+ 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM
+ 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
+ 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
+ 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM
+ 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM
+ 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: None, # UNDEFINED
+})
+
+### Decoding Table
+
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'\u066a' # 0x0025 -> ARABIC PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\xb0' # 0x0080 -> DEGREE SIGN
+ u'\xb7' # 0x0081 -> MIDDLE DOT
+ u'\u2219' # 0x0082 -> BULLET OPERATOR
+ u'\u221a' # 0x0083 -> SQUARE ROOT
+ u'\u2592' # 0x0084 -> MEDIUM SHADE
+ u'\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL
+ u'\u2502' # 0x0086 -> FORMS LIGHT VERTICAL
+ u'\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL
+ u'\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT
+ u'\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT
+ u'\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL
+ u'\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT
+ u'\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT
+ u'\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT
+ u'\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT
+ u'\u03b2' # 0x0090 -> GREEK SMALL BETA
+ u'\u221e' # 0x0091 -> INFINITY
+ u'\u03c6' # 0x0092 -> GREEK SMALL PHI
+ u'\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN
+ u'\xbd' # 0x0094 -> FRACTION 1/2
+ u'\xbc' # 0x0095 -> FRACTION 1/4
+ u'\u2248' # 0x0096 -> ALMOST EQUAL TO
+ u'\xab' # 0x0097 -> LEFT POINTING GUILLEMET
+ u'\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET
+ u'\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
+ u'\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
+ u'\ufffe' # 0x009b -> UNDEFINED
+ u'\ufffe' # 0x009c -> UNDEFINED
+ u'\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
+ u'\ufefc' # 0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+ u'\ufffe' # 0x009f -> UNDEFINED
+ u'\xa0' # 0x00a0 -> NON-BREAKING SPACE
+ u'\xad' # 0x00a1 -> SOFT HYPHEN
+ u'\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
+ u'\xa3' # 0x00a3 -> POUND SIGN
+ u'\xa4' # 0x00a4 -> CURRENCY SIGN
+ u'\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
+ u'\ufffe' # 0x00a6 -> UNDEFINED
+ u'\ufffe' # 0x00a7 -> UNDEFINED
+ u'\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM
+ u'\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM
+ u'\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM
+ u'\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM
+ u'\u060c' # 0x00ac -> ARABIC COMMA
+ u'\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM
+ u'\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM
+ u'\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM
+ u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO
+ u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE
+ u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO
+ u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE
+ u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR
+ u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE
+ u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX
+ u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN
+ u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT
+ u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE
+ u'\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM
+ u'\u061b' # 0x00bb -> ARABIC SEMICOLON
+ u'\ufeb1' # 0x00bc -> ARABIC LETTER SEEN ISOLATED FORM
+ u'\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM
+ u'\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM
+ u'\u061f' # 0x00bf -> ARABIC QUESTION MARK
+ u'\xa2' # 0x00c0 -> CENT SIGN
+ u'\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM
+ u'\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
+ u'\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
+ u'\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
+ u'\ufeca' # 0x00c5 -> ARABIC LETTER AIN FINAL FORM
+ u'\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
+ u'\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM
+ u'\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM
+ u'\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM
+ u'\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM
+ u'\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM
+ u'\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM
+ u'\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM
+ u'\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM
+ u'\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM
+ u'\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM
+ u'\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM
+ u'\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM
+ u'\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM
+ u'\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM
+ u'\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM
+ u'\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM
+ u'\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM
+ u'\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM
+ u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM
+ u'\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM
+ u'\xa6' # 0x00db -> BROKEN VERTICAL BAR
+ u'\xac' # 0x00dc -> NOT SIGN
+ u'\xf7' # 0x00dd -> DIVISION SIGN
+ u'\xd7' # 0x00de -> MULTIPLICATION SIGN
+ u'\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM
+ u'\u0640' # 0x00e0 -> ARABIC TATWEEL
+ u'\ufed3' # 0x00e1 -> ARABIC LETTER FEH INITIAL FORM
+ u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM
+ u'\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM
+ u'\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM
+ u'\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM
+ u'\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM
+ u'\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM
+ u'\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM
+ u'\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM
+ u'\ufef3' # 0x00ea -> ARABIC LETTER YEH INITIAL FORM
+ u'\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM
+ u'\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM
+ u'\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM
+ u'\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM
+ u'\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM
+ u'\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM
+ u'\u0651' # 0x00f1 -> ARABIC SHADDAH
+ u'\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM
+ u'\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM
+ u'\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM
+ u'\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM
+ u'\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM
+ u'\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM
+ u'\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM
+ u'\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
+ u'\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
+ u'\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM
+ u'\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM
+ u'\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\ufffe' # 0x00ff -> UNDEFINED
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00a0, # NON-BREAKING SPACE
+ 0x00a2: 0x00c0, # CENT SIGN
+ 0x00a3: 0x00a3, # POUND SIGN
+ 0x00a4: 0x00a4, # CURRENCY SIGN
+ 0x00a6: 0x00db, # BROKEN VERTICAL BAR
+ 0x00ab: 0x0097, # LEFT POINTING GUILLEMET
+ 0x00ac: 0x00dc, # NOT SIGN
+ 0x00ad: 0x00a1, # SOFT HYPHEN
+ 0x00b0: 0x0080, # DEGREE SIGN
+ 0x00b1: 0x0093, # PLUS-OR-MINUS SIGN
+ 0x00b7: 0x0081, # MIDDLE DOT
+ 0x00bb: 0x0098, # RIGHT POINTING GUILLEMET
+ 0x00bc: 0x0095, # FRACTION 1/4
+ 0x00bd: 0x0094, # FRACTION 1/2
+ 0x00d7: 0x00de, # MULTIPLICATION SIGN
+ 0x00f7: 0x00dd, # DIVISION SIGN
+ 0x03b2: 0x0090, # GREEK SMALL BETA
+ 0x03c6: 0x0092, # GREEK SMALL PHI
+ 0x060c: 0x00ac, # ARABIC COMMA
+ 0x061b: 0x00bb, # ARABIC SEMICOLON
+ 0x061f: 0x00bf, # ARABIC QUESTION MARK
+ 0x0640: 0x00e0, # ARABIC TATWEEL
+ 0x0651: 0x00f1, # ARABIC SHADDAH
+ 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO
+ 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE
+ 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO
+ 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE
+ 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR
+ 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE
+ 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX
+ 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN
+ 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT
+ 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE
+ 0x066a: 0x0025, # ARABIC PERCENT SIGN
+ 0x2219: 0x0082, # BULLET OPERATOR
+ 0x221a: 0x0083, # SQUARE ROOT
+ 0x221e: 0x0091, # INFINITY
+ 0x2248: 0x0096, # ALMOST EQUAL TO
+ 0x2500: 0x0085, # FORMS LIGHT HORIZONTAL
+ 0x2502: 0x0086, # FORMS LIGHT VERTICAL
+ 0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT
+ 0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT
+ 0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT
+ 0x2518: 0x008f, # FORMS LIGHT UP AND LEFT
+ 0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL
+ 0x2592: 0x0084, # MEDIUM SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+ 0xfe7d: 0x00f0, # ARABIC SHADDA MEDIAL FORM
+ 0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM
+ 0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
+ 0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
+ 0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
+ 0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
+ 0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
+ 0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
+ 0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM
+ 0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM
+ 0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM
+ 0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM
+ 0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
+ 0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM
+ 0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM
+ 0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM
+ 0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM
+ 0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM
+ 0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM
+ 0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM
+ 0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM
+ 0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM
+ 0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM
+ 0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM
+ 0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM
+ 0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM
+ 0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM
+ 0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM
+ 0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM
+ 0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM
+ 0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM
+ 0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM
+ 0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM
+ 0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM
+ 0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM
+ 0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM
+ 0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM
+ 0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM
+ 0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM
+ 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM
+ 0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM
+ 0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM
+ 0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM
+ 0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM
+ 0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM
+ 0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM
+ 0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM
+ 0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM
+ 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM
+ 0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM
+ 0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM
+ 0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM
+ 0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM
+ 0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM
+ 0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM
+ 0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM
+ 0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM
+ 0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM
+ 0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM
+ 0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM
+ 0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM
+ 0xfeef: 0x00e9, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM
+ 0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM
+ 0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM
+ 0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM
+ 0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM
+ 0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
+ 0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
+ 0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
+ 0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
+ 0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
+ 0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+}
diff --git a/cashew/Lib/encodings/cp865.py b/cashew/Lib/encodings/cp865.py
new file mode 100644
index 0000000..e9f45f1
--- /dev/null
+++ b/cashew/Lib/encodings/cp865.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP865.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp865',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x20a7, # PESETA SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x2310, # REVERSED NOT SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00a4, # CURRENCY SIGN
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
+ u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+ u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ u'\u20a7' # 0x009e -> PESETA SIGN
+ u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ u'\u2310' # 0x00a9 -> REVERSED NOT SIGN
+ u'\xac' # 0x00aa -> NOT SIGN
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xa4' # 0x00af -> CURRENCY SIGN
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u258c' # 0x00dd -> LEFT HALF BLOCK
+ u'\u2590' # 0x00de -> RIGHT HALF BLOCK
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ u'\xb5' # 0x00e6 -> MICRO SIGN
+ u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ u'\u221e' # 0x00ec -> INFINITY
+ u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ u'\u2229' # 0x00ef -> INTERSECTION
+ u'\u2261' # 0x00f0 -> IDENTICAL TO
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ u'\xf7' # 0x00f6 -> DIVISION SIGN
+ u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\u2219' # 0x00f9 -> BULLET OPERATOR
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\u221a' # 0x00fb -> SQUARE ROOT
+ u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x00af, # CURRENCY SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x20a7: 0x009e, # PESETA SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2310: 0x00a9, # REVERSED NOT SIGN
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp866.py b/cashew/Lib/encodings/cp866.py
new file mode 100644
index 0000000..29cd85a
--- /dev/null
+++ b/cashew/Lib/encodings/cp866.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec): # cp866 codec; holds no state of its own, only wraps the module-level tables
+ # encode(): hand input/errors to codecs.charmap_encode with encoding_map and return its result as-is
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+ # decode(): hand input/errors to codecs.charmap_decode with decoding_table and return its result as-is
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder): # cp866 incremental encoder; every call is encoded on its own
+ def encode(self, input, final=False): # `final` is accepted but never read in this method
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0] # self.errors is not assigned here (inherited); [0] keeps only the first element of the result
+
+class IncrementalDecoder(codecs.IncrementalDecoder): # cp866 incremental decoder; every call is decoded on its own
+ def decode(self, input, final=False): # `final` is accepted but never read in this method
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0] # self.errors is not assigned here (inherited); [0] keeps only the first element of the result
+
+class StreamWriter(Codec,codecs.StreamWriter): # cp866 stream writer: Codec listed first in the bases, then codecs.StreamWriter
+ pass # nothing added or overridden here
+
+class StreamReader(Codec,codecs.StreamReader): # cp866 stream reader: Codec listed first in the bases, then codecs.StreamReader
+ pass # nothing added or overridden here
+
+### encodings module API
+
+def getregentry(): # Build and return the codecs.CodecInfo record describing this cp866 codec.
+ return codecs.CodecInfo(
+ name='cp866',
+ encode=Codec().encode, # bound method of a throwaway Codec instance
+ decode=Codec().decode, # bound method of a second, separate Codec instance
+ incrementalencoder=IncrementalEncoder, # this class and the three below are passed uninstantiated
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
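+### Decoding Map (dict: cp866 byte value -> Unicode code point; identity for 0x00-0xff first, then 0x80-0xff overridden. The codec classes above decode via decoding_table, not this dict.)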
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
+ 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
+ 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
+ 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE
+ 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE
+ 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE
+ 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
+ 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE
+ 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I
+ 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
+ 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA
+ 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL
+ 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM
+ 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN
+ 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O
+ 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE
+ 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER
+ 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES
+ 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE
+ 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U
+ 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF
+ 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA
+ 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE
+ 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE
+ 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA
+ 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
+ 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
+ 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU
+ 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
+ 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E
+ 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU
+ 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA
+ 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A
+ 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE
+ 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE
+ 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE
+ 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE
+ 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE
+ 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE
+ 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE
+ 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I
+ 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
+ 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA
+ 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL
+ 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM
+ 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN
+ 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O
+ 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER
+ 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES
+ 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE
+ 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U
+ 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF
+ 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA
+ 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE
+ 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE
+ 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA
+ 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
+ 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
+ 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU
+ 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
+ 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E
+ 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU
+ 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA
+ 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO
+ 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO
+ 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI
+ 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI
+ 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
+ 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x2116, # NUMERO SIGN
+ 0x00fd: 0x00a4, # CURRENCY SIGN
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
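+### Decoding Table (one 256-character unicode string: the character at index N is what cp866 byte N decodes to; passed to codecs.charmap_decode above)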
+
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A
+ u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE
+ u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE
+ u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE
+ u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE
+ u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE
+ u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE
+ u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE
+ u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I
+ u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I
+ u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA
+ u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL
+ u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM
+ u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN
+ u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O
+ u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE
+ u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER
+ u'\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES
+ u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE
+ u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U
+ u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF
+ u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA
+ u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE
+ u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE
+ u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA
+ u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA
+ u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN
+ u'\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU
+ u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN
+ u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E
+ u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU
+ u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA
+ u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A
+ u'\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE
+ u'\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE
+ u'\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE
+ u'\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE
+ u'\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE
+ u'\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE
+ u'\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE
+ u'\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I
+ u'\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I
+ u'\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA
+ u'\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL
+ u'\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM
+ u'\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN
+ u'\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O
+ u'\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u258c' # 0x00dd -> LEFT HALF BLOCK
+ u'\u2590' # 0x00de -> RIGHT HALF BLOCK
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER
+ u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES
+ u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE
+ u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U
+ u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF
+ u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA
+ u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE
+ u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE
+ u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA
+ u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA
+ u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN
+ u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU
+ u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN
+ u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E
+ u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU
+ u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA
+ u'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO
+ u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO
+ u'\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ u'\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+ u'\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI
+ u'\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI
+ u'\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U
+ u'\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\u2219' # 0x00f9 -> BULLET OPERATOR
+ u'\xb7' # 0x00fa -> MIDDLE DOT
+ u'\u221a' # 0x00fb -> SQUARE ROOT
+ u'\u2116' # 0x00fc -> NUMERO SIGN
+ u'\xa4' # 0x00fd -> CURRENCY SIGN
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map (dict: Unicode code point -> cp866 byte value; passed to codecs.charmap_encode above)
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a4: 0x00fd, # CURRENCY SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO
+ 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI
+ 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U
+ 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A
+ 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE
+ 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE
+ 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE
+ 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE
+ 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE
+ 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE
+ 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE
+ 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I
+ 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I
+ 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA
+ 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL
+ 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM
+ 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN
+ 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O
+ 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE
+ 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER
+ 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES
+ 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE
+ 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U
+ 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF
+ 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA
+ 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE
+ 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE
+ 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA
+ 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA
+ 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN
+ 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU
+ 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN
+ 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E
+ 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU
+ 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA
+ 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A
+ 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE
+ 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE
+ 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE
+ 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE
+ 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE
+ 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE
+ 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE
+ 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I
+ 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I
+ 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA
+ 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL
+ 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM
+ 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN
+ 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O
+ 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE
+ 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER
+ 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES
+ 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE
+ 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U
+ 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF
+ 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA
+ 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE
+ 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE
+ 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA
+ 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA
+ 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN
+ 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU
+ 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN
+ 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E
+ 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU
+ 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA
+ 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO
+ 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI
+ 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U
+ 0x2116: 0x00fc, # NUMERO SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp869.py b/cashew/Lib/encodings/cp869.py
new file mode 100644
index 0000000..b4dc99b
--- /dev/null
+++ b/cashew/Lib/encodings/cp869.py
@@ -0,0 +1,689 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP869.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp869',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: None, # UNDEFINED
+ 0x0081: None, # UNDEFINED
+ 0x0082: None, # UNDEFINED
+ 0x0083: None, # UNDEFINED
+ 0x0084: None, # UNDEFINED
+ 0x0085: None, # UNDEFINED
+ 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
+ 0x0087: None, # UNDEFINED
+ 0x0088: 0x00b7, # MIDDLE DOT
+ 0x0089: 0x00ac, # NOT SIGN
+ 0x008a: 0x00a6, # BROKEN BAR
+ 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK
+ 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK
+ 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
+ 0x008e: 0x2015, # HORIZONTAL BAR
+ 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
+ 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
+ 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
+ 0x0093: None, # UNDEFINED
+ 0x0094: None, # UNDEFINED
+ 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
+ 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ 0x0097: 0x00a9, # COPYRIGHT SIGN
+ 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
+ 0x0099: 0x00b2, # SUPERSCRIPT TWO
+ 0x009a: 0x00b3, # SUPERSCRIPT THREE
+ 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
+ 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
+ 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
+ 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
+ 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
+ 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA
+ 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA
+ 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA
+ 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON
+ 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA
+ 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA
+ 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA
+ 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU
+ 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI
+ 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI
+ 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU
+ 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON
+ 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI
+ 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI
+ 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA
+ 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA
+ 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA
+ 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA
+ 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA
+ 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA
+ 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA
+ 0x00e6: 0x03bc, # GREEK SMALL LETTER MU
+ 0x00e7: 0x03bd, # GREEK SMALL LETTER NU
+ 0x00e8: 0x03be, # GREEK SMALL LETTER XI
+ 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON
+ 0x00ea: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO
+ 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
+ 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00ef: 0x0384, # GREEK TONOS
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON
+ 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI
+ 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x00a8, # DIAERESIS
+ 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA
+ 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ u'\x00' # 0x0000 -> NULL
+ u'\x01' # 0x0001 -> START OF HEADING
+ u'\x02' # 0x0002 -> START OF TEXT
+ u'\x03' # 0x0003 -> END OF TEXT
+ u'\x04' # 0x0004 -> END OF TRANSMISSION
+ u'\x05' # 0x0005 -> ENQUIRY
+ u'\x06' # 0x0006 -> ACKNOWLEDGE
+ u'\x07' # 0x0007 -> BELL
+ u'\x08' # 0x0008 -> BACKSPACE
+ u'\t' # 0x0009 -> HORIZONTAL TABULATION
+ u'\n' # 0x000a -> LINE FEED
+ u'\x0b' # 0x000b -> VERTICAL TABULATION
+ u'\x0c' # 0x000c -> FORM FEED
+ u'\r' # 0x000d -> CARRIAGE RETURN
+ u'\x0e' # 0x000e -> SHIFT OUT
+ u'\x0f' # 0x000f -> SHIFT IN
+ u'\x10' # 0x0010 -> DATA LINK ESCAPE
+ u'\x11' # 0x0011 -> DEVICE CONTROL ONE
+ u'\x12' # 0x0012 -> DEVICE CONTROL TWO
+ u'\x13' # 0x0013 -> DEVICE CONTROL THREE
+ u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x0018 -> CANCEL
+ u'\x19' # 0x0019 -> END OF MEDIUM
+ u'\x1a' # 0x001a -> SUBSTITUTE
+ u'\x1b' # 0x001b -> ESCAPE
+ u'\x1c' # 0x001c -> FILE SEPARATOR
+ u'\x1d' # 0x001d -> GROUP SEPARATOR
+ u'\x1e' # 0x001e -> RECORD SEPARATOR
+ u'\x1f' # 0x001f -> UNIT SEPARATOR
+ u' ' # 0x0020 -> SPACE
+ u'!' # 0x0021 -> EXCLAMATION MARK
+ u'"' # 0x0022 -> QUOTATION MARK
+ u'#' # 0x0023 -> NUMBER SIGN
+ u'$' # 0x0024 -> DOLLAR SIGN
+ u'%' # 0x0025 -> PERCENT SIGN
+ u'&' # 0x0026 -> AMPERSAND
+ u"'" # 0x0027 -> APOSTROPHE
+ u'(' # 0x0028 -> LEFT PARENTHESIS
+ u')' # 0x0029 -> RIGHT PARENTHESIS
+ u'*' # 0x002a -> ASTERISK
+ u'+' # 0x002b -> PLUS SIGN
+ u',' # 0x002c -> COMMA
+ u'-' # 0x002d -> HYPHEN-MINUS
+ u'.' # 0x002e -> FULL STOP
+ u'/' # 0x002f -> SOLIDUS
+ u'0' # 0x0030 -> DIGIT ZERO
+ u'1' # 0x0031 -> DIGIT ONE
+ u'2' # 0x0032 -> DIGIT TWO
+ u'3' # 0x0033 -> DIGIT THREE
+ u'4' # 0x0034 -> DIGIT FOUR
+ u'5' # 0x0035 -> DIGIT FIVE
+ u'6' # 0x0036 -> DIGIT SIX
+ u'7' # 0x0037 -> DIGIT SEVEN
+ u'8' # 0x0038 -> DIGIT EIGHT
+ u'9' # 0x0039 -> DIGIT NINE
+ u':' # 0x003a -> COLON
+ u';' # 0x003b -> SEMICOLON
+ u'<' # 0x003c -> LESS-THAN SIGN
+ u'=' # 0x003d -> EQUALS SIGN
+ u'>' # 0x003e -> GREATER-THAN SIGN
+ u'?' # 0x003f -> QUESTION MARK
+ u'@' # 0x0040 -> COMMERCIAL AT
+ u'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ u'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ u'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ u'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ u'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ u'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ u'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ u'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ u'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ u'J' # 0x004a -> LATIN CAPITAL LETTER J
+ u'K' # 0x004b -> LATIN CAPITAL LETTER K
+ u'L' # 0x004c -> LATIN CAPITAL LETTER L
+ u'M' # 0x004d -> LATIN CAPITAL LETTER M
+ u'N' # 0x004e -> LATIN CAPITAL LETTER N
+ u'O' # 0x004f -> LATIN CAPITAL LETTER O
+ u'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ u'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ u'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ u'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ u'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ u'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ u'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ u'[' # 0x005b -> LEFT SQUARE BRACKET
+ u'\\' # 0x005c -> REVERSE SOLIDUS
+ u']' # 0x005d -> RIGHT SQUARE BRACKET
+ u'^' # 0x005e -> CIRCUMFLEX ACCENT
+ u'_' # 0x005f -> LOW LINE
+ u'`' # 0x0060 -> GRAVE ACCENT
+ u'a' # 0x0061 -> LATIN SMALL LETTER A
+ u'b' # 0x0062 -> LATIN SMALL LETTER B
+ u'c' # 0x0063 -> LATIN SMALL LETTER C
+ u'd' # 0x0064 -> LATIN SMALL LETTER D
+ u'e' # 0x0065 -> LATIN SMALL LETTER E
+ u'f' # 0x0066 -> LATIN SMALL LETTER F
+ u'g' # 0x0067 -> LATIN SMALL LETTER G
+ u'h' # 0x0068 -> LATIN SMALL LETTER H
+ u'i' # 0x0069 -> LATIN SMALL LETTER I
+ u'j' # 0x006a -> LATIN SMALL LETTER J
+ u'k' # 0x006b -> LATIN SMALL LETTER K
+ u'l' # 0x006c -> LATIN SMALL LETTER L
+ u'm' # 0x006d -> LATIN SMALL LETTER M
+ u'n' # 0x006e -> LATIN SMALL LETTER N
+ u'o' # 0x006f -> LATIN SMALL LETTER O
+ u'p' # 0x0070 -> LATIN SMALL LETTER P
+ u'q' # 0x0071 -> LATIN SMALL LETTER Q
+ u'r' # 0x0072 -> LATIN SMALL LETTER R
+ u's' # 0x0073 -> LATIN SMALL LETTER S
+ u't' # 0x0074 -> LATIN SMALL LETTER T
+ u'u' # 0x0075 -> LATIN SMALL LETTER U
+ u'v' # 0x0076 -> LATIN SMALL LETTER V
+ u'w' # 0x0077 -> LATIN SMALL LETTER W
+ u'x' # 0x0078 -> LATIN SMALL LETTER X
+ u'y' # 0x0079 -> LATIN SMALL LETTER Y
+ u'z' # 0x007a -> LATIN SMALL LETTER Z
+ u'{' # 0x007b -> LEFT CURLY BRACKET
+ u'|' # 0x007c -> VERTICAL LINE
+ u'}' # 0x007d -> RIGHT CURLY BRACKET
+ u'~' # 0x007e -> TILDE
+ u'\x7f' # 0x007f -> DELETE
+ u'\ufffe' # 0x0080 -> UNDEFINED
+ u'\ufffe' # 0x0081 -> UNDEFINED
+ u'\ufffe' # 0x0082 -> UNDEFINED
+ u'\ufffe' # 0x0083 -> UNDEFINED
+ u'\ufffe' # 0x0084 -> UNDEFINED
+ u'\ufffe' # 0x0085 -> UNDEFINED
+ u'\u0386' # 0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+ u'\ufffe' # 0x0087 -> UNDEFINED
+ u'\xb7' # 0x0088 -> MIDDLE DOT
+ u'\xac' # 0x0089 -> NOT SIGN
+ u'\xa6' # 0x008a -> BROKEN BAR
+ u'\u2018' # 0x008b -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x008c -> RIGHT SINGLE QUOTATION MARK
+ u'\u0388' # 0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+ u'\u2015' # 0x008e -> HORIZONTAL BAR
+ u'\u0389' # 0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS
+ u'\u038a' # 0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS
+ u'\u03aa' # 0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ u'\u038c' # 0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+ u'\ufffe' # 0x0093 -> UNDEFINED
+ u'\ufffe' # 0x0094 -> UNDEFINED
+ u'\u038e' # 0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+ u'\u03ab' # 0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ u'\xa9' # 0x0097 -> COPYRIGHT SIGN
+ u'\u038f' # 0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+ u'\xb2' # 0x0099 -> SUPERSCRIPT TWO
+ u'\xb3' # 0x009a -> SUPERSCRIPT THREE
+ u'\u03ac' # 0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS
+ u'\xa3' # 0x009c -> POUND SIGN
+ u'\u03ad' # 0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS
+ u'\u03ae' # 0x009e -> GREEK SMALL LETTER ETA WITH TONOS
+ u'\u03af' # 0x009f -> GREEK SMALL LETTER IOTA WITH TONOS
+ u'\u03ca' # 0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ u'\u0390' # 0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ u'\u03cc' # 0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS
+ u'\u03cd' # 0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS
+ u'\u0391' # 0x00a4 -> GREEK CAPITAL LETTER ALPHA
+ u'\u0392' # 0x00a5 -> GREEK CAPITAL LETTER BETA
+ u'\u0393' # 0x00a6 -> GREEK CAPITAL LETTER GAMMA
+ u'\u0394' # 0x00a7 -> GREEK CAPITAL LETTER DELTA
+ u'\u0395' # 0x00a8 -> GREEK CAPITAL LETTER EPSILON
+ u'\u0396' # 0x00a9 -> GREEK CAPITAL LETTER ZETA
+ u'\u0397' # 0x00aa -> GREEK CAPITAL LETTER ETA
+ u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ u'\u0398' # 0x00ac -> GREEK CAPITAL LETTER THETA
+ u'\u0399' # 0x00ad -> GREEK CAPITAL LETTER IOTA
+ u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\u2591' # 0x00b0 -> LIGHT SHADE
+ u'\u2592' # 0x00b1 -> MEDIUM SHADE
+ u'\u2593' # 0x00b2 -> DARK SHADE
+ u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ u'\u039a' # 0x00b5 -> GREEK CAPITAL LETTER KAPPA
+ u'\u039b' # 0x00b6 -> GREEK CAPITAL LETTER LAMDA
+ u'\u039c' # 0x00b7 -> GREEK CAPITAL LETTER MU
+ u'\u039d' # 0x00b8 -> GREEK CAPITAL LETTER NU
+ u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ u'\u039e' # 0x00bd -> GREEK CAPITAL LETTER XI
+ u'\u039f' # 0x00be -> GREEK CAPITAL LETTER OMICRON
+ u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ u'\u03a0' # 0x00c6 -> GREEK CAPITAL LETTER PI
+ u'\u03a1' # 0x00c7 -> GREEK CAPITAL LETTER RHO
+ u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ u'\u03a3' # 0x00cf -> GREEK CAPITAL LETTER SIGMA
+ u'\u03a4' # 0x00d0 -> GREEK CAPITAL LETTER TAU
+ u'\u03a5' # 0x00d1 -> GREEK CAPITAL LETTER UPSILON
+ u'\u03a6' # 0x00d2 -> GREEK CAPITAL LETTER PHI
+ u'\u03a7' # 0x00d3 -> GREEK CAPITAL LETTER CHI
+ u'\u03a8' # 0x00d4 -> GREEK CAPITAL LETTER PSI
+ u'\u03a9' # 0x00d5 -> GREEK CAPITAL LETTER OMEGA
+ u'\u03b1' # 0x00d6 -> GREEK SMALL LETTER ALPHA
+ u'\u03b2' # 0x00d7 -> GREEK SMALL LETTER BETA
+ u'\u03b3' # 0x00d8 -> GREEK SMALL LETTER GAMMA
+ u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ u'\u2588' # 0x00db -> FULL BLOCK
+ u'\u2584' # 0x00dc -> LOWER HALF BLOCK
+ u'\u03b4' # 0x00dd -> GREEK SMALL LETTER DELTA
+ u'\u03b5' # 0x00de -> GREEK SMALL LETTER EPSILON
+ u'\u2580' # 0x00df -> UPPER HALF BLOCK
+ u'\u03b6' # 0x00e0 -> GREEK SMALL LETTER ZETA
+ u'\u03b7' # 0x00e1 -> GREEK SMALL LETTER ETA
+ u'\u03b8' # 0x00e2 -> GREEK SMALL LETTER THETA
+ u'\u03b9' # 0x00e3 -> GREEK SMALL LETTER IOTA
+ u'\u03ba' # 0x00e4 -> GREEK SMALL LETTER KAPPA
+ u'\u03bb' # 0x00e5 -> GREEK SMALL LETTER LAMDA
+ u'\u03bc' # 0x00e6 -> GREEK SMALL LETTER MU
+ u'\u03bd' # 0x00e7 -> GREEK SMALL LETTER NU
+ u'\u03be' # 0x00e8 -> GREEK SMALL LETTER XI
+ u'\u03bf' # 0x00e9 -> GREEK SMALL LETTER OMICRON
+ u'\u03c0' # 0x00ea -> GREEK SMALL LETTER PI
+ u'\u03c1' # 0x00eb -> GREEK SMALL LETTER RHO
+ u'\u03c3' # 0x00ec -> GREEK SMALL LETTER SIGMA
+ u'\u03c2' # 0x00ed -> GREEK SMALL LETTER FINAL SIGMA
+ u'\u03c4' # 0x00ee -> GREEK SMALL LETTER TAU
+ u'\u0384' # 0x00ef -> GREEK TONOS
+ u'\xad' # 0x00f0 -> SOFT HYPHEN
+ u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ u'\u03c5' # 0x00f2 -> GREEK SMALL LETTER UPSILON
+ u'\u03c6' # 0x00f3 -> GREEK SMALL LETTER PHI
+ u'\u03c7' # 0x00f4 -> GREEK SMALL LETTER CHI
+ u'\xa7' # 0x00f5 -> SECTION SIGN
+ u'\u03c8' # 0x00f6 -> GREEK SMALL LETTER PSI
+ u'\u0385' # 0x00f7 -> GREEK DIALYTIKA TONOS
+ u'\xb0' # 0x00f8 -> DEGREE SIGN
+ u'\xa8' # 0x00f9 -> DIAERESIS
+ u'\u03c9' # 0x00fa -> GREEK SMALL LETTER OMEGA
+ u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ u'\u03b0' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ u'\u03ce' # 0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS
+ u'\u25a0' # 0x00fe -> BLACK SQUARE
+ u'\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a6: 0x008a, # BROKEN BAR
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a8: 0x00f9, # DIAERESIS
+ 0x00a9: 0x0097, # COPYRIGHT SIGN
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x0089, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x0099, # SUPERSCRIPT TWO
+ 0x00b3: 0x009a, # SUPERSCRIPT THREE
+ 0x00b7: 0x0088, # MIDDLE DOT
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x0384: 0x00ef, # GREEK TONOS
+ 0x0385: 0x00f7, # GREEK DIALYTIKA TONOS
+ 0x0386: 0x0086, # GREEK CAPITAL LETTER ALPHA WITH TONOS
+ 0x0388: 0x008d, # GREEK CAPITAL LETTER EPSILON WITH TONOS
+ 0x0389: 0x008f, # GREEK CAPITAL LETTER ETA WITH TONOS
+ 0x038a: 0x0090, # GREEK CAPITAL LETTER IOTA WITH TONOS
+ 0x038c: 0x0092, # GREEK CAPITAL LETTER OMICRON WITH TONOS
+ 0x038e: 0x0095, # GREEK CAPITAL LETTER UPSILON WITH TONOS
+ 0x038f: 0x0098, # GREEK CAPITAL LETTER OMEGA WITH TONOS
+ 0x0390: 0x00a1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ 0x0391: 0x00a4, # GREEK CAPITAL LETTER ALPHA
+ 0x0392: 0x00a5, # GREEK CAPITAL LETTER BETA
+ 0x0393: 0x00a6, # GREEK CAPITAL LETTER GAMMA
+ 0x0394: 0x00a7, # GREEK CAPITAL LETTER DELTA
+ 0x0395: 0x00a8, # GREEK CAPITAL LETTER EPSILON
+ 0x0396: 0x00a9, # GREEK CAPITAL LETTER ZETA
+ 0x0397: 0x00aa, # GREEK CAPITAL LETTER ETA
+ 0x0398: 0x00ac, # GREEK CAPITAL LETTER THETA
+ 0x0399: 0x00ad, # GREEK CAPITAL LETTER IOTA
+ 0x039a: 0x00b5, # GREEK CAPITAL LETTER KAPPA
+ 0x039b: 0x00b6, # GREEK CAPITAL LETTER LAMDA
+ 0x039c: 0x00b7, # GREEK CAPITAL LETTER MU
+ 0x039d: 0x00b8, # GREEK CAPITAL LETTER NU
+ 0x039e: 0x00bd, # GREEK CAPITAL LETTER XI
+ 0x039f: 0x00be, # GREEK CAPITAL LETTER OMICRON
+ 0x03a0: 0x00c6, # GREEK CAPITAL LETTER PI
+ 0x03a1: 0x00c7, # GREEK CAPITAL LETTER RHO
+ 0x03a3: 0x00cf, # GREEK CAPITAL LETTER SIGMA
+ 0x03a4: 0x00d0, # GREEK CAPITAL LETTER TAU
+ 0x03a5: 0x00d1, # GREEK CAPITAL LETTER UPSILON
+ 0x03a6: 0x00d2, # GREEK CAPITAL LETTER PHI
+ 0x03a7: 0x00d3, # GREEK CAPITAL LETTER CHI
+ 0x03a8: 0x00d4, # GREEK CAPITAL LETTER PSI
+ 0x03a9: 0x00d5, # GREEK CAPITAL LETTER OMEGA
+ 0x03aa: 0x0091, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ 0x03ab: 0x0096, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ 0x03ac: 0x009b, # GREEK SMALL LETTER ALPHA WITH TONOS
+ 0x03ad: 0x009d, # GREEK SMALL LETTER EPSILON WITH TONOS
+ 0x03ae: 0x009e, # GREEK SMALL LETTER ETA WITH TONOS
+ 0x03af: 0x009f, # GREEK SMALL LETTER IOTA WITH TONOS
+ 0x03b0: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ 0x03b1: 0x00d6, # GREEK SMALL LETTER ALPHA
+ 0x03b2: 0x00d7, # GREEK SMALL LETTER BETA
+ 0x03b3: 0x00d8, # GREEK SMALL LETTER GAMMA
+ 0x03b4: 0x00dd, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00de, # GREEK SMALL LETTER EPSILON
+ 0x03b6: 0x00e0, # GREEK SMALL LETTER ZETA
+ 0x03b7: 0x00e1, # GREEK SMALL LETTER ETA
+ 0x03b8: 0x00e2, # GREEK SMALL LETTER THETA
+ 0x03b9: 0x00e3, # GREEK SMALL LETTER IOTA
+ 0x03ba: 0x00e4, # GREEK SMALL LETTER KAPPA
+ 0x03bb: 0x00e5, # GREEK SMALL LETTER LAMDA
+ 0x03bc: 0x00e6, # GREEK SMALL LETTER MU
+ 0x03bd: 0x00e7, # GREEK SMALL LETTER NU
+ 0x03be: 0x00e8, # GREEK SMALL LETTER XI
+ 0x03bf: 0x00e9, # GREEK SMALL LETTER OMICRON
+ 0x03c0: 0x00ea, # GREEK SMALL LETTER PI
+ 0x03c1: 0x00eb, # GREEK SMALL LETTER RHO
+ 0x03c2: 0x00ed, # GREEK SMALL LETTER FINAL SIGMA
+ 0x03c3: 0x00ec, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00ee, # GREEK SMALL LETTER TAU
+ 0x03c5: 0x00f2, # GREEK SMALL LETTER UPSILON
+ 0x03c6: 0x00f3, # GREEK SMALL LETTER PHI
+ 0x03c7: 0x00f4, # GREEK SMALL LETTER CHI
+ 0x03c8: 0x00f6, # GREEK SMALL LETTER PSI
+ 0x03c9: 0x00fa, # GREEK SMALL LETTER OMEGA
+ 0x03ca: 0x00a0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ 0x03cc: 0x00a2, # GREEK SMALL LETTER OMICRON WITH TONOS
+ 0x03cd: 0x00a3, # GREEK SMALL LETTER UPSILON WITH TONOS
+ 0x03ce: 0x00fd, # GREEK SMALL LETTER OMEGA WITH TONOS
+ 0x2015: 0x008e, # HORIZONTAL BAR
+ 0x2018: 0x008b, # LEFT SINGLE QUOTATION MARK
+ 0x2019: 0x008c, # RIGHT SINGLE QUOTATION MARK
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/cashew/Lib/encodings/cp874.py b/cashew/Lib/encodings/cp874.py
new file mode 100644
index 0000000..6110f46
--- /dev/null
+++ b/cashew/Lib/encodings/cp874.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp874 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    # Stateless cp874 codec: encode looks characters up in the module-level encoding_table.
+    def encode(self,input,errors='strict'):
+        return codecs.charmap_encode(input,errors,encoding_table)
+    # decode is the inverse lookup, through the module-level decoding_table.
+    def decode(self,input,errors='strict'):
+        return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):  # final is unused: no state is kept between calls
+        return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # [0] keeps the output, drops the consumed length
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):  # final is unused: no state is kept between calls
+        return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # [0] keeps the output, drops the consumed length
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass  # nothing to add: encode/decode come from Codec, the rest from codecs.StreamWriter
+
+class StreamReader(Codec,codecs.StreamReader):
+    pass  # nothing to add: encode/decode come from Codec, the rest from codecs.StreamReader
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp874',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table: entry i is the Unicode character for byte value i; u'\ufffe' marks an undefined byte
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\u20ac' # 0x80 -> EURO SIGN
+ u'\ufffe' # 0x81 -> UNDEFINED
+ u'\ufffe' # 0x82 -> UNDEFINED
+ u'\ufffe' # 0x83 -> UNDEFINED
+ u'\ufffe' # 0x84 -> UNDEFINED
+ u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ u'\ufffe' # 0x86 -> UNDEFINED
+ u'\ufffe' # 0x87 -> UNDEFINED
+ u'\ufffe' # 0x88 -> UNDEFINED
+ u'\ufffe' # 0x89 -> UNDEFINED
+ u'\ufffe' # 0x8A -> UNDEFINED
+ u'\ufffe' # 0x8B -> UNDEFINED
+ u'\ufffe' # 0x8C -> UNDEFINED
+ u'\ufffe' # 0x8D -> UNDEFINED
+ u'\ufffe' # 0x8E -> UNDEFINED
+ u'\ufffe' # 0x8F -> UNDEFINED
+ u'\ufffe' # 0x90 -> UNDEFINED
+ u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ u'\u2022' # 0x95 -> BULLET
+ u'\u2013' # 0x96 -> EN DASH
+ u'\u2014' # 0x97 -> EM DASH
+ u'\ufffe' # 0x98 -> UNDEFINED
+ u'\ufffe' # 0x99 -> UNDEFINED
+ u'\ufffe' # 0x9A -> UNDEFINED
+ u'\ufffe' # 0x9B -> UNDEFINED
+ u'\ufffe' # 0x9C -> UNDEFINED
+ u'\ufffe' # 0x9D -> UNDEFINED
+ u'\ufffe' # 0x9E -> UNDEFINED
+ u'\ufffe' # 0x9F -> UNDEFINED
+ u'\xa0' # 0xA0 -> NO-BREAK SPACE
+ u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI
+ u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI
+ u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT
+ u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI
+ u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON
+ u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG
+ u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU
+ u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN
+ u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING
+ u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG
+ u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO
+ u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE
+ u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING
+ u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA
+ u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK
+ u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN
+ u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO
+ u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO
+ u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN
+ u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK
+ u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO
+ u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG
+ u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN
+ u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG
+ u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU
+ u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI
+ u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA
+ u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG
+ u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA
+ u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN
+ u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN
+ u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO
+ u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA
+ u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK
+ u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA
+ u'\u0e24' # 0xC4 -> THAI CHARACTER RU
+ u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING
+ u'\u0e26' # 0xC6 -> THAI CHARACTER LU
+ u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN
+ u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA
+ u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI
+ u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA
+ u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP
+ u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA
+ u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG
+ u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK
+ u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI
+ u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A
+ u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT
+ u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA
+ u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM
+ u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I
+ u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II
+ u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE
+ u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE
+ u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U
+ u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU
+ u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU
+ u'\ufffe' # 0xDB -> UNDEFINED
+ u'\ufffe' # 0xDC -> UNDEFINED
+ u'\ufffe' # 0xDD -> UNDEFINED
+ u'\ufffe' # 0xDE -> UNDEFINED
+ u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT
+ u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E
+ u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE
+ u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O
+ u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN
+ u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI
+ u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO
+ u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK
+ u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU
+ u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK
+ u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO
+ u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI
+ u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA
+ u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT
+ u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT
+ u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN
+ u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN
+ u'\u0e50' # 0xF0 -> THAI DIGIT ZERO
+ u'\u0e51' # 0xF1 -> THAI DIGIT ONE
+ u'\u0e52' # 0xF2 -> THAI DIGIT TWO
+ u'\u0e53' # 0xF3 -> THAI DIGIT THREE
+ u'\u0e54' # 0xF4 -> THAI DIGIT FOUR
+ u'\u0e55' # 0xF5 -> THAI DIGIT FIVE
+ u'\u0e56' # 0xF6 -> THAI DIGIT SIX
+ u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN
+ u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT
+ u'\u0e59' # 0xF9 -> THAI DIGIT NINE
+ u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU
+ u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT
+ u'\ufffe' # 0xFC -> UNDEFINED
+ u'\ufffe' # 0xFD -> UNDEFINED
+ u'\ufffe' # 0xFE -> UNDEFINED
+ u'\ufffe' # 0xFF -> UNDEFINED
+)
+
+### Encoding table: derived from decoding_table rather than listed separately
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/cp875.py b/cashew/Lib/encodings/cp875.py
new file mode 100644
index 0000000..72b160b
--- /dev/null
+++ b/cashew/Lib/encodings/cp875.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp875 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    # Stateless cp875 codec: encode looks characters up in the module-level encoding_table.
+    def encode(self,input,errors='strict'):
+        return codecs.charmap_encode(input,errors,encoding_table)
+    # decode is the inverse lookup, through the module-level decoding_table.
+    def decode(self,input,errors='strict'):
+        return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):  # final is unused: no state is kept between calls
+        return codecs.charmap_encode(input,self.errors,encoding_table)[0]  # [0] keeps the output, drops the consumed length
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):  # final is unused: no state is kept between calls
+        return codecs.charmap_decode(input,self.errors,decoding_table)[0]  # [0] keeps the output, drops the consumed length
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass  # nothing to add: encode/decode come from Codec, the rest from codecs.StreamWriter
+
+class StreamReader(Codec,codecs.StreamReader):
+    pass  # nothing to add: encode/decode come from Codec, the rest from codecs.StreamReader
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp875',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table: entry i is the Unicode character for EBCDIC byte value i
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x9c' # 0x04 -> CONTROL
+ u'\t' # 0x05 -> HORIZONTAL TABULATION
+ u'\x86' # 0x06 -> CONTROL
+ u'\x7f' # 0x07 -> DELETE
+ u'\x97' # 0x08 -> CONTROL
+ u'\x8d' # 0x09 -> CONTROL
+ u'\x8e' # 0x0A -> CONTROL
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x9d' # 0x14 -> CONTROL
+ u'\x85' # 0x15 -> CONTROL
+ u'\x08' # 0x16 -> BACKSPACE
+ u'\x87' # 0x17 -> CONTROL
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x92' # 0x1A -> CONTROL
+ u'\x8f' # 0x1B -> CONTROL
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u'\x80' # 0x20 -> CONTROL
+ u'\x81' # 0x21 -> CONTROL
+ u'\x82' # 0x22 -> CONTROL
+ u'\x83' # 0x23 -> CONTROL
+ u'\x84' # 0x24 -> CONTROL
+ u'\n' # 0x25 -> LINE FEED
+ u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ u'\x1b' # 0x27 -> ESCAPE
+ u'\x88' # 0x28 -> CONTROL
+ u'\x89' # 0x29 -> CONTROL
+ u'\x8a' # 0x2A -> CONTROL
+ u'\x8b' # 0x2B -> CONTROL
+ u'\x8c' # 0x2C -> CONTROL
+ u'\x05' # 0x2D -> ENQUIRY
+ u'\x06' # 0x2E -> ACKNOWLEDGE
+ u'\x07' # 0x2F -> BELL
+ u'\x90' # 0x30 -> CONTROL
+ u'\x91' # 0x31 -> CONTROL
+ u'\x16' # 0x32 -> SYNCHRONOUS IDLE
+ u'\x93' # 0x33 -> CONTROL
+ u'\x94' # 0x34 -> CONTROL
+ u'\x95' # 0x35 -> CONTROL
+ u'\x96' # 0x36 -> CONTROL
+ u'\x04' # 0x37 -> END OF TRANSMISSION
+ u'\x98' # 0x38 -> CONTROL
+ u'\x99' # 0x39 -> CONTROL
+ u'\x9a' # 0x3A -> CONTROL
+ u'\x9b' # 0x3B -> CONTROL
+ u'\x14' # 0x3C -> DEVICE CONTROL FOUR
+ u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ u'\x9e' # 0x3E -> CONTROL
+ u'\x1a' # 0x3F -> SUBSTITUTE
+ u' ' # 0x40 -> SPACE
+ u'\u0391' # 0x41 -> GREEK CAPITAL LETTER ALPHA
+ u'\u0392' # 0x42 -> GREEK CAPITAL LETTER BETA
+ u'\u0393' # 0x43 -> GREEK CAPITAL LETTER GAMMA
+ u'\u0394' # 0x44 -> GREEK CAPITAL LETTER DELTA
+ u'\u0395' # 0x45 -> GREEK CAPITAL LETTER EPSILON
+ u'\u0396' # 0x46 -> GREEK CAPITAL LETTER ZETA
+ u'\u0397' # 0x47 -> GREEK CAPITAL LETTER ETA
+ u'\u0398' # 0x48 -> GREEK CAPITAL LETTER THETA
+ u'\u0399' # 0x49 -> GREEK CAPITAL LETTER IOTA
+ u'[' # 0x4A -> LEFT SQUARE BRACKET
+ u'.' # 0x4B -> FULL STOP
+ u'<' # 0x4C -> LESS-THAN SIGN
+ u'(' # 0x4D -> LEFT PARENTHESIS
+ u'+' # 0x4E -> PLUS SIGN
+ u'!' # 0x4F -> EXCLAMATION MARK
+ u'&' # 0x50 -> AMPERSAND
+ u'\u039a' # 0x51 -> GREEK CAPITAL LETTER KAPPA
+ u'\u039b' # 0x52 -> GREEK CAPITAL LETTER LAMDA
+ u'\u039c' # 0x53 -> GREEK CAPITAL LETTER MU
+ u'\u039d' # 0x54 -> GREEK CAPITAL LETTER NU
+ u'\u039e' # 0x55 -> GREEK CAPITAL LETTER XI
+ u'\u039f' # 0x56 -> GREEK CAPITAL LETTER OMICRON
+ u'\u03a0' # 0x57 -> GREEK CAPITAL LETTER PI
+ u'\u03a1' # 0x58 -> GREEK CAPITAL LETTER RHO
+ u'\u03a3' # 0x59 -> GREEK CAPITAL LETTER SIGMA
+ u']' # 0x5A -> RIGHT SQUARE BRACKET
+ u'$' # 0x5B -> DOLLAR SIGN
+ u'*' # 0x5C -> ASTERISK
+ u')' # 0x5D -> RIGHT PARENTHESIS
+ u';' # 0x5E -> SEMICOLON
+ u'^' # 0x5F -> CIRCUMFLEX ACCENT
+ u'-' # 0x60 -> HYPHEN-MINUS
+ u'/' # 0x61 -> SOLIDUS
+ u'\u03a4' # 0x62 -> GREEK CAPITAL LETTER TAU
+ u'\u03a5' # 0x63 -> GREEK CAPITAL LETTER UPSILON
+ u'\u03a6' # 0x64 -> GREEK CAPITAL LETTER PHI
+ u'\u03a7' # 0x65 -> GREEK CAPITAL LETTER CHI
+ u'\u03a8' # 0x66 -> GREEK CAPITAL LETTER PSI
+ u'\u03a9' # 0x67 -> GREEK CAPITAL LETTER OMEGA
+ u'\u03aa' # 0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ u'\u03ab' # 0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ u'|' # 0x6A -> VERTICAL LINE
+ u',' # 0x6B -> COMMA
+ u'%' # 0x6C -> PERCENT SIGN
+ u'_' # 0x6D -> LOW LINE
+ u'>' # 0x6E -> GREATER-THAN SIGN
+ u'?' # 0x6F -> QUESTION MARK
+ u'\xa8' # 0x70 -> DIAERESIS
+ u'\u0386' # 0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+ u'\u0388' # 0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+ u'\u0389' # 0x73 -> GREEK CAPITAL LETTER ETA WITH TONOS
+ u'\xa0' # 0x74 -> NO-BREAK SPACE
+ u'\u038a' # 0x75 -> GREEK CAPITAL LETTER IOTA WITH TONOS
+ u'\u038c' # 0x76 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+ u'\u038e' # 0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+ u'\u038f' # 0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+ u'`' # 0x79 -> GRAVE ACCENT
+ u':' # 0x7A -> COLON
+ u'#' # 0x7B -> NUMBER SIGN
+ u'@' # 0x7C -> COMMERCIAL AT
+ u"'" # 0x7D -> APOSTROPHE
+ u'=' # 0x7E -> EQUALS SIGN
+ u'"' # 0x7F -> QUOTATION MARK
+ u'\u0385' # 0x80 -> GREEK DIALYTIKA TONOS
+ u'a' # 0x81 -> LATIN SMALL LETTER A
+ u'b' # 0x82 -> LATIN SMALL LETTER B
+ u'c' # 0x83 -> LATIN SMALL LETTER C
+ u'd' # 0x84 -> LATIN SMALL LETTER D
+ u'e' # 0x85 -> LATIN SMALL LETTER E
+ u'f' # 0x86 -> LATIN SMALL LETTER F
+ u'g' # 0x87 -> LATIN SMALL LETTER G
+ u'h' # 0x88 -> LATIN SMALL LETTER H
+ u'i' # 0x89 -> LATIN SMALL LETTER I
+ u'\u03b1' # 0x8A -> GREEK SMALL LETTER ALPHA
+ u'\u03b2' # 0x8B -> GREEK SMALL LETTER BETA
+ u'\u03b3' # 0x8C -> GREEK SMALL LETTER GAMMA
+ u'\u03b4' # 0x8D -> GREEK SMALL LETTER DELTA
+ u'\u03b5' # 0x8E -> GREEK SMALL LETTER EPSILON
+ u'\u03b6' # 0x8F -> GREEK SMALL LETTER ZETA
+ u'\xb0' # 0x90 -> DEGREE SIGN
+ u'j' # 0x91 -> LATIN SMALL LETTER J
+ u'k' # 0x92 -> LATIN SMALL LETTER K
+ u'l' # 0x93 -> LATIN SMALL LETTER L
+ u'm' # 0x94 -> LATIN SMALL LETTER M
+ u'n' # 0x95 -> LATIN SMALL LETTER N
+ u'o' # 0x96 -> LATIN SMALL LETTER O
+ u'p' # 0x97 -> LATIN SMALL LETTER P
+ u'q' # 0x98 -> LATIN SMALL LETTER Q
+ u'r' # 0x99 -> LATIN SMALL LETTER R
+ u'\u03b7' # 0x9A -> GREEK SMALL LETTER ETA
+ u'\u03b8' # 0x9B -> GREEK SMALL LETTER THETA
+ u'\u03b9' # 0x9C -> GREEK SMALL LETTER IOTA
+ u'\u03ba' # 0x9D -> GREEK SMALL LETTER KAPPA
+ u'\u03bb' # 0x9E -> GREEK SMALL LETTER LAMDA
+ u'\u03bc' # 0x9F -> GREEK SMALL LETTER MU
+ u'\xb4' # 0xA0 -> ACUTE ACCENT
+ u'~' # 0xA1 -> TILDE
+ u's' # 0xA2 -> LATIN SMALL LETTER S
+ u't' # 0xA3 -> LATIN SMALL LETTER T
+ u'u' # 0xA4 -> LATIN SMALL LETTER U
+ u'v' # 0xA5 -> LATIN SMALL LETTER V
+ u'w' # 0xA6 -> LATIN SMALL LETTER W
+ u'x' # 0xA7 -> LATIN SMALL LETTER X
+ u'y' # 0xA8 -> LATIN SMALL LETTER Y
+ u'z' # 0xA9 -> LATIN SMALL LETTER Z
+ u'\u03bd' # 0xAA -> GREEK SMALL LETTER NU
+ u'\u03be' # 0xAB -> GREEK SMALL LETTER XI
+ u'\u03bf' # 0xAC -> GREEK SMALL LETTER OMICRON
+ u'\u03c0' # 0xAD -> GREEK SMALL LETTER PI
+ u'\u03c1' # 0xAE -> GREEK SMALL LETTER RHO
+ u'\u03c3' # 0xAF -> GREEK SMALL LETTER SIGMA
+ u'\xa3' # 0xB0 -> POUND SIGN
+ u'\u03ac' # 0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS
+ u'\u03ad' # 0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS
+ u'\u03ae' # 0xB3 -> GREEK SMALL LETTER ETA WITH TONOS
+ u'\u03ca' # 0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ u'\u03af' # 0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS
+ u'\u03cc' # 0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS
+ u'\u03cd' # 0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS
+ u'\u03cb' # 0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ u'\u03ce' # 0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS
+ u'\u03c2' # 0xBA -> GREEK SMALL LETTER FINAL SIGMA
+ u'\u03c4' # 0xBB -> GREEK SMALL LETTER TAU
+ u'\u03c5' # 0xBC -> GREEK SMALL LETTER UPSILON
+ u'\u03c6' # 0xBD -> GREEK SMALL LETTER PHI
+ u'\u03c7' # 0xBE -> GREEK SMALL LETTER CHI
+ u'\u03c8' # 0xBF -> GREEK SMALL LETTER PSI
+ u'{' # 0xC0 -> LEFT CURLY BRACKET
+ u'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ u'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ u'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ u'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ u'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ u'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ u'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ u'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ u'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ u'\xad' # 0xCA -> SOFT HYPHEN
+ u'\u03c9' # 0xCB -> GREEK SMALL LETTER OMEGA
+ u'\u0390' # 0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ u'\u03b0' # 0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ u'\u2018' # 0xCE -> LEFT SINGLE QUOTATION MARK
+ u'\u2015' # 0xCF -> HORIZONTAL BAR
+ u'}' # 0xD0 -> RIGHT CURLY BRACKET
+ u'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ u'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ u'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ u'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ u'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ u'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ u'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ u'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ u'\xb1' # 0xDA -> PLUS-MINUS SIGN
+ u'\xbd' # 0xDB -> VULGAR FRACTION ONE HALF
+ u'\x1a' # 0xDC -> SUBSTITUTE
+ u'\u0387' # 0xDD -> GREEK ANO TELEIA
+ u'\u2019' # 0xDE -> RIGHT SINGLE QUOTATION MARK
+ u'\xa6' # 0xDF -> BROKEN BAR
+ u'\\' # 0xE0 -> REVERSE SOLIDUS
+ u'\x1a' # 0xE1 -> SUBSTITUTE
+ u'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ u'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ u'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ u'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ u'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ u'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ u'\xb2' # 0xEA -> SUPERSCRIPT TWO
+ u'\xa7' # 0xEB -> SECTION SIGN
+ u'\x1a' # 0xEC -> SUBSTITUTE
+ u'\x1a' # 0xED -> SUBSTITUTE
+ u'\xab' # 0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\xac' # 0xEF -> NOT SIGN
+ u'0' # 0xF0 -> DIGIT ZERO
+ u'1' # 0xF1 -> DIGIT ONE
+ u'2' # 0xF2 -> DIGIT TWO
+ u'3' # 0xF3 -> DIGIT THREE
+ u'4' # 0xF4 -> DIGIT FOUR
+ u'5' # 0xF5 -> DIGIT FIVE
+ u'6' # 0xF6 -> DIGIT SIX
+ u'7' # 0xF7 -> DIGIT SEVEN
+ u'8' # 0xF8 -> DIGIT EIGHT
+ u'9' # 0xF9 -> DIGIT NINE
+ u'\xb3' # 0xFA -> SUPERSCRIPT THREE
+ u'\xa9' # 0xFB -> COPYRIGHT SIGN
+ u'\x1a' # 0xFC -> SUBSTITUTE
+ u'\x1a' # 0xFD -> SUBSTITUTE
+ u'\xbb' # 0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ u'\x9f' # 0xFF -> CONTROL
+)
+
+### Encoding table: derived from decoding_table rather than listed separately
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/cashew/Lib/encodings/hex_codec.py b/cashew/Lib/encodings/hex_codec.py
new file mode 100644
index 0000000..154488c
--- /dev/null
+++ b/cashew/Lib/encodings/hex_codec.py
@@ -0,0 +1,80 @@
+""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding
+
+ Unlike most of the other codecs which target Unicode, this codec
+ will return Python string objects for both encode and decode.
+
+ Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""
+import codecs, binascii
+
+### Codec APIs
+
+def hex_encode(input,errors='strict'):
+
+ """ Encodes the object input and returns a tuple (output
+ object, length consumed).
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling which is the only currently supported
+ error handling for this codec.
+
+ """
+ assert errors == 'strict'
+ output = binascii.b2a_hex(input)
+ return (output, len(input))
+
+def hex_decode(input,errors='strict'):
+
+ """ Decodes the object input and returns a tuple (output
+ object, length consumed).
+
+ input must be an object which provides the bf_getreadbuf
+ buffer slot. Python strings, buffer objects and memory
+ mapped files are examples of objects providing this slot.
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling which is the only currently supported
+ error handling for this codec.
+
+ """
+ assert errors == 'strict'
+ output = binascii.a2b_hex(input)
+ return (output, len(input))
+
+class Codec(codecs.Codec):
+    # Stateless: both methods forward to the module-level hex_encode / hex_decode functions.
+    def encode(self, input,errors='strict'):
+        return hex_encode(input,errors)
+    def decode(self, input,errors='strict'):
+        return hex_decode(input,errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):  # final is unused: every chunk is encoded on its own
+        assert self.errors == 'strict'
+        return binascii.b2a_hex(input)
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):  # final is unused: nothing is carried over between calls
+        assert self.errors == 'strict'
+        return binascii.a2b_hex(input)  # so each chunk must be valid for a2b_hex on its own (whole hex pairs)
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass  # nothing to add: encode/decode come from Codec, the rest from codecs.StreamWriter
+
+class StreamReader(Codec,codecs.StreamReader):
+    pass  # nothing to add: encode/decode come from Codec, the rest from codecs.StreamReader
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='hex',
+ encode=hex_encode,
+ decode=hex_decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamwriter=StreamWriter,
+ streamreader=StreamReader,
+ _is_text_encoding=False,
+ )
diff --git a/cashew/Lib/encodings/hp_roman8.py b/cashew/Lib/encodings/hp_roman8.py
new file mode 100644
index 0000000..dbaaa72
--- /dev/null
+++ b/cashew/Lib/encodings/hp_roman8.py
@@ -0,0 +1,152 @@
+""" Python Character Mapping Codec generated from 'hp_roman8.txt' with gencodec.py.
+
+ Based on data from ftp://dkuug.dk/i18n/charmaps/HP-ROMAN8 (Keld Simonsen)
+
+ Original source: LaserJet IIP Printer User's Manual HP part no
+    33471-90901, Hewlett-Packard, June 1989.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    # Stateless hp-roman8 codec: encode looks characters up in the module-level encoding_map.
+    def encode(self,input,errors='strict'):
+        return codecs.charmap_encode(input,errors,encoding_map)
+    # decode is the inverse lookup, through the module-level decoding_map.
+    def decode(self,input,errors='strict'):
+        return codecs.charmap_decode(input,errors,decoding_map)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):  # final is unused: no state is kept between calls
+        return codecs.charmap_encode(input,self.errors,encoding_map)[0]  # [0] keeps the output, drops the consumed length
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):  # final is unused: no state is kept between calls
+        return codecs.charmap_decode(input,self.errors,decoding_map)[0]  # [0] keeps the output, drops the consumed length
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass  # nothing to add: encode/decode come from Codec, the rest from codecs.StreamWriter
+
+class StreamReader(Codec,codecs.StreamReader):
+    pass  # nothing to add: encode/decode come from Codec, the rest from codecs.StreamReader
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='hp-roman8',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamwriter=StreamWriter,
+ streamreader=StreamReader,
+ )
+
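+### Decoding Map: identity for all 256 byte values, then overrides for 0xA1-0xFF (0xFF maps to None, i.e. undefined)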
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x00a1: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00a2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00a3: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00a4: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00a5: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00a6: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00a7: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00a8: 0x00b4, # ACUTE ACCENT
+ 0x00a9: 0x02cb, # MODIFIER LETTER GRAVE ACCENT (Mandarin Chinese fourth tone)
+ 0x00aa: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
+ 0x00ab: 0x00a8, # DIAERESIS
+ 0x00ac: 0x02dc, # SMALL TILDE
+ 0x00ad: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00ae: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00af: 0x20a4, # LIRA SIGN
+ 0x00b0: 0x00af, # MACRON
+ 0x00b1: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00b2: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00b3: 0x00b0, # DEGREE SIGN
+ 0x00b4: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00b5: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00b6: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00b7: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00b8: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00b9: 0x00bf, # INVERTED QUESTION MARK
+ 0x00ba: 0x00a4, # CURRENCY SIGN
+ 0x00bb: 0x00a3, # POUND SIGN
+ 0x00bc: 0x00a5, # YEN SIGN
+ 0x00bd: 0x00a7, # SECTION SIGN
+ 0x00be: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00bf: 0x00a2, # CENT SIGN
+ 0x00c0: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00c1: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00c2: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00c3: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00c4: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00c5: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00c6: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00c7: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00c8: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00c9: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00ca: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00cb: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00cc: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00cd: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ce: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00cf: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00d0: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00d1: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00d2: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00d3: 0x00c6, # LATIN CAPITAL LETTER AE
+ 0x00d4: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00d5: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00d6: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x00d7: 0x00e6, # LATIN SMALL LETTER AE
+ 0x00d8: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00d9: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00da: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00db: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00dc: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00dd: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00de: 0x00df, # LATIN SMALL LETTER SHARP S (German)
+ 0x00df: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00e0: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00e1: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00e2: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
+ 0x00e3: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic)
+ 0x00e4: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic)
+ 0x00e5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00e6: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00e7: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e8: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00e9: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00ea: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x00eb: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
+ 0x00ec: 0x0161, # LATIN SMALL LETTER S WITH CARON
+ 0x00ed: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00ee: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
+ 0x00ef: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x00f0: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic)
+ 0x00f1: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic)
+ 0x00f2: 0x00b7, # MIDDLE DOT
+ 0x00f3: 0x00b5, # MICRO SIGN
+ 0x00f4: 0x00b6, # PILCROW SIGN
+ 0x00f5: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00f6: 0x2014, # EM DASH
+ 0x00f7: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00f8: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00f9: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00fa: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00fb: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00fc: 0x25a0, # BLACK SQUARE
+ 0x00fd: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00fe: 0x00b1, # PLUS-MINUS SIGN
+ 0x00ff: None,
+})
+
+### Encoding Map: derived from decoding_map rather than listed separately
+
+encoding_map = codecs.make_encoding_map(decoding_map)
diff --git a/cashew/Lib/encodings/idna.py b/cashew/Lib/encodings/idna.py
new file mode 100644
index 0000000..ea90d67
--- /dev/null
+++ b/cashew/Lib/encodings/idna.py
@@ -0,0 +1,288 @@
+# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
+
+import stringprep, re, codecs
+from unicodedata import ucd_3_2_0 as unicodedata
+
+# IDNA section 3.1: the characters recognized as label separators (dots)
+dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")
+
+# IDNA section 5: the ACE prefix, as a byte string and as a unicode string
+ace_prefix = "xn--"
+uace_prefix = unicode(ace_prefix, "ascii")
+
+# This assumes query strings, so AllowUnassigned is true
+def nameprep(label):
+    """Apply the Nameprep profile (RFC 3491) to a single unicode label.
+
+    Characters in stringprep table B.1 are dropped and the rest are mapped
+    through table B.2; the result is NFKC-normalized, checked against the
+    prohibited tables, and finally checked against the bidi requirements.
+    Returns the prepared label; raises UnicodeError if it is not allowed.
+    """
+    # Map: B.1 characters map to nothing, everything else goes through B.2
+    label = u"".join([stringprep.map_table_b2(c) for c in label
+                      if not stringprep.in_table_b1(c)])
+
+    # Normalize
+    label = unicodedata.normalize("NFKC", label)
+
+    # Prohibit
+    for c in label:
+        if stringprep.in_table_c12(c) or \
+           stringprep.in_table_c22(c) or \
+           stringprep.in_table_c3(c) or \
+           stringprep.in_table_c4(c) or \
+           stringprep.in_table_c5(c) or \
+           stringprep.in_table_c6(c) or \
+           stringprep.in_table_c7(c) or \
+           stringprep.in_table_c8(c) or \
+           stringprep.in_table_c9(c):
+            raise UnicodeError("Invalid character %r" % c)
+
+    # Check bidi.  These tests concern the whole label, so run them once if
+    # any RandALCat character is present rather than once per such character.
+    RandAL = [stringprep.in_table_d1(c) for c in label]
+    if any(RandAL):
+        # 1) The characters in section 5.8 MUST be prohibited.
+        #    This is table C.8, which was already checked
+        # 2) If a string contains any RandALCat character, the string
+        #    MUST NOT contain any LCat character.
+        if any(stringprep.in_table_d2(c) for c in label):
+            raise UnicodeError("Violation of BIDI requirement 2")
+
+        # 3) If a string contains any RandALCat character, a
+        #    RandALCat character MUST be the first character of the
+        #    string, and a RandALCat character MUST be the last
+        #    character of the string.
+        if not RandAL[0] or not RandAL[-1]:
+            raise UnicodeError("Violation of BIDI requirement 3")
+
+    return label
+
+def ToASCII(label):
+    """Implement the IDNA ToASCII operation for a single label.
+
+    A label that encodes to ASCII, either as given or after nameprep, is
+    returned in that encoding; any other label is Punycode-encoded and
+    prefixed with ace_prefix.  UseSTD3ASCIIRules is treated as false.
+
+    Raises UnicodeError if nameprep rejects the label, if the prepared
+    label already starts with the ACE prefix, or if the result is empty
+    or longer than 63 characters.
+    """
+    # Step 1: try ASCII.  If this works, steps 2-7 are skipped (step 3 is
+    # a no-op because UseSTD3ASCIIRules is false).
+    try:
+        result = label.encode("ascii")
+    except UnicodeError:
+        result = None
+
+    if result is None:
+        # Step 2: nameprep
+        label = nameprep(label)
+
+        # Step 3: UseSTD3ASCIIRules is false
+        # Step 4: try ASCII
+        try:
+            result = label.encode("ascii")
+        except UnicodeError:
+            result = None
+
+    if result is None:
+        # Step 5: Check ACE prefix
+        if label.startswith(uace_prefix):
+            raise UnicodeError("Label starts with ACE prefix")
+
+        # Step 6: Encode with PUNYCODE
+        # Step 7: Prepend ACE prefix
+        result = ace_prefix + label.encode("punycode")
+
+    # Step 8: Check size
+    if not 0 < len(result) < 64:
+        raise UnicodeError("label empty or too long")
+    return result
+
+def ToUnicode(label):
+    """Implement the IDNA ToUnicode operation for a single label.
+
+    A label without the ACE prefix is returned as unicode; one with the
+    prefix is Punycode-decoded and must round-trip through ToASCII.
+    Raises UnicodeError for invalid characters or a failed round-trip.
+    """
+    # Step 1: Check for ASCII (a str is taken to be ASCII already)
+    needs_nameprep = False
+    if not isinstance(label, str):
+        try:
+            label = label.encode("ascii")
+        except UnicodeError:
+            needs_nameprep = True
+    if needs_nameprep:
+        # Step 2: Perform nameprep
+        label = nameprep(label)
+        # It doesn't say this, but apparently, it should be ASCII now
+        try:
+            label = label.encode("ascii")
+        except UnicodeError:
+            raise UnicodeError("Invalid character in IDN label")
+
+    # Step 3: Check for ACE prefix
+    if not label.startswith(ace_prefix):
+        return unicode(label, "ascii")
+
+    # Steps 4 and 5: Remove ACE prefix, then decode using PUNYCODE
+    result = label[len(ace_prefix):].decode("punycode")
+
+    # Steps 6 and 7: Apply ToASCII and compare with the label of step 3;
+    # the ToASCII output will already be in lower case.
+    ascii_again = ToASCII(result)
+    if label.lower() != ascii_again:
+        raise UnicodeError("IDNA does not round-trip", label, ascii_again)
+
+    # Step 8: return the result of step 5
+    return result
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    """IDNA codec that converts whole, dot-separated domain names.
+
+    Only the 'strict' error mode is accepted by either method.
+    """
+
+    def encode(self, input, errors='strict'):
+        """Return (ASCII name, len(input)), applying ToASCII to each label."""
+        if errors != 'strict':
+            # IDNA is quite clear that implementations must be strict
+            raise UnicodeError("unsupported error handling "+errors)
+
+        if not input:
+            return "", 0
+
+        labels = dots.split(input)
+        trailing_dot = ''
+        if labels and not labels[-1]:
+            # An empty last label means the name ended with a dot
+            trailing_dot = '.'
+            labels.pop()
+
+        # Join with U+002E
+        encoded = ".".join([ToASCII(label) for label in labels])
+        return encoded + trailing_dot, len(input)
+
+    def decode(self, input, errors='strict'):
+        """Return (unicode name, len(input)), applying ToUnicode per label."""
+        if errors != 'strict':
+            raise UnicodeError("Unsupported error handling "+errors)
+
+        if not input:
+            return u"", 0
+
+        # IDNA allows decoding to operate on Unicode strings, too.
+        if isinstance(input, unicode):
+            labels = dots.split(input)
+        else:
+            # Must be ASCII string; the unicode() call only validates that
+            input = str(input)
+            unicode(input, "ascii")
+            labels = input.split(".")
+
+        trailing_dot = u''
+        if labels and not labels[-1]:
+            trailing_dot = u'.'
+            labels.pop()
+
+        decoded = u".".join([ToUnicode(label) for label in labels])
+        return decoded + trailing_dot, len(input)
+
+class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
+    """Incremental IDNA encoder that only emits completed labels."""
+
+    def _buffer_encode(self, input, errors, final):
+        """Encode the complete labels in input; return (output, consumed).
+
+        Unless final is true, a last label not yet followed by a dot is
+        left unconsumed so that the next call can complete it.
+        """
+        if errors != 'strict':
+            # IDNA is quite clear that implementations must be strict
+            raise UnicodeError("unsupported error handling "+errors)
+
+        if not input:
+            return ("", 0)
+
+        labels = dots.split(input)
+        # str, not unicode, so the joined result always stays a byte string
+        trailing_dot = ''
+        if labels:
+            if not labels[-1]:
+                trailing_dot = '.'
+                del labels[-1]
+            elif not final:
+                # Keep potentially unfinished label until the next call
+                del labels[-1]
+                if labels:
+                    trailing_dot = '.'
+
+        # Join with U+002E; input consumed is the labels plus their separators
+        result = ".".join([ToASCII(label) for label in labels]) + trailing_dot
+        size = sum([len(label) for label in labels]) + max(len(labels) - 1, 0)
+        return (result, size + len(trailing_dot))
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+    """Incremental IDNA decoder that only emits completed labels."""
+
+    def _buffer_decode(self, input, errors, final):
+        """Decode the complete labels in input; return (output, consumed).
+
+        Unless final is true, a last label not yet followed by a dot is
+        left unconsumed so that the next call can complete it.
+        """
+        if errors != 'strict':
+            raise UnicodeError("Unsupported error handling "+errors)
+
+        if not input:
+            return (u"", 0)
+
+        # IDNA allows decoding to operate on Unicode strings, too.
+        if isinstance(input, unicode):
+            labels = dots.split(input)
+        else:
+            # Must be ASCII string; the unicode() call only validates that
+            input = str(input)
+            unicode(input, "ascii")
+            labels = input.split(".")
+
+        trailing_dot = u''
+        if labels:
+            if not labels[-1]:
+                trailing_dot = u'.'
+                del labels[-1]
+            elif not final:
+                # Keep potentially unfinished label until the next call
+                del labels[-1]
+                if labels:
+                    trailing_dot = u'.'
+
+        # One separator per label boundary, even after empty leading labels
+        result = u".".join([ToUnicode(label) for label in labels])
+        size = sum([len(label) for label in labels]) + max(len(labels) - 1, 0)
+        return (result + trailing_dot, size + len(trailing_dot))
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    """IDNA stream writer; everything is inherited from the two bases."""
+
+class StreamReader(Codec, codecs.StreamReader):
+    """IDNA stream reader; everything is inherited from the two bases."""
+
+### encodings module API
+
+def getregentry():
+    """Return the codecs.CodecInfo describing the 'idna' codec."""
+    entry = dict(
+        name='idna',
+        encode=Codec().encode, decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter, streamreader=StreamReader,
+    )
+    return codecs.CodecInfo(**entry)
diff --git a/cashew/Lib/encodings/iso8859_1.py b/cashew/Lib/encodings/iso8859_1.py
new file mode 100644
index 0000000..71bc13f
--- /dev/null
+++ b/cashew/Lib/encodings/iso8859_1.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec iso8859_1 generated from 'MAPPINGS/ISO8859/8859-1.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    """ISO 8859-1 codec; both methods delegate to the charmap functions."""
+    def encode(self, input, errors='strict'):
+        return codecs.charmap_encode(input, errors, encoding_table)
+
+    def decode(self, input, errors='strict'):
+        return codecs.charmap_decode(input, errors, decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):  # 'final' is not used here
+        return codecs.charmap_encode(input, self.errors, encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):  # 'final' is not used here
+        return codecs.charmap_decode(input, self.errors, decoding_table)[0]
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    """ISO 8859-1 stream writer; everything is inherited from the bases."""
+
+class StreamReader(Codec, codecs.StreamReader):
+    """ISO 8859-1 stream reader; everything is inherited from the bases."""
+
+### encodings module API
+
+def getregentry():
+    """Return the codecs.CodecInfo describing the 'iso8859-1' codec."""
+    entry = dict(
+        name='iso8859-1',
+        encode=Codec().encode, decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader, streamwriter=StreamWriter,
+    )
+    return codecs.CodecInfo(**entry)
+
+
+### Decoding Table
+
+decoding_table = (
+ u'\x00' # 0x00 -> NULL
+ u'\x01' # 0x01 -> START OF HEADING
+ u'\x02' # 0x02 -> START OF TEXT
+ u'\x03' # 0x03 -> END OF TEXT
+ u'\x04' # 0x04 -> END OF TRANSMISSION
+ u'\x05' # 0x05 -> ENQUIRY
+ u'\x06' # 0x06 -> ACKNOWLEDGE
+ u'\x07' # 0x07 -> BELL
+ u'\x08' # 0x08 -> BACKSPACE
+ u'\t' # 0x09 -> HORIZONTAL TABULATION
+ u'\n' # 0x0A -> LINE FEED
+ u'\x0b' # 0x0B -> VERTICAL TABULATION
+ u'\x0c' # 0x0C -> FORM FEED
+ u'\r' # 0x0D -> CARRIAGE RETURN
+ u'\x0e' # 0x0E -> SHIFT OUT
+ u'\x0f' # 0x0F -> SHIFT IN
+ u'\x10' # 0x10 -> DATA LINK ESCAPE
+ u'\x11' # 0x11 -> DEVICE CONTROL ONE
+ u'\x12' # 0x12 -> DEVICE CONTROL TWO
+ u'\x13' # 0x13 -> DEVICE CONTROL THREE
+ u'\x14' # 0x14 -> DEVICE CONTROL FOUR
+ u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ u'\x16' # 0x16 -> SYNCHRONOUS IDLE
+ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ u'\x18' # 0x18 -> CANCEL
+ u'\x19' # 0x19 -> END OF MEDIUM
+ u'\x1a' # 0x1A -> SUBSTITUTE
+ u'\x1b' # 0x1B -> ESCAPE
+ u'\x1c' # 0x1C -> FILE SEPARATOR
+ u'\x1d' # 0x1D -> GROUP SEPARATOR
+ u'\x1e' # 0x1E -> RECORD SEPARATOR
+ u'\x1f' # 0x1F -> UNIT SEPARATOR
+ u' ' # 0x20 -> SPACE
+ u'!' # 0x21 -> EXCLAMATION MARK
+ u'"' # 0x22 -> QUOTATION MARK
+ u'#' # 0x23 -> NUMBER SIGN
+ u'$' # 0x24 -> DOLLAR SIGN
+ u'%' # 0x25 -> PERCENT SIGN
+ u'&' # 0x26 -> AMPERSAND
+ u"'" # 0x27 -> APOSTROPHE
+ u'(' # 0x28 -> LEFT PARENTHESIS
+ u')' # 0x29 -> RIGHT PARENTHESIS
+ u'*' # 0x2A -> ASTERISK
+ u'+' # 0x2B -> PLUS SIGN
+ u',' # 0x2C -> COMMA
+ u'-' # 0x2D -> HYPHEN-MINUS
+ u'.' # 0x2E -> FULL STOP
+ u'/' # 0x2F -> SOLIDUS
+ u'0' # 0x30 -> DIGIT ZERO
+ u'1' # 0x31 -> DIGIT ONE
+ u'2' # 0x32 -> DIGIT TWO
+ u'3' # 0x33 -> DIGIT THREE
+ u'4' # 0x34 -> DIGIT FOUR
+ u'5' # 0x35 -> DIGIT FIVE
+ u'6' # 0x36 -> DIGIT SIX
+ u'7' # 0x37 -> DIGIT SEVEN
+ u'8' # 0x38 -> DIGIT EIGHT
+ u'9' # 0x39 -> DIGIT NINE
+ u':' # 0x3A -> COLON
+ u';' # 0x3B -> SEMICOLON
+ u'<' # 0x3C -> LESS-THAN SIGN
+ u'=' # 0x3D -> EQUALS SIGN
+ u'>' # 0x3E -> GREATER-THAN SIGN
+ u'?' # 0x3F -> QUESTION MARK
+ u'@' # 0x40 -> COMMERCIAL AT
+ u'A' # 0x41 -> LATIN CAPITAL LETTER A
+ u'B' # 0x42 -> LATIN CAPITAL LETTER B
+ u'C' # 0x43 -> LATIN CAPITAL LETTER C
+ u'D' # 0x44 -> LATIN CAPITAL LETTER D
+ u'E' # 0x45 -> LATIN CAPITAL LETTER E
+ u'F' # 0x46 -> LATIN CAPITAL LETTER F
+ u'G' # 0x47 -> LATIN CAPITAL LETTER G
+ u'H' # 0x48 -> LATIN CAPITAL LETTER H
+ u'I' # 0x49 -> LATIN CAPITAL LETTER I
+ u'J' # 0x4A -> LATIN CAPITAL LETTER J
+ u'K' # 0x4B -> LATIN CAPITAL LETTER K
+ u'L' # 0x4C -> LATIN CAPITAL LETTER L
+ u'M' # 0x4D -> LATIN CAPITAL LETTER M
+ u'N' # 0x4E -> LATIN CAPITAL LETTER N
+ u'O' # 0x4F -> LATIN CAPITAL LETTER O
+ u'P' # 0x50 -> LATIN CAPITAL LETTER P
+ u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ u'R' # 0x52 -> LATIN CAPITAL LETTER R
+ u'S' # 0x53 -> LATIN CAPITAL LETTER S
+ u'T' # 0x54 -> LATIN CAPITAL LETTER T
+ u'U' # 0x55 -> LATIN CAPITAL LETTER U
+ u'V' # 0x56 -> LATIN CAPITAL LETTER V
+ u'W' # 0x57 -> LATIN CAPITAL LETTER W
+ u'X' # 0x58 -> LATIN CAPITAL LETTER X
+ u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ u'[' # 0x5B -> LEFT SQUARE BRACKET
+ u'\\' # 0x5C -> REVERSE SOLIDUS
+ u']' # 0x5D -> RIGHT SQUARE BRACKET
+ u'^' # 0x5E -> CIRCUMFLEX ACCENT
+ u'_' # 0x5F -> LOW LINE
+ u'`' # 0x60 -> GRAVE ACCENT
+ u'a' # 0x61 -> LATIN SMALL LETTER A
+ u'b' # 0x62 -> LATIN SMALL LETTER B
+ u'c' # 0x63 -> LATIN SMALL LETTER C
+ u'd' # 0x64 -> LATIN SMALL LETTER D
+ u'e' # 0x65 -> LATIN SMALL LETTER E
+ u'f' # 0x66 -> LATIN SMALL LETTER F
+ u'g' # 0x67 -> LATIN SMALL LETTER G
+ u'h' # 0x68 -> LATIN SMALL LETTER H
+ u'i' # 0x69 -> LATIN SMALL LETTER I
+ u'j' # 0x6A -> LATIN SMALL LETTER J
+ u'k' # 0x6B -> LATIN SMALL LETTER K
+ u'l' # 0x6C -> LATIN SMALL LETTER L
+ u'm' # 0x6D -> LATIN SMALL LETTER M
+ u'n' # 0x6E -> LATIN SMALL LETTER N
+ u'o' # 0x6F -> LATIN SMALL LETTER O
+ u'p' # 0x70 -> LATIN SMALL LETTER P
+ u'q' # 0x71 -> LATIN SMALL LETTER Q
+ u'r' # 0x72 -> LATIN SMALL LETTER R
+ u's' # 0x73 -> LATIN SMALL LETTER S
+ u't' # 0x74 -> LATIN SMALL LETTER T
+ u'u' # 0x75 -> LATIN SMALL LETTER U
+ u'v' # 0x76 -> LATIN SMALL LETTER V
+ u'w' # 0x77 -> LATIN SMALL LETTER W
+ u'x' # 0x78 -> LATIN SMALL LETTER X
+ u'y' # 0x79 -> LATIN SMALL LETTER Y
+ u'z' # 0x7A -> LATIN SMALL LETTER Z
+ u'{' # 0x7B -> LEFT CURLY BRACKET
+ u'|' # 0x7C -> VERTICAL LINE
+ u'}' # 0x7D -> RIGHT CURLY BRACKET
+ u'~' # 0x7E -> TILDE
+ u'\x7f' # 0x7F -> DELETE
+ u'\x80' # 0x80 ->
+ u'\x81' # 0x81 ->
+ u'\x82' # 0x82 ->
+ u'\x83' # 0x83 ->
+ u'\x84' # 0x84 ->
+ u'\x85' # 0x85 ->